Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 2 | // License & terms of use: http://www.unicode.org/copyright.html |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 3 | /* |
| 4 | ********************************************************************** |
| 5 | * Copyright (c) 2002-2006, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ********************************************************************** |
| 8 | */ |
| 9 | #include "unicode/usetiter.h" |
| 10 | #include "unicode/uniset.h" |
| 11 | #include "unicode/unistr.h" |
| 12 | #include "uvector.h" |
| 13 | |
| 14 | U_NAMESPACE_BEGIN |
| 15 | |
| 16 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSetIterator) |
| 17 | |
| 18 | /** |
| 19 | * Create an iterator |
| 20 | * @param uSet set to iterate over |
| 21 | */ |
| 22 | UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) { |
| 23 | cpString = NULL; |
| 24 | reset(uSet); |
| 25 | } |
| 26 | |
| 27 | /** |
| 28 | * Create an iterator. Convenience for when the contents are to be set later. |
| 29 | */ |
| 30 | UnicodeSetIterator::UnicodeSetIterator() { |
| 31 | this->set = NULL; |
| 32 | cpString = NULL; |
| 33 | reset(); |
| 34 | } |
| 35 | |
| 36 | UnicodeSetIterator::~UnicodeSetIterator() { |
| 37 | delete cpString; |
| 38 | } |
| 39 | |
| 40 | /** |
| 41 | * Returns the next element in the set. |
| 42 | * @return true if there was another element in the set. |
| 43 | * if so, if codepoint == IS_STRING, the value is a string in the string field |
| 44 | * else the value is a single code point in the codepoint field. |
| 45 | * <br>You are guaranteed that the codepoints are in sorted order, and the strings are in sorted order, |
| 46 | * and that all code points are returned before any strings are returned. |
| 47 | * <br>Note also that the codepointEnd is undefined after calling this method. |
| 48 | */ |
| 49 | UBool UnicodeSetIterator::next() { |
| 50 | if (nextElement <= endElement) { |
| 51 | codepoint = codepointEnd = nextElement++; |
| 52 | string = NULL; |
| 53 | return TRUE; |
| 54 | } |
| 55 | if (range < endRange) { |
| 56 | loadRange(++range); |
| 57 | codepoint = codepointEnd = nextElement++; |
| 58 | string = NULL; |
| 59 | return TRUE; |
| 60 | } |
| 61 | |
| 62 | if (nextString >= stringCount) return FALSE; |
| 63 | codepoint = (UChar32)IS_STRING; // signal that value is actually a string |
| 64 | string = (const UnicodeString*) set->strings->elementAt(nextString++); |
| 65 | return TRUE; |
| 66 | } |
| 67 | |
| 68 | /** |
| 69 | * @return true if there was another element in the set. |
| 70 | * if so, if codepoint == IS_STRING, the value is a string in the string field |
| 71 | * else the value is a range of codepoints in the <codepoint, codepointEnd> fields. |
| 72 | * <br>Note that the codepoints are in sorted order, and the strings are in sorted order, |
| 73 | * and that all code points are returned before any strings are returned. |
| 74 | * <br>You are guaranteed that the ranges are in sorted order, and the strings are in sorted order, |
| 75 | * and that all ranges are returned before any strings are returned. |
| 76 | * <br>You are also guaranteed that ranges are disjoint and non-contiguous. |
| 77 | * <br>Note also that codepointEnd is set only when a range is returned; it is left unchanged when a string is returned. |
| 78 | */ |
| 79 | UBool UnicodeSetIterator::nextRange() { |
| 80 | string = NULL; |
| 81 | if (nextElement <= endElement) { |
| 82 | codepointEnd = endElement; |
| 83 | codepoint = nextElement; |
| 84 | nextElement = endElement+1; |
| 85 | return TRUE; |
| 86 | } |
| 87 | if (range < endRange) { |
| 88 | loadRange(++range); |
| 89 | codepointEnd = endElement; |
| 90 | codepoint = nextElement; |
| 91 | nextElement = endElement+1; |
| 92 | return TRUE; |
| 93 | } |
| 94 | |
| 95 | if (nextString >= stringCount) return FALSE; |
| 96 | codepoint = (UChar32)IS_STRING; // signal that value is actually a string |
| 97 | string = (const UnicodeString*) set->strings->elementAt(nextString++); |
| 98 | return TRUE; |
| 99 | } |
| 100 | |
| 101 | /** |
| 102 | * @param uSet the set to iterate over. This allows reuse of the iterator. |
| 103 | */ |
| 104 | void UnicodeSetIterator::reset(const UnicodeSet& uSet) { |
| 105 | this->set = &uSet; |
| 106 | reset(); |
| 107 | } |
| 108 | |
| 109 | /** |
| 110 | * Resets to the start, to allow the iteration to start over again. |
| 111 | */ |
| 112 | void UnicodeSetIterator::reset() { |
| 113 | if (set == NULL) { |
| 114 | // Set up indices to empty iteration |
| 115 | endRange = -1; |
| 116 | stringCount = 0; |
| 117 | } else { |
| 118 | endRange = set->getRangeCount() - 1; |
Jungshik Shin | d13a96f | 2018-11-14 09:22:09 -0800 | [diff] [blame^] | 119 | stringCount = set->stringsSize(); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 120 | } |
| 121 | range = 0; |
| 122 | endElement = -1; |
| 123 | nextElement = 0; |
| 124 | if (endRange >= 0) { |
| 125 | loadRange(range); |
| 126 | } |
| 127 | nextString = 0; |
| 128 | string = NULL; |
| 129 | } |
| 130 | |
| 131 | void UnicodeSetIterator::loadRange(int32_t iRange) { |
| 132 | nextElement = set->getRangeStart(iRange); |
| 133 | endElement = set->getRangeEnd(iRange); |
| 134 | } |
| 135 | |
| 136 | |
| 137 | const UnicodeString& UnicodeSetIterator::getString() { |
| 138 | if (string==NULL && codepoint!=(UChar32)IS_STRING) { |
| 139 | if (cpString == NULL) { |
| 140 | cpString = new UnicodeString(); |
| 141 | } |
| 142 | if (cpString != NULL) { |
| 143 | cpString->setTo((UChar32)codepoint); |
| 144 | } |
| 145 | string = cpString; |
| 146 | } |
| 147 | return *string; |
| 148 | } |
| 149 | |
| 150 | U_NAMESPACE_END |
| 151 | |
| 152 | //eof |