Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 2 | // License & terms of use: http://www.unicode.org/copyright.html |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * Copyright (C) 2010-2012, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ******************************************************************************* |
| 8 | * file name: bytestrieiterator.cpp |
Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 9 | * encoding: UTF-8 |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 10 | * tab size: 8 (not used) |
| 11 | * indentation:4 |
| 12 | * |
| 13 | * created on: 2010nov03 |
| 14 | * created by: Markus W. Scherer |
| 15 | */ |
| 16 | |
| 17 | #include "unicode/utypes.h" |
| 18 | #include "unicode/bytestrie.h" |
| 19 | #include "unicode/stringpiece.h" |
| 20 | #include "charstr.h" |
| 21 | #include "uvectr32.h" |
| 22 | |
| 23 | U_NAMESPACE_BEGIN |
| 24 | |
| 25 | BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, |
| 26 | UErrorCode &errorCode) |
| 27 | : bytes_(static_cast<const uint8_t *>(trieBytes)), |
| 28 | pos_(bytes_), initialPos_(bytes_), |
| 29 | remainingMatchLength_(-1), initialRemainingMatchLength_(-1), |
| 30 | str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { |
| 31 | if(U_FAILURE(errorCode)) { |
| 32 | return; |
| 33 | } |
| 34 | // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into |
| 35 | // a public API header for which we would want it to depend only on |
| 36 | // other public headers. |
| 37 | // Unlike BytesTrie itself, its Iterator performs memory allocations anyway |
| 38 | // via the CharString and UVector32 implementations, so this additional |
| 39 | // cost is minimal. |
| 40 | str_=new CharString(); |
| 41 | stack_=new UVector32(errorCode); |
| 42 | if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { |
| 43 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 44 | } |
| 45 | } |
| 46 | |
| 47 | BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, |
| 48 | UErrorCode &errorCode) |
| 49 | : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), |
| 50 | remainingMatchLength_(trie.remainingMatchLength_), |
| 51 | initialRemainingMatchLength_(trie.remainingMatchLength_), |
| 52 | str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { |
| 53 | if(U_FAILURE(errorCode)) { |
| 54 | return; |
| 55 | } |
| 56 | str_=new CharString(); |
| 57 | stack_=new UVector32(errorCode); |
| 58 | if(U_FAILURE(errorCode)) { |
| 59 | return; |
| 60 | } |
| 61 | if(str_==NULL || stack_==NULL) { |
| 62 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 63 | return; |
| 64 | } |
| 65 | int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. |
| 66 | if(length>=0) { |
| 67 | // Pending linear-match node, append remaining bytes to str_. |
| 68 | ++length; |
| 69 | if(maxLength_>0 && length>maxLength_) { |
| 70 | length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. |
| 71 | } |
| 72 | str_->append(reinterpret_cast<const char *>(pos_), length, errorCode); |
| 73 | pos_+=length; |
| 74 | remainingMatchLength_-=length; |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | BytesTrie::Iterator::~Iterator() { |
| 79 | delete str_; |
| 80 | delete stack_; |
| 81 | } |
| 82 | |
| 83 | BytesTrie::Iterator & |
| 84 | BytesTrie::Iterator::reset() { |
| 85 | pos_=initialPos_; |
| 86 | remainingMatchLength_=initialRemainingMatchLength_; |
| 87 | int32_t length=remainingMatchLength_+1; // Remaining match length. |
| 88 | if(maxLength_>0 && length>maxLength_) { |
| 89 | length=maxLength_; |
| 90 | } |
| 91 | str_->truncate(length); |
| 92 | pos_+=length; |
| 93 | remainingMatchLength_-=length; |
| 94 | stack_->setSize(0); |
| 95 | return *this; |
| 96 | } |
| 97 | |
| 98 | UBool |
| 99 | BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } |
| 100 | |
| 101 | UBool |
| 102 | BytesTrie::Iterator::next(UErrorCode &errorCode) { |
| 103 | if(U_FAILURE(errorCode)) { |
| 104 | return FALSE; |
| 105 | } |
| 106 | const uint8_t *pos=pos_; |
| 107 | if(pos==NULL) { |
| 108 | if(stack_->isEmpty()) { |
| 109 | return FALSE; |
| 110 | } |
| 111 | // Pop the state off the stack and continue with the next outbound edge of |
| 112 | // the branch node. |
| 113 | int32_t stackSize=stack_->size(); |
| 114 | int32_t length=stack_->elementAti(stackSize-1); |
| 115 | pos=bytes_+stack_->elementAti(stackSize-2); |
| 116 | stack_->setSize(stackSize-2); |
| 117 | str_->truncate(length&0xffff); |
| 118 | length=(int32_t)((uint32_t)length>>16); |
| 119 | if(length>1) { |
| 120 | pos=branchNext(pos, length, errorCode); |
| 121 | if(pos==NULL) { |
| 122 | return TRUE; // Reached a final value. |
| 123 | } |
| 124 | } else { |
| 125 | str_->append((char)*pos++, errorCode); |
| 126 | } |
| 127 | } |
| 128 | if(remainingMatchLength_>=0) { |
| 129 | // We only get here if we started in a pending linear-match node |
| 130 | // with more than maxLength remaining bytes. |
| 131 | return truncateAndStop(); |
| 132 | } |
| 133 | for(;;) { |
| 134 | int32_t node=*pos++; |
| 135 | if(node>=kMinValueLead) { |
| 136 | // Deliver value for the byte sequence so far. |
| 137 | UBool isFinal=(UBool)(node&kValueIsFinal); |
| 138 | value_=readValue(pos, node>>1); |
| 139 | if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { |
| 140 | pos_=NULL; |
| 141 | } else { |
| 142 | pos_=skipValue(pos, node); |
| 143 | } |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 144 | return TRUE; |
| 145 | } |
| 146 | if(maxLength_>0 && str_->length()==maxLength_) { |
| 147 | return truncateAndStop(); |
| 148 | } |
| 149 | if(node<kMinLinearMatch) { |
| 150 | if(node==0) { |
| 151 | node=*pos++; |
| 152 | } |
| 153 | pos=branchNext(pos, node+1, errorCode); |
| 154 | if(pos==NULL) { |
| 155 | return TRUE; // Reached a final value. |
| 156 | } |
| 157 | } else { |
| 158 | // Linear-match node, append length bytes to str_. |
| 159 | int32_t length=node-kMinLinearMatch+1; |
| 160 | if(maxLength_>0 && str_->length()+length>maxLength_) { |
| 161 | str_->append(reinterpret_cast<const char *>(pos), |
| 162 | maxLength_-str_->length(), errorCode); |
| 163 | return truncateAndStop(); |
| 164 | } |
| 165 | str_->append(reinterpret_cast<const char *>(pos), length, errorCode); |
| 166 | pos+=length; |
| 167 | } |
| 168 | } |
| 169 | } |
| 170 | |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 171 | StringPiece |
| 172 | BytesTrie::Iterator::getString() const { |
| 173 | return str_ == NULL ? StringPiece() : str_->toStringPiece(); |
| 174 | } |
| 175 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 176 | UBool |
| 177 | BytesTrie::Iterator::truncateAndStop() { |
| 178 | pos_=NULL; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 179 | value_=-1; // no real value for str |
| 180 | return TRUE; |
| 181 | } |
| 182 | |
| 183 | // Branch node, needs to take the first outbound edge and push state for the rest. |
| 184 | const uint8_t * |
| 185 | BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { |
| 186 | while(length>kMaxBranchLinearSubNodeLength) { |
| 187 | ++pos; // ignore the comparison byte |
| 188 | // Push state for the greater-or-equal edge. |
| 189 | stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); |
| 190 | stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); |
| 191 | // Follow the less-than edge. |
| 192 | length>>=1; |
| 193 | pos=jumpByDelta(pos); |
| 194 | } |
| 195 | // List of key-value pairs where values are either final values or jump deltas. |
| 196 | // Read the first (key, value) pair. |
| 197 | uint8_t trieByte=*pos++; |
| 198 | int32_t node=*pos++; |
| 199 | UBool isFinal=(UBool)(node&kValueIsFinal); |
| 200 | int32_t value=readValue(pos, node>>1); |
| 201 | pos=skipValue(pos, node); |
| 202 | stack_->addElement((int32_t)(pos-bytes_), errorCode); |
| 203 | stack_->addElement(((length-1)<<16)|str_->length(), errorCode); |
| 204 | str_->append((char)trieByte, errorCode); |
| 205 | if(isFinal) { |
| 206 | pos_=NULL; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 207 | value_=value; |
| 208 | return NULL; |
| 209 | } else { |
| 210 | return pos+value; |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | U_NAMESPACE_END |