jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame^] | 1 | /* |
| 2 | ******************************************************************************* |
| 3 | * Copyright (C) 2010-2012, International Business Machines |
| 4 | * Corporation and others. All Rights Reserved. |
| 5 | ******************************************************************************* |
| 6 | * file name: bytestrieiterator.cpp |
| 7 | * encoding: US-ASCII |
| 8 | * tab size: 8 (not used) |
| 9 | * indentation:4 |
| 10 | * |
| 11 | * created on: 2010nov03 |
| 12 | * created by: Markus W. Scherer |
| 13 | */ |
| 14 | |
| 15 | #include "unicode/utypes.h" |
| 16 | #include "unicode/bytestrie.h" |
| 17 | #include "unicode/stringpiece.h" |
| 18 | #include "charstr.h" |
| 19 | #include "uvectr32.h" |
| 20 | |
| 21 | U_NAMESPACE_BEGIN |
| 22 | |
| 23 | BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, |
| 24 | UErrorCode &errorCode) |
| 25 | : bytes_(static_cast<const uint8_t *>(trieBytes)), |
| 26 | pos_(bytes_), initialPos_(bytes_), |
| 27 | remainingMatchLength_(-1), initialRemainingMatchLength_(-1), |
| 28 | str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { |
| 29 | if(U_FAILURE(errorCode)) { |
| 30 | return; |
| 31 | } |
| 32 | // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into |
| 33 | // a public API header for which we would want it to depend only on |
| 34 | // other public headers. |
| 35 | // Unlike BytesTrie itself, its Iterator performs memory allocations anyway |
| 36 | // via the CharString and UVector32 implementations, so this additional |
| 37 | // cost is minimal. |
| 38 | str_=new CharString(); |
| 39 | stack_=new UVector32(errorCode); |
| 40 | if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { |
| 41 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 42 | } |
| 43 | } |
| 44 | |
| 45 | BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, |
| 46 | UErrorCode &errorCode) |
| 47 | : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), |
| 48 | remainingMatchLength_(trie.remainingMatchLength_), |
| 49 | initialRemainingMatchLength_(trie.remainingMatchLength_), |
| 50 | str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { |
| 51 | if(U_FAILURE(errorCode)) { |
| 52 | return; |
| 53 | } |
| 54 | str_=new CharString(); |
| 55 | stack_=new UVector32(errorCode); |
| 56 | if(U_FAILURE(errorCode)) { |
| 57 | return; |
| 58 | } |
| 59 | if(str_==NULL || stack_==NULL) { |
| 60 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 61 | return; |
| 62 | } |
| 63 | int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. |
| 64 | if(length>=0) { |
| 65 | // Pending linear-match node, append remaining bytes to str_. |
| 66 | ++length; |
| 67 | if(maxLength_>0 && length>maxLength_) { |
| 68 | length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. |
| 69 | } |
| 70 | str_->append(reinterpret_cast<const char *>(pos_), length, errorCode); |
| 71 | pos_+=length; |
| 72 | remainingMatchLength_-=length; |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | BytesTrie::Iterator::~Iterator() { |
| 77 | delete str_; |
| 78 | delete stack_; |
| 79 | } |
| 80 | |
| 81 | BytesTrie::Iterator & |
| 82 | BytesTrie::Iterator::reset() { |
| 83 | pos_=initialPos_; |
| 84 | remainingMatchLength_=initialRemainingMatchLength_; |
| 85 | int32_t length=remainingMatchLength_+1; // Remaining match length. |
| 86 | if(maxLength_>0 && length>maxLength_) { |
| 87 | length=maxLength_; |
| 88 | } |
| 89 | str_->truncate(length); |
| 90 | pos_+=length; |
| 91 | remainingMatchLength_-=length; |
| 92 | stack_->setSize(0); |
| 93 | return *this; |
| 94 | } |
| 95 | |
| 96 | UBool |
| 97 | BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } |
| 98 | |
| 99 | UBool |
| 100 | BytesTrie::Iterator::next(UErrorCode &errorCode) { |
| 101 | if(U_FAILURE(errorCode)) { |
| 102 | return FALSE; |
| 103 | } |
| 104 | const uint8_t *pos=pos_; |
| 105 | if(pos==NULL) { |
| 106 | if(stack_->isEmpty()) { |
| 107 | return FALSE; |
| 108 | } |
| 109 | // Pop the state off the stack and continue with the next outbound edge of |
| 110 | // the branch node. |
| 111 | int32_t stackSize=stack_->size(); |
| 112 | int32_t length=stack_->elementAti(stackSize-1); |
| 113 | pos=bytes_+stack_->elementAti(stackSize-2); |
| 114 | stack_->setSize(stackSize-2); |
| 115 | str_->truncate(length&0xffff); |
| 116 | length=(int32_t)((uint32_t)length>>16); |
| 117 | if(length>1) { |
| 118 | pos=branchNext(pos, length, errorCode); |
| 119 | if(pos==NULL) { |
| 120 | return TRUE; // Reached a final value. |
| 121 | } |
| 122 | } else { |
| 123 | str_->append((char)*pos++, errorCode); |
| 124 | } |
| 125 | } |
| 126 | if(remainingMatchLength_>=0) { |
| 127 | // We only get here if we started in a pending linear-match node |
| 128 | // with more than maxLength remaining bytes. |
| 129 | return truncateAndStop(); |
| 130 | } |
| 131 | for(;;) { |
| 132 | int32_t node=*pos++; |
| 133 | if(node>=kMinValueLead) { |
| 134 | // Deliver value for the byte sequence so far. |
| 135 | UBool isFinal=(UBool)(node&kValueIsFinal); |
| 136 | value_=readValue(pos, node>>1); |
| 137 | if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { |
| 138 | pos_=NULL; |
| 139 | } else { |
| 140 | pos_=skipValue(pos, node); |
| 141 | } |
| 142 | sp_.set(str_->data(), str_->length()); |
| 143 | return TRUE; |
| 144 | } |
| 145 | if(maxLength_>0 && str_->length()==maxLength_) { |
| 146 | return truncateAndStop(); |
| 147 | } |
| 148 | if(node<kMinLinearMatch) { |
| 149 | if(node==0) { |
| 150 | node=*pos++; |
| 151 | } |
| 152 | pos=branchNext(pos, node+1, errorCode); |
| 153 | if(pos==NULL) { |
| 154 | return TRUE; // Reached a final value. |
| 155 | } |
| 156 | } else { |
| 157 | // Linear-match node, append length bytes to str_. |
| 158 | int32_t length=node-kMinLinearMatch+1; |
| 159 | if(maxLength_>0 && str_->length()+length>maxLength_) { |
| 160 | str_->append(reinterpret_cast<const char *>(pos), |
| 161 | maxLength_-str_->length(), errorCode); |
| 162 | return truncateAndStop(); |
| 163 | } |
| 164 | str_->append(reinterpret_cast<const char *>(pos), length, errorCode); |
| 165 | pos+=length; |
| 166 | } |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | UBool |
| 171 | BytesTrie::Iterator::truncateAndStop() { |
| 172 | pos_=NULL; |
| 173 | sp_.set(str_->data(), str_->length()); |
| 174 | value_=-1; // no real value for str |
| 175 | return TRUE; |
| 176 | } |
| 177 | |
| 178 | // Branch node, needs to take the first outbound edge and push state for the rest. |
| 179 | const uint8_t * |
| 180 | BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { |
| 181 | while(length>kMaxBranchLinearSubNodeLength) { |
| 182 | ++pos; // ignore the comparison byte |
| 183 | // Push state for the greater-or-equal edge. |
| 184 | stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); |
| 185 | stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); |
| 186 | // Follow the less-than edge. |
| 187 | length>>=1; |
| 188 | pos=jumpByDelta(pos); |
| 189 | } |
| 190 | // List of key-value pairs where values are either final values or jump deltas. |
| 191 | // Read the first (key, value) pair. |
| 192 | uint8_t trieByte=*pos++; |
| 193 | int32_t node=*pos++; |
| 194 | UBool isFinal=(UBool)(node&kValueIsFinal); |
| 195 | int32_t value=readValue(pos, node>>1); |
| 196 | pos=skipValue(pos, node); |
| 197 | stack_->addElement((int32_t)(pos-bytes_), errorCode); |
| 198 | stack_->addElement(((length-1)<<16)|str_->length(), errorCode); |
| 199 | str_->append((char)trieByte, errorCode); |
| 200 | if(isFinal) { |
| 201 | pos_=NULL; |
| 202 | sp_.set(str_->data(), str_->length()); |
| 203 | value_=value; |
| 204 | return NULL; |
| 205 | } else { |
| 206 | return pos+value; |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | U_NAMESPACE_END |