Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ********************************************************************** |
| 5 | * Copyright (C) 2001-2009, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ********************************************************************** |
| 8 | * Date Name Description |
| 9 | * 05/23/00 aliu Creation. |
| 10 | ********************************************************************** |
| 11 | */ |
| 12 | |
| 13 | #include <algorithm> |
| 14 | #include <vector> |
| 15 | #include "unicode/utypes.h" |
| 16 | #include "unicode/edits.h" |
| 17 | #include "unicode/unistr.h" |
| 18 | #include "unicode/utf16.h" |
| 19 | #include "cmemory.h" |
| 20 | #include "testutil.h" |
| 21 | #include "intltest.h" |
| 22 | |
| 23 | static const UChar HEX[] = u"0123456789ABCDEF"; |
| 24 | |
| 25 | UnicodeString &TestUtility::appendHex(UnicodeString &buf, UChar32 ch) { |
| 26 | if (ch >= 0x10000) { |
| 27 | if (ch >= 0x100000) { |
| 28 | buf.append(HEX[0xF&(ch>>20)]); |
| 29 | } |
| 30 | buf.append(HEX[0xF&(ch>>16)]); |
| 31 | } |
| 32 | buf.append(HEX[0xF&(ch>>12)]); |
| 33 | buf.append(HEX[0xF&(ch>>8)]); |
| 34 | buf.append(HEX[0xF&(ch>>4)]); |
| 35 | buf.append(HEX[0xF&ch]); |
| 36 | return buf; |
| 37 | } |
| 38 | |
| 39 | UnicodeString TestUtility::hex(UChar32 ch) { |
| 40 | UnicodeString buf; |
| 41 | appendHex(buf, ch); |
| 42 | return buf; |
| 43 | } |
| 44 | |
| 45 | UnicodeString TestUtility::hex(const UnicodeString& s) { |
| 46 | return hex(s, u','); |
| 47 | } |
| 48 | |
| 49 | UnicodeString TestUtility::hex(const UnicodeString& s, UChar sep) { |
| 50 | UnicodeString result; |
| 51 | if (s.isEmpty()) return result; |
| 52 | UChar32 c; |
| 53 | for (int32_t i = 0; i < s.length(); i += U16_LENGTH(c)) { |
| 54 | c = s.char32At(i); |
| 55 | if (i > 0) { |
| 56 | result.append(sep); |
| 57 | } |
| 58 | appendHex(result, c); |
| 59 | } |
| 60 | return result; |
| 61 | } |
| 62 | |
| 63 | UnicodeString TestUtility::hex(const uint8_t* bytes, int32_t len) { |
| 64 | UnicodeString buf; |
| 65 | for (int32_t i = 0; i < len; ++i) { |
| 66 | buf.append(HEX[0x0F & (bytes[i] >> 4)]); |
| 67 | buf.append(HEX[0x0F & bytes[i]]); |
| 68 | } |
| 69 | return buf; |
| 70 | } |
| 71 | |
| 72 | namespace { |
| 73 | |
| 74 | UnicodeString printOneEdit(const Edits::Iterator &ei) { |
| 75 | if (ei.hasChange()) { |
| 76 | return UnicodeString() + ei.oldLength() + u"->" + ei.newLength(); |
| 77 | } else { |
| 78 | return UnicodeString() + ei.oldLength() + u"=" + ei.newLength(); |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | /** |
| 83 | * Maps indexes according to the expected edits. |
| 84 | * A destination index can occur multiple times when there are source deletions. |
| 85 | * Map according to the last occurrence, normally in a non-empty destination span. |
| 86 | * Simplest is to search from the back. |
| 87 | */ |
| 88 | int32_t srcIndexFromDest(const EditChange expected[], int32_t expLength, |
| 89 | int32_t srcLength, int32_t destLength, int32_t index) { |
| 90 | int32_t srcIndex = srcLength; |
| 91 | int32_t destIndex = destLength; |
| 92 | int32_t i = expLength; |
| 93 | while (index < destIndex && i > 0) { |
| 94 | --i; |
| 95 | int32_t prevSrcIndex = srcIndex - expected[i].oldLength; |
| 96 | int32_t prevDestIndex = destIndex - expected[i].newLength; |
| 97 | if (index == prevDestIndex) { |
| 98 | return prevSrcIndex; |
| 99 | } else if (index > prevDestIndex) { |
| 100 | if (expected[i].change) { |
| 101 | // In a change span, map to its end. |
| 102 | return srcIndex; |
| 103 | } else { |
| 104 | // In an unchanged span, offset within it. |
| 105 | return prevSrcIndex + (index - prevDestIndex); |
| 106 | } |
| 107 | } |
| 108 | srcIndex = prevSrcIndex; |
| 109 | destIndex = prevDestIndex; |
| 110 | } |
| 111 | // index is outside the string. |
| 112 | return srcIndex; |
| 113 | } |
| 114 | |
| 115 | int32_t destIndexFromSrc(const EditChange expected[], int32_t expLength, |
| 116 | int32_t srcLength, int32_t destLength, int32_t index) { |
| 117 | int32_t srcIndex = srcLength; |
| 118 | int32_t destIndex = destLength; |
| 119 | int32_t i = expLength; |
| 120 | while (index < srcIndex && i > 0) { |
| 121 | --i; |
| 122 | int32_t prevSrcIndex = srcIndex - expected[i].oldLength; |
| 123 | int32_t prevDestIndex = destIndex - expected[i].newLength; |
| 124 | if (index == prevSrcIndex) { |
| 125 | return prevDestIndex; |
| 126 | } else if (index > prevSrcIndex) { |
| 127 | if (expected[i].change) { |
| 128 | // In a change span, map to its end. |
| 129 | return destIndex; |
| 130 | } else { |
| 131 | // In an unchanged span, offset within it. |
| 132 | return prevDestIndex + (index - prevSrcIndex); |
| 133 | } |
| 134 | } |
| 135 | srcIndex = prevSrcIndex; |
| 136 | destIndex = prevDestIndex; |
| 137 | } |
| 138 | // index is outside the string. |
| 139 | return destIndex; |
| 140 | } |
| 141 | |
| 142 | } // namespace |
| 143 | |
| 144 | // For debugging, set -v to see matching edits up to a failure. |
| 145 | UBool TestUtility::checkEqualEdits(IntlTest &test, const UnicodeString &name, |
| 146 | const Edits &e1, const Edits &e2, UErrorCode &errorCode) { |
| 147 | Edits::Iterator ei1 = e1.getFineIterator(); |
| 148 | Edits::Iterator ei2 = e2.getFineIterator(); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 149 | UBool ok = true; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 150 | for (int32_t i = 0; ok; ++i) { |
| 151 | UBool ei1HasNext = ei1.next(errorCode); |
| 152 | UBool ei2HasNext = ei2.next(errorCode); |
| 153 | ok &= test.assertEquals(name + u" next()[" + i + u"]" + __LINE__, |
| 154 | ei1HasNext, ei2HasNext); |
| 155 | ok &= test.assertSuccess(name + u" errorCode[" + i + u"]" + __LINE__, errorCode); |
| 156 | ok &= test.assertEquals(name + u" edit[" + i + u"]" + __LINE__, |
| 157 | printOneEdit(ei1), printOneEdit(ei2)); |
| 158 | if (!ei1HasNext || !ei2HasNext) { |
| 159 | break; |
| 160 | } |
| 161 | test.logln(); |
| 162 | } |
| 163 | return ok; |
| 164 | } |
| 165 | |
| 166 | void TestUtility::checkEditsIter( |
| 167 | IntlTest &test, |
| 168 | const UnicodeString &name, |
| 169 | Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators |
| 170 | const EditChange expected[], int32_t expLength, UBool withUnchanged, |
| 171 | UErrorCode &errorCode) { |
| 172 | test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(-1, errorCode)); |
| 173 | test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(-1, errorCode)); |
| 174 | |
| 175 | int32_t expSrcIndex = 0; |
| 176 | int32_t expDestIndex = 0; |
| 177 | int32_t expReplIndex = 0; |
| 178 | for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) { |
| 179 | const EditChange &expect = expected[expIndex]; |
| 180 | UnicodeString msg = UnicodeString(name).append(u' ') + expIndex; |
| 181 | if (withUnchanged || expect.change) { |
| 182 | test.assertTrue(msg + u":" + __LINE__, ei1.next(errorCode)); |
| 183 | test.assertEquals(msg + u":" + __LINE__, expect.change, ei1.hasChange()); |
| 184 | test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei1.oldLength()); |
| 185 | test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei1.newLength()); |
| 186 | test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex()); |
| 187 | test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex()); |
| 188 | test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex()); |
| 189 | } |
| 190 | |
| 191 | if (expect.oldLength > 0) { |
| 192 | test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode)); |
| 193 | test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange()); |
| 194 | test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength()); |
| 195 | test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength()); |
| 196 | test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex()); |
| 197 | test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex()); |
| 198 | test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex()); |
| 199 | if (!withUnchanged) { |
| 200 | // For some iterators, move past the current range |
| 201 | // so that findSourceIndex() has to look before the current index. |
| 202 | ei2.next(errorCode); |
| 203 | ei2.next(errorCode); |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | if (expect.newLength > 0) { |
| 208 | test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode)); |
| 209 | test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange()); |
| 210 | test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength()); |
| 211 | test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength()); |
| 212 | test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex()); |
| 213 | test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex()); |
| 214 | test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex()); |
| 215 | if (!withUnchanged) { |
| 216 | // For some iterators, move past the current range |
| 217 | // so that findSourceIndex() has to look before the current index. |
| 218 | ei2.next(errorCode); |
| 219 | ei2.next(errorCode); |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | expSrcIndex += expect.oldLength; |
| 224 | expDestIndex += expect.newLength; |
| 225 | if (expect.change) { |
| 226 | expReplIndex += expect.newLength; |
| 227 | } |
| 228 | } |
| 229 | UnicodeString msg = UnicodeString(name).append(u" end"); |
| 230 | test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode)); |
| 231 | test.assertFalse(msg + u":" + __LINE__, ei1.hasChange()); |
| 232 | test.assertEquals(msg + u":" + __LINE__, 0, ei1.oldLength()); |
| 233 | test.assertEquals(msg + u":" + __LINE__, 0, ei1.newLength()); |
| 234 | test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex()); |
| 235 | test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex()); |
| 236 | test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex()); |
| 237 | |
| 238 | test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode)); |
| 239 | test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode)); |
| 240 | |
| 241 | // Check mapping of all indexes against a simple implementation |
| 242 | // that works on the expected changes. |
| 243 | // Iterate once forward, once backward, to cover more runtime conditions. |
| 244 | int32_t srcLength = expSrcIndex; |
| 245 | int32_t destLength = expDestIndex; |
| 246 | std::vector<int32_t> srcIndexes; |
| 247 | std::vector<int32_t> destIndexes; |
| 248 | srcIndexes.push_back(-1); |
| 249 | destIndexes.push_back(-1); |
| 250 | int32_t srcIndex = 0; |
| 251 | int32_t destIndex = 0; |
| 252 | for (int32_t i = 0; i < expLength; ++i) { |
| 253 | if (expected[i].oldLength > 0) { |
| 254 | srcIndexes.push_back(srcIndex); |
| 255 | if (expected[i].oldLength > 1) { |
| 256 | srcIndexes.push_back(srcIndex + 1); |
| 257 | if (expected[i].oldLength > 2) { |
| 258 | srcIndexes.push_back(srcIndex + expected[i].oldLength - 1); |
| 259 | } |
| 260 | } |
| 261 | } |
| 262 | if (expected[i].newLength > 0) { |
| 263 | destIndexes.push_back(destIndex); |
| 264 | if (expected[i].newLength > 1) { |
| 265 | destIndexes.push_back(destIndex + 1); |
| 266 | if (expected[i].newLength > 2) { |
| 267 | destIndexes.push_back(destIndex + expected[i].newLength - 1); |
| 268 | } |
| 269 | } |
| 270 | } |
| 271 | srcIndex += expected[i].oldLength; |
| 272 | destIndex += expected[i].newLength; |
| 273 | } |
| 274 | srcIndexes.push_back(srcLength); |
| 275 | destIndexes.push_back(destLength); |
| 276 | srcIndexes.push_back(srcLength + 1); |
| 277 | destIndexes.push_back(destLength + 1); |
| 278 | std::reverse(destIndexes.begin(), destIndexes.end()); |
| 279 | // Zig-zag across the indexes to stress next() <-> previous(). |
| 280 | static const int32_t ZIG_ZAG[] = { 0, 1, 2, 3, 2, 1 }; |
| 281 | for (auto i = 0; i < (int32_t)srcIndexes.size(); ++i) { |
| 282 | for (int32_t ij = 0; ij < UPRV_LENGTHOF(ZIG_ZAG); ++ij) { |
| 283 | int32_t j = ZIG_ZAG[ij]; |
| 284 | if ((i + j) < (int32_t)srcIndexes.size()) { |
| 285 | int32_t si = srcIndexes[i + j]; |
| 286 | test.assertEquals(name + u" destIndexFromSrc(" + si + u"):" + __LINE__, |
| 287 | destIndexFromSrc(expected, expLength, srcLength, destLength, si), |
| 288 | ei2.destinationIndexFromSourceIndex(si, errorCode)); |
| 289 | } |
| 290 | } |
| 291 | } |
| 292 | for (auto i = 0; i < (int32_t)destIndexes.size(); ++i) { |
| 293 | for (int32_t ij = 0; ij < UPRV_LENGTHOF(ZIG_ZAG); ++ij) { |
| 294 | int32_t j = ZIG_ZAG[ij]; |
| 295 | if ((i + j) < (int32_t)destIndexes.size()) { |
| 296 | int32_t di = destIndexes[i + j]; |
| 297 | test.assertEquals(name + u" srcIndexFromDest(" + di + u"):" + __LINE__, |
| 298 | srcIndexFromDest(expected, expLength, srcLength, destLength, di), |
| 299 | ei2.sourceIndexFromDestinationIndex(di, errorCode)); |
| 300 | } |
| 301 | } |
| 302 | } |
| 303 | } |