// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
| 8 | |
| 9 | #include "unicode/utypes.h" |
| 10 | #include "unicode/uchar.h" |
| 11 | #include "unicode/normlzr.h" |
| 12 | #include "unicode/uniset.h" |
| 13 | #include "unicode/usetiter.h" |
| 14 | #include "unicode/schriter.h" |
| 15 | #include "tstnorm.h" |
| 16 | |
| 17 | #if !UCONFIG_NO_NORMALIZATION |
| 18 | |
| 19 | static UErrorCode status = U_ZERO_ERROR; |
| 20 | |
| 21 | // test APIs that are not otherwise used - improve test coverage |
| 22 | void |
| 23 | BasicNormalizerTest::TestNormalizerAPI() { |
| 24 | // instantiate a Normalizer from a CharacterIterator |
| 25 | UnicodeString s=UnicodeString("a\\u0308\\uac00\\U0002f800", "").unescape(); |
| 26 | s.append(s); // make s a bit longer and more interesting |
| 27 | StringCharacterIterator iter(s); |
| 28 | Normalizer norm(iter, UNORM_NFC); |
| 29 | if(norm.next()!=0xe4) { |
| 30 | dataerrln("error in Normalizer(CharacterIterator).next()"); |
| 31 | } |
| 32 | |
| 33 | // test copy constructor |
| 34 | Normalizer copy(norm); |
| 35 | if(copy.next()!=0xac00) { |
| 36 | dataerrln("error in Normalizer(Normalizer(CharacterIterator)).next()"); |
| 37 | } |
| 38 | |
| 39 | // test clone(), ==, and hashCode() |
| 40 | Normalizer *clone=copy.clone(); |
| 41 | if(*clone!=copy) { |
| 42 | errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=copy"); |
| 43 | } |
| 44 | // clone must have the same hashCode() |
| 45 | if(clone->hashCode()!=copy.hashCode()) { |
| 46 | errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->hashCode()!=copy.hashCode()"); |
| 47 | } |
| 48 | if(clone->next()!=0x4e3d) { |
| 49 | dataerrln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next()"); |
| 50 | } |
| 51 | // position changed, must change hashCode() |
| 52 | if(clone->hashCode()==copy.hashCode()) { |
| 53 | errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next().hashCode()==copy.hashCode()"); |
| 54 | } |
| 55 | delete clone; |
| 56 | clone=0; |
| 57 | |
| 58 | // test compose() and decompose() |
| 59 | UnicodeString tel, nfkc, nfkd; |
| 60 | tel=UnicodeString(1, (UChar32)0x2121, 10); |
| 61 | tel.insert(1, (UChar)0x301); |
| 62 | |
| 63 | UErrorCode errorCode=U_ZERO_ERROR; |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 64 | Normalizer::compose(tel, true, 0, nfkc, errorCode); |
| 65 | Normalizer::decompose(tel, true, 0, nfkd, errorCode); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 66 | if(U_FAILURE(errorCode)) { |
| 67 | dataerrln("error in Normalizer::(de)compose(): %s", u_errorName(errorCode)); |
| 68 | } else if( |
| 69 | nfkc!=UnicodeString("TE\\u0139TELTELTELTELTELTELTELTELTEL", "").unescape() || |
| 70 | nfkd!=UnicodeString("TEL\\u0301TELTELTELTELTELTELTELTELTEL", "").unescape() |
| 71 | ) { |
| 72 | errln("error in Normalizer::(de)compose(): wrong result(s)"); |
| 73 | } |
| 74 | |
| 75 | // test setIndex() |
| 76 | norm.setIndexOnly(3); |
| 77 | if(norm.current()!=0x4e3d) { |
| 78 | dataerrln("error in Normalizer(CharacterIterator).setIndex(3)"); |
| 79 | } |
| 80 | |
| 81 | // test setText(CharacterIterator) and getText() |
| 82 | UnicodeString out, out2; |
| 83 | errorCode=U_ZERO_ERROR; |
| 84 | copy.setText(iter, errorCode); |
| 85 | if(U_FAILURE(errorCode)) { |
| 86 | errln("error Normalizer::setText() failed: %s", u_errorName(errorCode)); |
| 87 | } else { |
| 88 | copy.getText(out); |
| 89 | iter.getText(out2); |
| 90 | if( out!=out2 || |
| 91 | copy.startIndex()!=iter.startIndex() || |
| 92 | copy.endIndex()!=iter.endIndex() |
| 93 | ) { |
| 94 | errln("error in Normalizer::setText() or Normalizer::getText()"); |
| 95 | } |
| 96 | } |
| 97 | |
| 98 | // test setText(UChar *), getUMode() and setMode() |
| 99 | errorCode=U_ZERO_ERROR; |
| 100 | copy.setText(s.getBuffer()+1, s.length()-1, errorCode); |
| 101 | copy.setMode(UNORM_NFD); |
| 102 | if(copy.getUMode()!=UNORM_NFD) { |
| 103 | errln("error in Normalizer::setMode() or Normalizer::getUMode()"); |
| 104 | } |
| 105 | if(copy.next()!=0x308 || copy.next()!=0x1100) { |
| 106 | dataerrln("error in Normalizer::setText(UChar *) or Normalizer::setMode()"); |
| 107 | } |
| 108 | |
| 109 | // test setText(UChar *, length=-1) |
| 110 | errorCode=U_ZERO_ERROR; |
| 111 | |
| 112 | // NUL-terminate s |
| 113 | s.append((UChar)0); // append NUL |
| 114 | s.truncate(s.length()-1); // undo length change |
| 115 | |
| 116 | copy.setText(s.getBuffer()+1, -1, errorCode); |
| 117 | if(copy.endIndex()!=s.length()-1) { |
| 118 | errln("error in Normalizer::setText(UChar *, -1)"); |
| 119 | } |
| 120 | |
| 121 | // test setOption() and getOption() |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 122 | copy.setOption(0xaa0000, true); |
| 123 | copy.setOption(0x20000, false); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 124 | if(!copy.getOption(0x880000) || copy.getOption(0x20000)) { |
| 125 | errln("error in Normalizer::setOption() or Normalizer::getOption()"); |
| 126 | } |
| 127 | |
| 128 | // test last()/previous() with an internal buffer overflow |
| 129 | errorCode=U_ZERO_ERROR; |
| 130 | copy.setText(UnicodeString(1000, (UChar32)0x308, 1000), errorCode); |
| 131 | if(copy.last()!=0x308) { |
| 132 | errln("error in Normalizer(1000*U+0308).last()"); |
| 133 | } |
| 134 | |
| 135 | // test UNORM_NONE |
| 136 | norm.setMode(UNORM_NONE); |
| 137 | if(norm.first()!=0x61 || norm.next()!=0x308 || norm.last()!=0x2f800) { |
| 138 | errln("error in Normalizer(UNORM_NONE).first()/next()/last()"); |
| 139 | } |
| 140 | Normalizer::normalize(s, UNORM_NONE, 0, out, status); |
| 141 | if(out!=s) { |
| 142 | errln("error in Normalizer::normalize(UNORM_NONE)"); |
| 143 | } |
| 144 | |
| 145 | // test that the same string can be used as source and destination |
| 146 | s.setTo((UChar)0xe4); |
| 147 | Normalizer::normalize(s, UNORM_NFD, 0, s, status); |
| 148 | if(s.charAt(1)!=0x308) { |
| 149 | dataerrln("error in Normalizer::normalize(UNORM_NFD, self)"); |
| 150 | } |
| 151 | Normalizer::normalize(s, UNORM_NFC, 0, s, status); |
| 152 | if(s.charAt(0)!=0xe4) { |
| 153 | dataerrln("error in Normalizer::normalize(UNORM_NFC, self)"); |
| 154 | } |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 155 | Normalizer::decompose(s, false, 0, s, status); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 156 | if(s.charAt(1)!=0x308) { |
| 157 | dataerrln("error in Normalizer::decompose(self)"); |
| 158 | } |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 159 | Normalizer::compose(s, false, 0, s, status); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 160 | if(s.charAt(0)!=0xe4) { |
| 161 | dataerrln("error in Normalizer::compose(self)"); |
| 162 | } |
| 163 | Normalizer::concatenate(s, s, s, UNORM_NFC, 0, status); |
| 164 | if(s.charAt(1)!=0xe4) { |
| 165 | dataerrln("error in Normalizer::decompose(self)"); |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | #endif |