blob: d92ec63b08da5881cf58ce90b99ac74633a752d4 [file] [log] [blame]
Jungshik Shinb3189662017-11-07 11:18:34 -08001// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
Jungshik Shina9a2bd32018-07-07 03:36:01 -07006#if !UCONFIG_NO_FORMATTING
Jungshik Shinb3189662017-11-07 11:18:34 -08007
8#include "umutex.h"
9#include "ucln_cmn.h"
10#include "ucln_in.h"
11#include "number_modifiers.h"
12
13using namespace icu;
14using namespace icu::number;
15using namespace icu::number::impl;
16
17namespace {
18
19// TODO: This is copied from simpleformatter.cpp
20const int32_t ARG_NUM_LIMIT = 0x100;
21
22// These are the default currency spacing UnicodeSets in CLDR.
23// Pre-compute them for performance.
24// The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
25icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER;
26
27UnicodeSet *UNISET_DIGIT = nullptr;
28UnicodeSet *UNISET_NOTS = nullptr;
29
30UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
31 delete UNISET_DIGIT;
32 UNISET_DIGIT = nullptr;
33 delete UNISET_NOTS;
34 UNISET_NOTS = nullptr;
Jungshik Shina9a2bd32018-07-07 03:36:01 -070035 gDefaultCurrencySpacingInitOnce.reset();
Jungshik Shinb3189662017-11-07 11:18:34 -080036 return TRUE;
37}
38
39void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
40 ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
41 UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
42 UNISET_NOTS = new UnicodeSet(UnicodeString(u"[:^S:]"), status);
43 if (UNISET_DIGIT == nullptr || UNISET_NOTS == nullptr) {
44 status = U_MEMORY_ALLOCATION_ERROR;
45 return;
46 }
47 UNISET_DIGIT->freeze();
48 UNISET_NOTS->freeze();
49}
50
51} // namespace
52
53
Jungshik Shina9a2bd32018-07-07 03:36:01 -070054Modifier::~Modifier() = default;
55
Jungshik Shin42d50272018-10-24 01:22:09 -070056Modifier::Parameters::Parameters()
57 : obj(nullptr) {}
58
59Modifier::Parameters::Parameters(
60 const ModifierStore* _obj, int8_t _signum, StandardPlural::Form _plural)
61 : obj(_obj), signum(_signum), plural(_plural) {}
62
63ModifierStore::~ModifierStore() = default;
64
65AdoptingModifierStore::~AdoptingModifierStore() {
66 for (const Modifier *mod : mods) {
67 delete mod;
68 }
69}
70
Jungshik Shina9a2bd32018-07-07 03:36:01 -070071
Jungshik Shinb3189662017-11-07 11:18:34 -080072int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
73 UErrorCode &status) const {
74 // Insert the suffix first since inserting the prefix will change the rightIndex
75 int length = output.insert(rightIndex, fSuffix, fField, status);
76 length += output.insert(leftIndex, fPrefix, fField, status);
77 return length;
78}
79
Jungshik Shin42d50272018-10-24 01:22:09 -070080int32_t ConstantAffixModifier::getPrefixLength() const {
Jungshik Shinb3189662017-11-07 11:18:34 -080081 return fPrefix.length();
82}
83
Jungshik Shin42d50272018-10-24 01:22:09 -070084int32_t ConstantAffixModifier::getCodePointCount() const {
Jungshik Shinb3189662017-11-07 11:18:34 -080085 return fPrefix.countChar32() + fSuffix.countChar32();
86}
87
88bool ConstantAffixModifier::isStrong() const {
89 return fStrong;
90}
91
Jungshik Shin42d50272018-10-24 01:22:09 -070092bool ConstantAffixModifier::containsField(UNumberFormatFields field) const {
93 (void)field;
94 // This method is not currently used.
95 U_ASSERT(false);
96 return false;
97}
98
99void ConstantAffixModifier::getParameters(Parameters& output) const {
100 (void)output;
101 // This method is not currently used.
102 U_ASSERT(false);
103}
104
105bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const {
106 auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
107 if (_other == nullptr) {
108 return false;
109 }
110 return fPrefix == _other->fPrefix
111 && fSuffix == _other->fSuffix
112 && fField == _other->fField
113 && fStrong == _other->fStrong;
114}
115
116
Jungshik Shinb3189662017-11-07 11:18:34 -0800117SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
Jungshik Shin42d50272018-10-24 01:22:09 -0700118 : SimpleModifier(simpleFormatter, field, strong, {}) {}
119
120SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
121 const Modifier::Parameters parameters)
122 : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
123 fParameters(parameters) {
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700124 int32_t argLimit = SimpleFormatter::getArgumentLimit(
125 fCompiledPattern.getBuffer(), fCompiledPattern.length());
126 if (argLimit == 0) {
127 // No arguments in compiled pattern
Jungshik Shinb3189662017-11-07 11:18:34 -0800128 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700129 U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
130 // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
131 fSuffixOffset = -1;
Jungshik Shinb3189662017-11-07 11:18:34 -0800132 fSuffixLength = 0;
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700133 } else {
134 U_ASSERT(argLimit == 1);
135 if (fCompiledPattern.charAt(1) != 0) {
Jungshik Shin42d50272018-10-24 01:22:09 -0700136 // Found prefix
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700137 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
138 fSuffixOffset = 3 + fPrefixLength;
139 } else {
Jungshik Shin42d50272018-10-24 01:22:09 -0700140 // No prefix
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700141 fPrefixLength = 0;
142 fSuffixOffset = 2;
143 }
144 if (3 + fPrefixLength < fCompiledPattern.length()) {
Jungshik Shin42d50272018-10-24 01:22:09 -0700145 // Found suffix
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700146 fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
147 } else {
Jungshik Shin42d50272018-10-24 01:22:09 -0700148 // No suffix
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700149 fSuffixLength = 0;
150 }
Jungshik Shinb3189662017-11-07 11:18:34 -0800151 }
152}
153
154SimpleModifier::SimpleModifier()
155 : fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
156}
157
158int32_t SimpleModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
159 UErrorCode &status) const {
160 return formatAsPrefixSuffix(output, leftIndex, rightIndex, fField, status);
161}
162
Jungshik Shin42d50272018-10-24 01:22:09 -0700163int32_t SimpleModifier::getPrefixLength() const {
Jungshik Shinb3189662017-11-07 11:18:34 -0800164 return fPrefixLength;
165}
166
Jungshik Shin42d50272018-10-24 01:22:09 -0700167int32_t SimpleModifier::getCodePointCount() const {
Jungshik Shinb3189662017-11-07 11:18:34 -0800168 int32_t count = 0;
169 if (fPrefixLength > 0) {
170 count += fCompiledPattern.countChar32(2, fPrefixLength);
171 }
172 if (fSuffixLength > 0) {
173 count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
174 }
175 return count;
176}
177
178bool SimpleModifier::isStrong() const {
179 return fStrong;
180}
181
Jungshik Shin42d50272018-10-24 01:22:09 -0700182bool SimpleModifier::containsField(UNumberFormatFields field) const {
183 (void)field;
184 // This method is not currently used.
185 U_ASSERT(false);
186 return false;
187}
188
189void SimpleModifier::getParameters(Parameters& output) const {
190 output = fParameters;
191}
192
193bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
194 auto* _other = dynamic_cast<const SimpleModifier*>(&other);
195 if (_other == nullptr) {
196 return false;
197 }
198 if (fParameters.obj != nullptr) {
199 return fParameters.obj == _other->fParameters.obj;
200 }
201 return fCompiledPattern == _other->fCompiledPattern
202 && fField == _other->fField
203 && fStrong == _other->fStrong;
204}
205
206
Jungshik Shinb3189662017-11-07 11:18:34 -0800207int32_t
208SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex,
209 Field field, UErrorCode &status) const {
Jungshik Shin42d50272018-10-24 01:22:09 -0700210 if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700211 // There is no argument for the inner number; overwrite the entire segment with our string.
212 return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status);
213 } else {
214 if (fPrefixLength > 0) {
215 result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status);
216 }
217 if (fSuffixLength > 0) {
218 result.insert(
219 endIndex + fPrefixLength,
220 fCompiledPattern,
221 1 + fSuffixOffset,
222 1 + fSuffixOffset + fSuffixLength,
223 field,
224 status);
225 }
226 return fPrefixLength + fSuffixLength;
Jungshik Shinb3189662017-11-07 11:18:34 -0800227 }
Jungshik Shinb3189662017-11-07 11:18:34 -0800228}
229
Jungshik Shin42d50272018-10-24 01:22:09 -0700230
231int32_t
232SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, NumberStringBuilder& result,
233 int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
234 Field field, UErrorCode& status) {
235 const UnicodeString& compiledPattern = compiled.compiledPattern;
236 int32_t argLimit = SimpleFormatter::getArgumentLimit(
237 compiledPattern.getBuffer(), compiledPattern.length());
238 if (argLimit != 2) {
239 status = U_INTERNAL_PROGRAM_ERROR;
240 return 0;
241 }
242 int32_t offset = 1; // offset into compiledPattern
243 int32_t length = 0; // chars added to result
244
245 int32_t prefixLength = compiledPattern.charAt(offset);
246 offset++;
247 if (prefixLength < ARG_NUM_LIMIT) {
248 // No prefix
249 prefixLength = 0;
250 } else {
251 prefixLength -= ARG_NUM_LIMIT;
252 result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status);
253 offset += prefixLength;
254 length += prefixLength;
255 offset++;
256 }
257
258 int32_t infixLength = compiledPattern.charAt(offset);
259 offset++;
260 if (infixLength < ARG_NUM_LIMIT) {
261 // No infix
262 infixLength = 0;
263 } else {
264 infixLength -= ARG_NUM_LIMIT;
265 result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status);
266 offset += infixLength;
267 length += infixLength;
268 offset++;
269 }
270
271 int32_t suffixLength;
272 if (offset == compiledPattern.length()) {
273 // No suffix
274 suffixLength = 0;
275 } else {
276 suffixLength = compiledPattern.charAt(offset) - ARG_NUM_LIMIT;
277 offset++;
278 result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status);
279 length += suffixLength;
280 }
281
282 *outPrefixLength = prefixLength;
283 *outSuffixLength = suffixLength;
284
285 return length;
286}
287
288
Jungshik Shinb3189662017-11-07 11:18:34 -0800289int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
290 UErrorCode &status) const {
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700291 int32_t length = output.insert(leftIndex, fPrefix, status);
292 if (fOverwrite) {
293 length += output.splice(
294 leftIndex + length,
295 rightIndex + length,
296 UnicodeString(), 0, 0,
297 UNUM_FIELD_COUNT, status);
298 }
299 length += output.insert(rightIndex + length, fSuffix, status);
Jungshik Shinb3189662017-11-07 11:18:34 -0800300 return length;
301}
302
Jungshik Shin42d50272018-10-24 01:22:09 -0700303int32_t ConstantMultiFieldModifier::getPrefixLength() const {
Jungshik Shinb3189662017-11-07 11:18:34 -0800304 return fPrefix.length();
305}
306
Jungshik Shin42d50272018-10-24 01:22:09 -0700307int32_t ConstantMultiFieldModifier::getCodePointCount() const {
Jungshik Shinb3189662017-11-07 11:18:34 -0800308 return fPrefix.codePointCount() + fSuffix.codePointCount();
309}
310
311bool ConstantMultiFieldModifier::isStrong() const {
312 return fStrong;
313}
314
Jungshik Shin42d50272018-10-24 01:22:09 -0700315bool ConstantMultiFieldModifier::containsField(UNumberFormatFields field) const {
316 return fPrefix.containsField(field) || fSuffix.containsField(field);
317}
318
319void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
320 output = fParameters;
321}
322
323bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const {
324 auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
325 if (_other == nullptr) {
326 return false;
327 }
328 if (fParameters.obj != nullptr) {
329 return fParameters.obj == _other->fParameters.obj;
330 }
331 return fPrefix.contentEquals(_other->fPrefix)
332 && fSuffix.contentEquals(_other->fSuffix)
333 && fOverwrite == _other->fOverwrite
334 && fStrong == _other->fStrong;
335}
336
337
Jungshik Shinb3189662017-11-07 11:18:34 -0800338CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix,
339 const NumberStringBuilder &suffix,
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700340 bool overwrite,
Jungshik Shinb3189662017-11-07 11:18:34 -0800341 bool strong,
342 const DecimalFormatSymbols &symbols,
343 UErrorCode &status)
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700344 : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
Jungshik Shinb3189662017-11-07 11:18:34 -0800345 // Check for currency spacing. Do not build the UnicodeSets unless there is
346 // a currency code point at a boundary.
347 if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == UNUM_CURRENCY_FIELD) {
348 int prefixCp = prefix.getLastCodePoint();
349 UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
350 if (prefixUnicodeSet.contains(prefixCp)) {
351 fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
352 fAfterPrefixUnicodeSet.freeze();
353 fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
354 } else {
355 fAfterPrefixUnicodeSet.setToBogus();
356 fAfterPrefixInsert.setToBogus();
357 }
358 } else {
359 fAfterPrefixUnicodeSet.setToBogus();
360 fAfterPrefixInsert.setToBogus();
361 }
362 if (suffix.length() > 0 && suffix.fieldAt(0) == UNUM_CURRENCY_FIELD) {
363 int suffixCp = suffix.getLastCodePoint();
364 UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
365 if (suffixUnicodeSet.contains(suffixCp)) {
366 fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
367 fBeforeSuffixUnicodeSet.freeze();
368 fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
369 } else {
370 fBeforeSuffixUnicodeSet.setToBogus();
371 fBeforeSuffixInsert.setToBogus();
372 }
373 } else {
374 fBeforeSuffixUnicodeSet.setToBogus();
375 fBeforeSuffixInsert.setToBogus();
376 }
377}
378
379int32_t CurrencySpacingEnabledModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
380 UErrorCode &status) const {
381 // Currency spacing logic
382 int length = 0;
383 if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
384 fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
385 // TODO: Should we use the CURRENCY field here?
386 length += output.insert(leftIndex, fAfterPrefixInsert, UNUM_FIELD_COUNT, status);
387 }
388 if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
389 fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
390 // TODO: Should we use the CURRENCY field here?
391 length += output.insert(rightIndex + length, fBeforeSuffixInsert, UNUM_FIELD_COUNT, status);
392 }
393
394 // Call super for the remaining logic
395 length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
396 return length;
397}
398
399int32_t
400CurrencySpacingEnabledModifier::applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart,
401 int32_t prefixLen, int32_t suffixStart,
402 int32_t suffixLen,
403 const DecimalFormatSymbols &symbols,
404 UErrorCode &status) {
405 int length = 0;
406 bool hasPrefix = (prefixLen > 0);
407 bool hasSuffix = (suffixLen > 0);
408 bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
409 if (hasPrefix && hasNumber) {
410 length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
411 }
412 if (hasSuffix && hasNumber) {
413 length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
414 }
415 return length;
416}
417
418int32_t
419CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index,
420 EAffix affix,
421 const DecimalFormatSymbols &symbols,
422 UErrorCode &status) {
423 // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
424 // This works even if the last code point in the prefix is 2 code units because the
425 // field value gets populated to both indices in the field array.
426 Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
427 if (affixField != UNUM_CURRENCY_FIELD) {
428 return 0;
429 }
430 int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
431 UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
432 if (!affixUniset.contains(affixCp)) {
433 return 0;
434 }
435 int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
436 UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
437 if (!numberUniset.contains(numberCp)) {
438 return 0;
439 }
440 UnicodeString spacingString = getInsertString(symbols, affix, status);
441
442 // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
443 // It would be more efficient if this could be done before affixes were attached,
444 // so that it could be prepended/appended instead of inserted.
445 // However, the build code path is more efficient, and this is the most natural
446 // place to put currency spacing in the non-build code path.
447 // TODO: Should we use the CURRENCY field here?
448 return output.insert(index, spacingString, UNUM_FIELD_COUNT, status);
449}
450
451UnicodeSet
452CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
453 EAffix affix, UErrorCode &status) {
454 // Ensure the static defaults are initialized:
455 umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
456 if (U_FAILURE(status)) {
457 return UnicodeSet();
458 }
459
460 const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
461 position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
462 affix == SUFFIX,
463 status);
464 if (pattern.compare(u"[:digit:]", -1) == 0) {
465 return *UNISET_DIGIT;
466 } else if (pattern.compare(u"[:^S:]", -1) == 0) {
467 return *UNISET_NOTS;
468 } else {
469 return UnicodeSet(pattern, status);
470 }
471}
472
473UnicodeString
474CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
475 UErrorCode &status) {
476 return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
477}
478
479#endif /* #if !UCONFIG_NO_FORMATTING */