blob: a98b833ef08c548fd63f69ae1570791c2ee08678 [file] [log] [blame]
Frank Tang3e05d9d2021-11-08 14:04:04 -08001// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (c) 2004-2011, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* Author: Alan Liu
9* Created: March 22 2004
10* Since: ICU 3.0
11**********************************************************************
12*/
13#include "tokiter.h"
14#include "textfile.h"
15#include "patternprops.h"
16#include "util.h"
17#include "uprops.h"
18
19TokenIterator::TokenIterator(TextFile* r) {
20 reader = r;
Frank Tang1f164ee2022-11-08 12:31:27 -080021 done = haveLine = false;
Frank Tang3e05d9d2021-11-08 14:04:04 -080022 pos = lastpos = -1;
23}
24
25TokenIterator::~TokenIterator() {
26}
27
28UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
29 if (done || U_FAILURE(ec)) {
Frank Tang1f164ee2022-11-08 12:31:27 -080030 return false;
Frank Tang3e05d9d2021-11-08 14:04:04 -080031 }
32 token.truncate(0);
33 for (;;) {
34 if (!haveLine) {
35 if (!reader->readLineSkippingComments(line, ec)) {
Frank Tang1f164ee2022-11-08 12:31:27 -080036 done = true;
37 return false;
Frank Tang3e05d9d2021-11-08 14:04:04 -080038 }
Frank Tang1f164ee2022-11-08 12:31:27 -080039 haveLine = true;
Frank Tang3e05d9d2021-11-08 14:04:04 -080040 pos = 0;
41 }
42 lastpos = pos;
43 if (!nextToken(token, ec)) {
Frank Tang1f164ee2022-11-08 12:31:27 -080044 haveLine = false;
45 if (U_FAILURE(ec)) return false;
Frank Tang3e05d9d2021-11-08 14:04:04 -080046 continue;
47 }
Frank Tang1f164ee2022-11-08 12:31:27 -080048 return true;
Frank Tang3e05d9d2021-11-08 14:04:04 -080049 }
50}
51
52int32_t TokenIterator::getLineNumber() const {
53 return reader->getLineNumber();
54}
55
56/**
57 * Read the next token from 'this->line' and append it to 'token'.
58 * Tokens are separated by Pattern_White_Space. Tokens may also be
59 * delimited by double or single quotes. The closing quote must match
60 * the opening quote. If a '#' is encountered, the rest of the line
61 * is ignored, unless it is backslash-escaped or within quotes.
62 * @param token the token is appended to this StringBuffer
63 * @param ec input-output error code
Frank Tang1f164ee2022-11-08 12:31:27 -080064 * @return true if a valid token is found, or false if the end
Frank Tang3e05d9d2021-11-08 14:04:04 -080065 * of the line is reached or an error occurs
66 */
67UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
Frank Tang1f164ee2022-11-08 12:31:27 -080068 ICU_Utility::skipWhitespace(line, pos, true);
Frank Tang3e05d9d2021-11-08 14:04:04 -080069 if (pos == line.length()) {
Frank Tang1f164ee2022-11-08 12:31:27 -080070 return false;
Frank Tang3e05d9d2021-11-08 14:04:04 -080071 }
72 UChar c = line.charAt(pos++);
73 UChar quote = 0;
74 switch (c) {
75 case 34/*'"'*/:
76 case 39/*'\\'*/:
77 quote = c;
78 break;
79 case 35/*'#'*/:
Frank Tang1f164ee2022-11-08 12:31:27 -080080 return false;
Frank Tang3e05d9d2021-11-08 14:04:04 -080081 default:
82 token.append(c);
83 break;
84 }
85 while (pos < line.length()) {
86 c = line.charAt(pos); // 16-bit ok
87 if (c == 92/*'\\'*/) {
88 UChar32 c32 = line.unescapeAt(pos);
89 if (c32 < 0) {
90 ec = U_MALFORMED_UNICODE_ESCAPE;
Frank Tang1f164ee2022-11-08 12:31:27 -080091 return false;
Frank Tang3e05d9d2021-11-08 14:04:04 -080092 }
93 token.append(c32);
94 } else if ((quote != 0 && c == quote) ||
95 (quote == 0 && PatternProps::isWhiteSpace(c))) {
96 ++pos;
Frank Tang1f164ee2022-11-08 12:31:27 -080097 return true;
Frank Tang3e05d9d2021-11-08 14:04:04 -080098 } else if (quote == 0 && c == '#') {
Frank Tang1f164ee2022-11-08 12:31:27 -080099 return true; // do NOT increment
Frank Tang3e05d9d2021-11-08 14:04:04 -0800100 } else {
101 token.append(c);
102 ++pos;
103 }
104 }
105 if (quote != 0) {
106 ec = U_UNTERMINATED_QUOTE;
Frank Tang1f164ee2022-11-08 12:31:27 -0800107 return false;
Frank Tang3e05d9d2021-11-08 14:04:04 -0800108 }
Frank Tang1f164ee2022-11-08 12:31:27 -0800109 return true;
Frank Tang3e05d9d2021-11-08 14:04:04 -0800110}