Blame - source/test/intltest/tokiter.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 42809736f16f35b68e1739fa6728ecb96b5e3ec2 [file] [log] [blame]

Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame^]	1	// © 2016 and later: Unicode, Inc. and others.
				2	// License & terms of use: http://www.unicode.org/copyright.html
				3	/*
				4	**********************************************************************
				5	* Copyright (c) 2004-2011, International Business Machines
				6	* Corporation and others. All Rights Reserved.
				7	**********************************************************************
				8	* Author: Alan Liu
				9	* Created: March 22 2004
				10	* Since: ICU 3.0
				11	**********************************************************************
				12	*/
				13	#include "tokiter.h"
				14	#include "textfile.h"
				15	#include "patternprops.h"
				16	#include "util.h"
				17	#include "uprops.h"
				18
				19	TokenIterator::TokenIterator(TextFile* r) {
				20	reader = r;
				21	done = haveLine = FALSE;
				22	pos = lastpos = -1;
				23	}
				24
				25	TokenIterator::~TokenIterator() {
				26	}
				27
				28	UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
				29	if (done \|\| U_FAILURE(ec)) {
				30	return FALSE;
				31	}
				32	token.truncate(0);
				33	for (;;) {
				34	if (!haveLine) {
				35	if (!reader->readLineSkippingComments(line, ec)) {
				36	done = TRUE;
				37	return FALSE;
				38	}
				39	haveLine = TRUE;
				40	pos = 0;
				41	}
				42	lastpos = pos;
				43	if (!nextToken(token, ec)) {
				44	haveLine = FALSE;
				45	if (U_FAILURE(ec)) return FALSE;
				46	continue;
				47	}
				48	return TRUE;
				49	}
				50	}
				51
				52	int32_t TokenIterator::getLineNumber() const {
				53	return reader->getLineNumber();
				54	}
				55
				56	/**
				57	* Read the next token from 'this->line' and append it to 'token'.
				58	* Tokens are separated by Pattern_White_Space. Tokens may also be
				59	* delimited by double or single quotes. The closing quote must match
				60	* the opening quote. If a '#' is encountered, the rest of the line
				61	* is ignored, unless it is backslash-escaped or within quotes.
				62	* @param token the token is appended to this StringBuffer
				63	* @param ec input-output error code
				64	* @return TRUE if a valid token is found, or FALSE if the end
				65	* of the line is reached or an error occurs
				66	*/
				67	UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
				68	ICU_Utility::skipWhitespace(line, pos, TRUE);
				69	if (pos == line.length()) {
				70	return FALSE;
				71	}
				72	UChar c = line.charAt(pos++);
				73	UChar quote = 0;
				74	switch (c) {
				75	case 34/'"'/:
				76	case 39/'\\'/:
				77	quote = c;
				78	break;
				79	case 35/'#'/:
				80	return FALSE;
				81	default:
				82	token.append(c);
				83	break;
				84	}
				85	while (pos < line.length()) {
				86	c = line.charAt(pos); // 16-bit ok
				87	if (c == 92/'\\'/) {
				88	UChar32 c32 = line.unescapeAt(pos);
				89	if (c32 < 0) {
				90	ec = U_MALFORMED_UNICODE_ESCAPE;
				91	return FALSE;
				92	}
				93	token.append(c32);
				94	} else if ((quote != 0 && c == quote) \|\|
				95	(quote == 0 && PatternProps::isWhiteSpace(c))) {
				96	++pos;
				97	return TRUE;
				98	} else if (quote == 0 && c == '#') {
				99	return TRUE; // do NOT increment
				100	} else {
				101	token.append(c);
				102	++pos;
				103	}
				104	}
				105	if (quote != 0) {
				106	ec = U_UNTERMINATED_QUOTE;
				107	return FALSE;
				108	}
				109	return TRUE;
				110	}