Blame - source/common/rbbi.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 54b289e24d1e0affac7f32ec140046c1b57b581d [file] [log] [blame]

Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2	// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	3	/*
				4	***************************************************************************
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	5	* Copyright (C) 1999-2016 International Business Machines Corporation
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	6	* and others. All rights reserved.
				7	***************************************************************************
				8	*/
				9	//
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	10	// file: rbbi.cpp Contains the implementation of the rule based break iterator
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	11	// runtime engine and the API implementation for
				12	// class RuleBasedBreakIterator
				13	//
				14
				15	#include "utypeinfo.h" // for 'typeid' to work
				16
				17	#include "unicode/utypes.h"
				18
				19	#if !UCONFIG_NO_BREAK_ITERATION
				20
				21	#include "unicode/rbbi.h"
				22	#include "unicode/schriter.h"
				23	#include "unicode/uchriter.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	24	#include "unicode/uclean.h"
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	25	#include "unicode/udata.h"
				26
				27	#include "brkeng.h"
				28	#include "ucln_cmn.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	29	#include "cmemory.h"
				30	#include "cstring.h"
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	31	#include "rbbidata.h"
				32	#include "rbbi_cache.h"
				33	#include "rbbirb.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	34	#include "uassert.h"
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	35	#include "umutex.h"
				36	#include "uvectr32.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	37
				38	// if U_LOCAL_SERVICE_HOOK is defined, then localsvc.cpp is expected to be included.
				39	#if U_LOCAL_SERVICE_HOOK
				40	#include "localsvc.h"
				41	#endif
				42
				43	#ifdef RBBI_DEBUG
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	44	static UBool gTrace = FALSE;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	45	#endif
				46
				47	U_NAMESPACE_BEGIN
				48
				49	// The state number of the starting state
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	50	constexpr int32_t START_STATE = 1;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	51
				52	// The state-transition value indicating "stop"
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	53	constexpr int32_t STOP_STATE = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	54
				55
				56	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator)
				57
				58
				59	//=======================================================================
				60	// constructors
				61	//=======================================================================
				62
				63	/**
				64	* Constructs a RuleBasedBreakIterator that uses the already-created
				65	* tables object that is passed in as a parameter.
				66	*/
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	67	RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) {
				68	init(status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	69	fData = new RBBIDataWrapper(data, status); // status checked in constructor
				70	if (U_FAILURE(status)) {return;}
				71	if(fData == 0) {
				72	status = U_MEMORY_ALLOCATION_ERROR;
				73	return;
				74	}
				75	}
				76
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	77	//
				78	// Construct from precompiled binary rules (tables). This constructor is public API,
				79	// taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().
				80	//
				81	RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
				82	uint32_t ruleLength,
				83	UErrorCode &status) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	84	init(status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	85	if (U_FAILURE(status)) {
				86	return;
				87	}
				88	if (compiledRules == NULL \|\| ruleLength < sizeof(RBBIDataHeader)) {
				89	status = U_ILLEGAL_ARGUMENT_ERROR;
				90	return;
				91	}
				92	const RBBIDataHeader data = (const RBBIDataHeader )compiledRules;
				93	if (data->fLength > ruleLength) {
				94	status = U_ILLEGAL_ARGUMENT_ERROR;
				95	return;
				96	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	97	fData = new RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	98	if (U_FAILURE(status)) {return;}
				99	if(fData == 0) {
				100	status = U_MEMORY_ALLOCATION_ERROR;
				101	return;
				102	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	103	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	104
				105
				106	//-------------------------------------------------------------------------------
				107	//
				108	// Constructor from a UDataMemory handle to precompiled break rules
				109	// stored in an ICU data file.
				110	//
				111	//-------------------------------------------------------------------------------
				112	RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
				113	{
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	114	init(status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	115	fData = new RBBIDataWrapper(udm, status); // status checked in constructor
				116	if (U_FAILURE(status)) {return;}
				117	if(fData == 0) {
				118	status = U_MEMORY_ALLOCATION_ERROR;
				119	return;
				120	}
				121	}
				122
				123
				124
				125	//-------------------------------------------------------------------------------
				126	//
				127	// Constructor from a set of rules supplied as a string.
				128	//
				129	//-------------------------------------------------------------------------------
				130	RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
				131	UParseError &parseError,
				132	UErrorCode &status)
				133	{
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	134	init(status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	135	if (U_FAILURE(status)) {return;}
				136	RuleBasedBreakIterator bi = (RuleBasedBreakIterator )
				137	RBBIRuleBuilder::createRuleBasedBreakIterator(rules, &parseError, status);
				138	// Note: This is a bit awkward. The RBBI ruleBuilder has a factory method that
				139	// creates and returns a complete RBBI. From here, in a constructor, we
				140	// can't just return the object created by the builder factory, hence
				141	// the assignment of the factory created object to "this".
				142	if (U_SUCCESS(status)) {
				143	this = bi;
				144	delete bi;
				145	}
				146	}
				147
				148
				149	//-------------------------------------------------------------------------------
				150	//
				151	// Default Constructor. Create an empty shell that can be set up later.
				152	// Used when creating a RuleBasedBreakIterator from a set
				153	// of rules.
				154	//-------------------------------------------------------------------------------
				155	RuleBasedBreakIterator::RuleBasedBreakIterator() {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	156	UErrorCode status = U_ZERO_ERROR;
				157	init(status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	158	}
				159
				160
				161	//-------------------------------------------------------------------------------
				162	//
				163	// Copy constructor. Will produce a break iterator with the same behavior,
				164	// and which iterates over the same text, as the one passed in.
				165	//
				166	//-------------------------------------------------------------------------------
				167	RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
				168	: BreakIterator(other)
				169	{
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	170	UErrorCode status = U_ZERO_ERROR;
				171	this->init(status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	172	*this = other;
				173	}
				174
				175
				176	/**
				177	* Destructor
				178	*/
				179	RuleBasedBreakIterator::~RuleBasedBreakIterator() {
				180	if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
				181	// fCharIter was adopted from the outside.
				182	delete fCharIter;
				183	}
				184	fCharIter = NULL;
				185	delete fSCharIter;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	186	fSCharIter = NULL;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	187	delete fDCharIter;
				188	fDCharIter = NULL;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	189
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	190	utext_close(fText);
				191
				192	if (fData != NULL) {
				193	fData->removeReference();
				194	fData = NULL;
				195	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	196	delete fBreakCache;
				197	fBreakCache = NULL;
				198
				199	delete fDictionaryCache;
				200	fDictionaryCache = NULL;
				201
				202	delete fLanguageBreakEngines;
				203	fLanguageBreakEngines = NULL;
				204
				205	delete fUnhandledBreakEngine;
				206	fUnhandledBreakEngine = NULL;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	207	}
				208
				209	/**
				210	* Assignment operator. Sets this iterator to have the same behavior,
				211	* and iterate over the same text, as the one passed in.
				212	*/
				213	RuleBasedBreakIterator&
				214	RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
				215	if (this == &that) {
				216	return *this;
				217	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	218	BreakIterator::operator=(that);
				219
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	220	fBreakType = that.fBreakType;
				221	if (fLanguageBreakEngines != NULL) {
				222	delete fLanguageBreakEngines;
				223	fLanguageBreakEngines = NULL; // Just rebuild for now
				224	}
				225	// TODO: clone fLanguageBreakEngines from "that"
				226	UErrorCode status = U_ZERO_ERROR;
				227	fText = utext_clone(fText, that.fText, FALSE, TRUE, &status);
				228
				229	if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
				230	delete fCharIter;
				231	}
				232	fCharIter = NULL;
				233
				234	if (that.fCharIter != NULL ) {
				235	// This is a little bit tricky - it will intially appear that
				236	// this->fCharIter is adopted, even if that->fCharIter was
				237	// not adopted. That's ok.
				238	fCharIter = that.fCharIter->clone();
				239	}
				240
				241	if (fData != NULL) {
				242	fData->removeReference();
				243	fData = NULL;
				244	}
				245	if (that.fData != NULL) {
				246	fData = that.fData->addReference();
				247	}
				248
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	249	fPosition = that.fPosition;
				250	fRuleStatusIndex = that.fRuleStatusIndex;
				251	fDone = that.fDone;
				252
				253	// TODO: both the dictionary and the main cache need to be copied.
				254	// Current position could be within a dictionary range. Trying to continue
				255	// the iteration without the caches present would go to the rules, with
				256	// the assumption that the current position is on a rule boundary.
				257	fBreakCache->reset(fPosition, fRuleStatusIndex);
				258	fDictionaryCache->reset();
				259
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	260	return *this;
				261	}
				262
				263
				264
				265	//-----------------------------------------------------------------------------
				266	//
				267	// init() Shared initialization routine. Used by all the constructors.
				268	// Initializes all fields, leaving the object in a consistent state.
				269	//
				270	//-----------------------------------------------------------------------------
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	271	void RuleBasedBreakIterator::init(UErrorCode &status) {
				272	fText = NULL;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	273	fCharIter = NULL;
				274	fSCharIter = NULL;
				275	fDCharIter = NULL;
				276	fData = NULL;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	277	fPosition = 0;
				278	fRuleStatusIndex = 0;
				279	fDone = false;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	280	fDictionaryCharCount = 0;
				281	fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable
				282	// dictionary behavior for Break Iterators that are
				283	// built from rules. Even better would be the ability to
				284	// declare the type in the rules.
				285
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	286	fLanguageBreakEngines = NULL;
				287	fUnhandledBreakEngine = NULL;
				288	fBreakCache = NULL;
				289	fDictionaryCache = NULL;
				290
				291	if (U_FAILURE(status)) {
				292	return;
				293	}
				294
				295	fText = utext_openUChars(NULL, NULL, 0, &status);
				296	fDictionaryCache = new DictionaryCache(this, status);
				297	fBreakCache = new BreakCache(this, status);
				298	if (U_SUCCESS(status) && (fText == NULL \|\| fDictionaryCache == NULL \|\| fBreakCache == NULL)) {
				299	status = U_MEMORY_ALLOCATION_ERROR;
				300	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	301
				302	#ifdef RBBI_DEBUG
				303	static UBool debugInitDone = FALSE;
				304	if (debugInitDone == FALSE) {
				305	char *debugEnv = getenv("U_RBBIDEBUG");
				306	if (debugEnv && uprv_strstr(debugEnv, "trace")) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	307	gTrace = TRUE;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	308	}
				309	debugInitDone = TRUE;
				310	}
				311	#endif
				312	}
				313
				314
				315
				316	//-----------------------------------------------------------------------------
				317	//
				318	// clone - Returns a newly-constructed RuleBasedBreakIterator with the same
				319	// behavior, and iterating over the same text, as this one.
				320	// Virtual function: does the right thing with subclasses.
				321	//
				322	//-----------------------------------------------------------------------------
				323	BreakIterator*
				324	RuleBasedBreakIterator::clone(void) const {
				325	return new RuleBasedBreakIterator(*this);
				326	}
				327
				328	/**
				329	* Equality operator. Returns TRUE if both BreakIterators are of the
				330	* same class, have the same behavior, and iterate over the same text.
				331	*/
				332	UBool
				333	RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
				334	if (typeid(*this) != typeid(that)) {
				335	return FALSE;
				336	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	337	if (this == &that) {
				338	return TRUE;
				339	}
				340
				341	// The base class BreakIterator carries no state that participates in equality,
				342	// and does not implement an equality function that would otherwise be
				343	// checked at this point.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	344
				345	const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that;
				346
				347	if (!utext_equals(fText, that2.fText)) {
				348	// The two break iterators are operating on different text,
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	349	// or have a different iteration position.
				350	// Note that fText's position is always the same as the break iterator's position.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	351	return FALSE;
				352	};
				353
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	354	if (!(fPosition == that2.fPosition &&
				355	fRuleStatusIndex == that2.fRuleStatusIndex &&
				356	fDone == that2.fDone)) {
				357	return FALSE;
				358	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	359
				360	if (that2.fData == fData \|\|
				361	(fData != NULL && that2.fData != NULL && that2.fData == fData)) {
				362	// The two break iterators are using the same rules.
				363	return TRUE;
				364	}
				365	return FALSE;
				366	}
				367
				368	/**
				369	* Compute a hash code for this BreakIterator
				370	* @return A hash code
				371	*/
				372	int32_t
				373	RuleBasedBreakIterator::hashCode(void) const {
				374	int32_t hash = 0;
				375	if (fData != NULL) {
				376	hash = fData->hashCode();
				377	}
				378	return hash;
				379	}
				380
				381
				382	void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
				383	if (U_FAILURE(status)) {
				384	return;
				385	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	386	fBreakCache->reset();
				387	fDictionaryCache->reset();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	388	fText = utext_clone(fText, ut, FALSE, TRUE, &status);
				389
				390	// Set up a dummy CharacterIterator to be returned if anyone
				391	// calls getText(). With input from UText, there is no reasonable
				392	// way to return a characterIterator over the actual input text.
				393	// Return one over an empty string instead - this is the closest
				394	// we can come to signaling a failure.
				395	// (GetText() is obsolete, this failure is sort of OK)
				396	if (fDCharIter == NULL) {
				397	static const UChar c = 0;
				398	fDCharIter = new UCharCharacterIterator(&c, 0);
				399	if (fDCharIter == NULL) {
				400	status = U_MEMORY_ALLOCATION_ERROR;
				401	return;
				402	}
				403	}
				404
				405	if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
				406	// existing fCharIter was adopted from the outside. Delete it now.
				407	delete fCharIter;
				408	}
				409	fCharIter = fDCharIter;
				410
				411	this->first();
				412	}
				413
				414
				415	UText RuleBasedBreakIterator::getUText(UText fillIn, UErrorCode &status) const {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	416	UText *result = utext_clone(fillIn, fText, FALSE, TRUE, &status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	417	return result;
				418	}
				419
				420
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	421	//=======================================================================
				422	// BreakIterator overrides
				423	//=======================================================================
				424
				425	/**
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	426	* Return a CharacterIterator over the text being analyzed.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	427	*/
				428	CharacterIterator&
				429	RuleBasedBreakIterator::getText() const {
				430	return *fCharIter;
				431	}
				432
				433	/**
				434	* Set the iterator to analyze a new piece of text. This function resets
				435	* the current iteration position to the beginning of the text.
				436	* @param newText An iterator over the text to analyze.
				437	*/
				438	void
				439	RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	440	// If we are holding a CharacterIterator adopted from a
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	441	// previous call to this function, delete it now.
				442	if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
				443	delete fCharIter;
				444	}
				445
				446	fCharIter = newText;
				447	UErrorCode status = U_ZERO_ERROR;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	448	fBreakCache->reset();
				449	fDictionaryCache->reset();
				450	if (newText==NULL \|\| newText->startIndex() != 0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	451	// startIndex !=0 wants to be an error, but there's no way to report it.
				452	// Make the iterator text be an empty string.
				453	fText = utext_openUChars(fText, NULL, 0, &status);
				454	} else {
				455	fText = utext_openCharacterIterator(fText, newText, &status);
				456	}
				457	this->first();
				458	}
				459
				460	/**
				461	* Set the iterator to analyze a new piece of text. This function resets
				462	* the current iteration position to the beginning of the text.
				463	* @param newText An iterator over the text to analyze.
				464	*/
				465	void
				466	RuleBasedBreakIterator::setText(const UnicodeString& newText) {
				467	UErrorCode status = U_ZERO_ERROR;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	468	fBreakCache->reset();
				469	fDictionaryCache->reset();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	470	fText = utext_openConstUnicodeString(fText, &newText, &status);
				471
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	472	// Set up a character iterator on the string.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	473	// Needed in case someone calls getText().
				474	// Can not, unfortunately, do this lazily on the (probably never)
				475	// call to getText(), because getText is const.
				476	if (fSCharIter == NULL) {
				477	fSCharIter = new StringCharacterIterator(newText);
				478	} else {
				479	fSCharIter->setText(newText);
				480	}
				481
				482	if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
				483	// old fCharIter was adopted from the outside. Delete it.
				484	delete fCharIter;
				485	}
				486	fCharIter = fSCharIter;
				487
				488	this->first();
				489	}
				490
				491
				492	/**
				493	* Provide a new UText for the input text. Must reference text with contents identical
				494	* to the original.
				495	* Intended for use with text data originating in Java (garbage collected) environments
				496	* where the data may be moved in memory at arbitrary times.
				497	*/
				498	RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, UErrorCode &status) {
				499	if (U_FAILURE(status)) {
				500	return *this;
				501	}
				502	if (input == NULL) {
				503	status = U_ILLEGAL_ARGUMENT_ERROR;
				504	return *this;
				505	}
				506	int64_t pos = utext_getNativeIndex(fText);
				507	// Shallow read-only clone of the new UText into the existing input UText
				508	fText = utext_clone(fText, input, FALSE, TRUE, &status);
				509	if (U_FAILURE(status)) {
				510	return *this;
				511	}
				512	utext_setNativeIndex(fText, pos);
				513	if (utext_getNativeIndex(fText) != pos) {
				514	// Sanity check. The new input utext is supposed to have the exact same
				515	// contents as the old. If we can't set to the same position, it doesn't.
				516	// The contents underlying the old utext might be invalid at this point,
				517	// so it's not safe to check directly.
				518	status = U_ILLEGAL_ARGUMENT_ERROR;
				519	}
				520	return *this;
				521	}
				522
				523
				524	/**
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	525	* Sets the current iteration position to the beginning of the text, position zero.
				526	* @return The new iterator position, which is zero.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	527	*/
				528	int32_t RuleBasedBreakIterator::first(void) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	529	UErrorCode status = U_ZERO_ERROR;
				530	if (!fBreakCache->seek(0)) {
				531	fBreakCache->populateNear(0, status);
				532	}
				533	fBreakCache->current();
				534	U_ASSERT(fPosition == 0);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	535	return 0;
				536	}
				537
				538	/**
				539	* Sets the current iteration position to the end of the text.
				540	* @return The text's past-the-end offset.
				541	*/
				542	int32_t RuleBasedBreakIterator::last(void) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	543	int32_t endPos = (int32_t)utext_nativeLength(fText);
				544	UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position.
				545	(void)endShouldBeBoundary;
				546	U_ASSERT(endShouldBeBoundary);
				547	U_ASSERT(fPosition == endPos);
				548	return endPos;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	549	}
				550
				551	/**
				552	* Advances the iterator either forward or backward the specified number of steps.
				553	* Negative values move backward, and positive values move forward. This is
				554	* equivalent to repeatedly calling next() or previous().
				555	* @param n The number of steps to move. The sign indicates the direction
				556	* (negative is backwards, and positive is forwards).
				557	* @return The character offset of the boundary position n boundaries away from
				558	* the current one.
				559	*/
				560	int32_t RuleBasedBreakIterator::next(int32_t n) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	561	int32_t result = 0;
				562	if (n > 0) {
				563	for (; n > 0 && result != UBRK_DONE; --n) {
				564	result = next();
				565	}
				566	} else if (n < 0) {
				567	for (; n < 0 && result != UBRK_DONE; ++n) {
				568	result = previous();
				569	}
				570	} else {
				571	result = current();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	572	}
				573	return result;
				574	}
				575
				576	/**
				577	* Advances the iterator to the next boundary position.
				578	* @return The position of the first boundary after this one.
				579	*/
				580	int32_t RuleBasedBreakIterator::next(void) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	581	fBreakCache->next();
				582	return fDone ? UBRK_DONE : fPosition;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	583	}
				584
				585	/**
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	586	* Move the iterator backwards, to the boundary preceding the current one.
				587	*
				588	* Starts from the current position within fText.
				589	* Starting position need not be on a boundary.
				590	*
				591	* @return The position of the boundary position immediately preceding the starting position.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	592	*/
				593	int32_t RuleBasedBreakIterator::previous(void) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	594	UErrorCode status = U_ZERO_ERROR;
				595	fBreakCache->previous(status);
				596	return fDone ? UBRK_DONE : fPosition;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	597	}
				598
				599	/**
				600	* Sets the iterator to refer to the first boundary position following
				601	* the specified position.
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	602	* @param startPos The position from which to begin searching for a break position.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	603	* @return The position of the first break after the current position.
				604	*/
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	605	int32_t RuleBasedBreakIterator::following(int32_t startPos) {
				606	// if the supplied position is before the beginning, return the
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	607	// text's starting offset
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	608	if (startPos < 0) {
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	609	return first();
				610	}
				611
				612	// Move requested offset to a code point start. It might be on a trail surrogate,
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	613	// or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text.
				614	utext_setNativeIndex(fText, startPos);
				615	startPos = (int32_t)utext_getNativeIndex(fText);
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	616
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	617	UErrorCode status = U_ZERO_ERROR;
				618	fBreakCache->following(startPos, status);
				619	return fDone ? UBRK_DONE : fPosition;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	620	}
				621
				622	/**
				623	* Sets the iterator to refer to the last boundary position before the
				624	* specified position.
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	625	* @param offset The position to begin searching for a break from.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	626	* @return The position of the last boundary before the starting position.
				627	*/
				628	int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	629	if (fText == NULL \|\| offset > utext_nativeLength(fText)) {
				630	return last();
				631	}
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	632
				633	// Move requested offset to a code point start. It might be on a trail surrogate,
				634	// or on a trail byte if the input is UTF-8.
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	635
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	636	utext_setNativeIndex(fText, offset);
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	637	int32_t adjustedOffset = utext_getNativeIndex(fText);
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	638
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	639	UErrorCode status = U_ZERO_ERROR;
				640	fBreakCache->preceding(adjustedOffset, status);
				641	return fDone ? UBRK_DONE : fPosition;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	642	}
				643
				644	/**
				645	* Returns true if the specfied position is a boundary position. As a side
				646	* effect, leaves the iterator pointing to the first boundary position at
				647	* or after "offset".
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	648	*
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	649	* @param offset the offset to check.
				650	* @return True if "offset" is a boundary position.
				651	*/
				652	UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	653	// out-of-range indexes are never boundary positions
				654	if (offset < 0) {
				655	first(); // For side effects on current position, tag values.
				656	return FALSE;
				657	}
				658
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	659	// Adjust offset to be on a code point boundary and not beyond the end of the text.
				660	// Note that isBoundary() is always be false for offsets that are not on code point boundaries.
				661	// But we still need the side effect of leaving iteration at the following boundary.
				662
				663	utext_setNativeIndex(fText, offset);
				664	int32_t adjustedOffset = utext_getNativeIndex(fText);
				665
				666	bool result = false;
				667	UErrorCode status = U_ZERO_ERROR;
				668	if (fBreakCache->seek(adjustedOffset) \|\| fBreakCache->populateNear(adjustedOffset, status)) {
				669	result = (fBreakCache->current() == offset);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	670	}
				671
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	672	if (result && adjustedOffset < offset && utext_char32At(fText, offset) == U_SENTINEL) {
				673	// Original offset is beyond the end of the text. Return FALSE, it's not a boundary,
				674	// but the iteration position remains set to the end of the text, which is a boundary.
				675	return FALSE;
				676	}
				677	if (!result) {
				678	// Not on a boundary. isBoundary() must leave iterator on the following boundary.
				679	// Cache->seek(), above, left us on the preceding boundary, so advance one.
				680	next();
				681	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	682	return result;
				683	}
				684
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	685
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	686	/**
				687	* Returns the current iteration position.
				688	* @return The current iteration position.
				689	*/
				690	int32_t RuleBasedBreakIterator::current(void) const {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	691	return fPosition;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	692	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	693
				694
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	695	//=======================================================================
				696	// implementation
				697	//=======================================================================
				698
				699	//
				700	// RBBIRunMode - the state machine runs an extra iteration at the beginning and end
				701	// of user text. A variable with this enum type keeps track of where we
				702	// are. The state machine only fetches user input while in the RUN mode.
				703	//
				704	enum RBBIRunMode {
				705	RBBI_START, // state machine processing is before first char of input
				706	RBBI_RUN, // state machine processing is in the user text
				707	RBBI_END // state machine processing is after end of user text.
				708	};
				709
				710
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	711	// Map from look-ahead break states (corresponds to rules) to boundary positions.
				712	// Allows multiple lookahead break rules to be in flight at the same time.
				713	//
				714	// This is a temporary approach for ICU 57. A better fix is to make the look-ahead numbers
				715	// in the state table be sequential, then we can just index an array. And the
				716	// table could also tell us in advance how big that array needs to be.
				717	//
				718	// Before ICU 57 there was just a single simple variable for a look-ahead match that
				719	// was in progress. Two rules at once did not work.
				720
				721	static const int32_t kMaxLookaheads = 8;
				722	struct LookAheadResults {
				723	int32_t fUsedSlotLimit;
				724	int32_t fPositions[8];
				725	int16_t fKeys[8];
				726
				727	LookAheadResults() : fUsedSlotLimit(0), fPositions(), fKeys() {};
				728
				729	int32_t getPosition(int16_t key) {
				730	for (int32_t i=0; i<fUsedSlotLimit; ++i) {
				731	if (fKeys[i] == key) {
				732	return fPositions[i];
				733	}
				734	}
				735	U_ASSERT(FALSE);
				736	return -1;
				737	}
				738
				739	void setPosition(int16_t key, int32_t position) {
				740	int32_t i;
				741	for (i=0; i<fUsedSlotLimit; ++i) {
				742	if (fKeys[i] == key) {
				743	fPositions[i] = position;
				744	return;
				745	}
				746	}
				747	if (i >= kMaxLookaheads) {
				748	U_ASSERT(FALSE);
				749	i = kMaxLookaheads - 1;
				750	}
				751	fKeys[i] = key;
				752	fPositions[i] = position;
				753	U_ASSERT(fUsedSlotLimit == i);
				754	fUsedSlotLimit = i + 1;
				755	}
				756	};
				757
				758
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	759	//-----------------------------------------------------------------------------------
				760	//
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	761	// handleNext()
				762	// Run the state machine to find a boundary
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	763	//
				764	//-----------------------------------------------------------------------------------
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	765	int32_t RuleBasedBreakIterator::handleNext() {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	766	int32_t state;
				767	uint16_t category = 0;
				768	RBBIRunMode mode;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	769
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	770	RBBIStateTableRow *row;
				771	UChar32 c;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	772	LookAheadResults lookAheadMatches;
				773	int32_t result = 0;
				774	int32_t initialPosition = 0;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	775	const RBBIStateTable *statetable = fData->fForwardTable;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	776	const char *tableData = statetable->fTableData;
				777	uint32_t tableRowLen = statetable->fRowLen;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	778	#ifdef RBBI_DEBUG
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	779	if (gTrace) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	780	RBBIDebugPuts("Handle Next pos char state category");
				781	}
				782	#endif
				783
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	784	// handleNext alway sets the break tag value.
				785	// Set the default for it.
				786	fRuleStatusIndex = 0;
				787
				788	fDictionaryCharCount = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	789
				790	// if we're already at the end of the text, return DONE.
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	791	initialPosition = fPosition;
				792	UTEXT_SETNATIVEINDEX(fText, initialPosition);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	793	result = initialPosition;
				794	c = UTEXT_NEXT32(fText);
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	795	if (c==U_SENTINEL) {
				796	fDone = TRUE;
				797	return UBRK_DONE;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	798	}
				799
				800	// Set the initial state for the state machine
				801	state = START_STATE;
				802	row = (RBBIStateTableRow *)
				803	//(statetable->fTableData + (statetable->fRowLen * state));
				804	(tableData + tableRowLen * state);
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	805
				806
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	807	mode = RBBI_RUN;
				808	if (statetable->fFlags & RBBI_BOF_REQUIRED) {
				809	category = 2;
				810	mode = RBBI_START;
				811	}
				812
				813
				814	// loop until we reach the end of the text or transition to state 0
				815	//
				816	for (;;) {
				817	if (c == U_SENTINEL) {
				818	// Reached end of input string.
				819	if (mode == RBBI_END) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	820	// We have already run the loop one last time with the
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	821	// character set to the psueudo {eof} value. Now it is time
				822	// to unconditionally bail out.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	823	break;
				824	}
				825	// Run the loop one last time with the fake end-of-input character category.
				826	mode = RBBI_END;
				827	category = 1;
				828	}
				829
				830	//
				831	// Get the char category. An incoming category of 1 or 2 means that
				832	// we are preset for doing the beginning or end of input, and
				833	// that we shouldn't get a category from an actual text input character.
				834	//
				835	if (mode == RBBI_RUN) {
				836	// look up the current character's character category, which tells us
				837	// which column in the state table to look at.
				838	// Note: the 16 in UTRIE_GET16 refers to the size of the data being returned,
				839	// not the size of the character going in, which is a UChar32.
				840	//
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	841	category = UTRIE2_GET16(fData->fTrie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	842
				843	// Check the dictionary bit in the character's category.
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	844	// Counter is only used by dictionary based iteration.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	845	// Chars that need to be handled by a dictionary have a flag bit set
				846	// in their category values.
				847	//
				848	if ((category & 0x4000) != 0) {
				849	fDictionaryCharCount++;
				850	// And off the dictionary flag bit.
				851	category &= ~0x4000;
				852	}
				853	}
				854
				855	#ifdef RBBI_DEBUG
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	856	if (gTrace) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	857	RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText));
				858	if (0x20<=c && c<0x7f) {
				859	RBBIDebugPrintf("\"%c\" ", c);
				860	} else {
				861	RBBIDebugPrintf("%5x ", c);
				862	}
				863	RBBIDebugPrintf("%3d %3d\n", state, category);
				864	}
				865	#endif
				866
				867	// State Transition - move machine to its next state
				868	//
				869
				870	// Note: fNextState is defined as uint16_t[2], but we are casting
				871	// a generated RBBI table to RBBIStateTableRow and some tables
				872	// actually have more than 2 categories.
				873	U_ASSERT(category<fData->fHeader->fCatCount);
				874	state = row->fNextState[category]; /Not accessing beyond memory/
				875	row = (RBBIStateTableRow *)
				876	// (statetable->fTableData + (statetable->fRowLen * state));
				877	(tableData + tableRowLen * state);
				878
				879
				880	if (row->fAccepting == -1) {
				881	// Match found, common case.
				882	if (mode != RBBI_START) {
				883	result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
				884	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	885	fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	886	}
				887
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	888	int16_t completedRule = row->fAccepting;
				889	if (completedRule > 0) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	890	// Lookahead match is completed.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	891	int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
				892	if (lookaheadResult >= 0) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	893	fRuleStatusIndex = row->fTagIdx;
				894	fPosition = lookaheadResult;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	895	return lookaheadResult;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	896	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	897	}
				898	int16_t rule = row->fLookAhead;
				899	if (rule != 0) {
				900	// At the position of a '/' in a look-ahead match. Record it.
				901	int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
				902	lookAheadMatches.setPosition(rule, pos);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	903	}
				904
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	905	if (state == STOP_STATE) {
				906	// This is the normal exit from the lookup state machine.
				907	// We have advanced through the string until it is certain that no
				908	// longer match is possible, no matter what characters follow.
				909	break;
				910	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	911
				912	// Advance to the next character.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	913	// If this is a beginning-of-input loop iteration, don't advance
				914	// the input position. The next iteration will be processing the
				915	// first real input character.
				916	if (mode == RBBI_RUN) {
				917	c = UTEXT_NEXT32(fText);
				918	} else {
				919	if (mode == RBBI_START) {
				920	mode = RBBI_RUN;
				921	}
				922	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	923	}
				924
				925	// The state machine is done. Check whether it found a match...
				926
				927	// If the iterator failed to advance in the match engine, force it ahead by one.
				928	// (This really indicates a defect in the break rules. They should always match
				929	// at least one character.)
				930	if (result == initialPosition) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	931	utext_setNativeIndex(fText, initialPosition);
				932	utext_next32(fText);
				933	result = (int32_t)utext_getNativeIndex(fText);
				934	fRuleStatusIndex = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	935	}
				936
				937	// Leave the iterator at our result position.
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	938	fPosition = result;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	939	#ifdef RBBI_DEBUG
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	940	if (gTrace) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	941	RBBIDebugPrintf("result = %d\n\n", result);
				942	}
				943	#endif
				944	return result;
				945	}
				946
				947
				948
				949	//-----------------------------------------------------------------------------------
				950	//
				951	// handlePrevious()
				952	//
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	953	// Iterate backwards using the safe reverse rules.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	954	// The logic of this function is very similar to handleNext(), above.
				955	//
				956	//-----------------------------------------------------------------------------------
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	957	int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	958	int32_t state;
				959	uint16_t category = 0;
				960	RBBIRunMode mode;
				961	RBBIStateTableRow *row;
				962	UChar32 c;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	963	LookAheadResults lookAheadMatches;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	964	int32_t result = 0;
				965	int32_t initialPosition = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	966
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	967	const RBBIStateTable *stateTable = fData->fSafeRevTable;
				968	UTEXT_SETNATIVEINDEX(fText, fromPosition);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	969	#ifdef RBBI_DEBUG
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	970	if (gTrace) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	971	RBBIDebugPuts("Handle Previous pos char state category");
				972	}
				973	#endif
				974
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	975	// if we're already at the start of the text, return DONE.
				976	if (fText == NULL \|\| fData == NULL \|\| UTEXT_GETNATIVEINDEX(fText)==0) {
				977	return BreakIterator::DONE;
				978	}
				979
				980	// Set up the starting char.
				981	initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText);
				982	result = initialPosition;
				983	c = UTEXT_PREVIOUS32(fText);
				984
				985	// Set the initial state for the state machine
				986	state = START_STATE;
				987	row = (RBBIStateTableRow *)
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	988	(stateTable->fTableData + (stateTable->fRowLen * state));
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	989	category = 3;
				990	mode = RBBI_RUN;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	991	if (stateTable->fFlags & RBBI_BOF_REQUIRED) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	992	category = 2;
				993	mode = RBBI_START;
				994	}
				995
				996
				997	// loop until we reach the start of the text or transition to state 0
				998	//
				999	for (;;) {
				1000	if (c == U_SENTINEL) {
				1001	// Reached end of input string.
				1002	if (mode == RBBI_END) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1003	// We have already run the loop one last time with the
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1004	// character set to the psueudo {eof} value. Now it is time
				1005	// to unconditionally bail out.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1006	break;
				1007	}
				1008	// Run the loop one last time with the fake end-of-input character category.
				1009	mode = RBBI_END;
				1010	category = 1;
				1011	}
				1012
				1013	//
				1014	// Get the char category. An incoming category of 1 or 2 means that
				1015	// we are preset for doing the beginning or end of input, and
				1016	// that we shouldn't get a category from an actual text input character.
				1017	//
				1018	if (mode == RBBI_RUN) {
				1019	// look up the current character's character category, which tells us
				1020	// which column in the state table to look at.
				1021	// Note: the 16 in UTRIE_GET16 refers to the size of the data being returned,
				1022	// not the size of the character going in, which is a UChar32.
				1023	//
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1024	// And off the dictionary flag bit. For reverse iteration it is not used.
				1025	category = UTRIE2_GET16(fData->fTrie, c);
				1026	category &= ~0x4000;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1027	}
				1028
				1029	#ifdef RBBI_DEBUG
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1030	if (gTrace) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1031	RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(fText));
				1032	if (0x20<=c && c<0x7f) {
				1033	RBBIDebugPrintf("\"%c\" ", c);
				1034	} else {
				1035	RBBIDebugPrintf("%5x ", c);
				1036	}
				1037	RBBIDebugPrintf("%3d %3d\n", state, category);
				1038	}
				1039	#endif
				1040
				1041	// State Transition - move machine to its next state
				1042	//
				1043
				1044	// Note: fNextState is defined as uint16_t[2], but we are casting
				1045	// a generated RBBI table to RBBIStateTableRow and some tables
				1046	// actually have more than 2 categories.
				1047	U_ASSERT(category<fData->fHeader->fCatCount);
				1048	state = row->fNextState[category]; /Not accessing beyond memory/
				1049	row = (RBBIStateTableRow *)
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1050	(stateTable->fTableData + (stateTable->fRowLen * state));
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1051
				1052	if (row->fAccepting == -1) {
				1053	// Match found, common case.
				1054	result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
				1055	}
				1056
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1057	int16_t completedRule = row->fAccepting;
				1058	if (completedRule > 0) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1059	// Lookahead match is completed.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1060	int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
				1061	if (lookaheadResult >= 0) {
				1062	UTEXT_SETNATIVEINDEX(fText, lookaheadResult);
				1063	return lookaheadResult;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1064	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1065	}
				1066	int16_t rule = row->fLookAhead;
				1067	if (rule != 0) {
				1068	// At the position of a '/' in a look-ahead match. Record it.
				1069	int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
				1070	lookAheadMatches.setPosition(rule, pos);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1071	}
				1072
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1073	if (state == STOP_STATE) {
				1074	// This is the normal exit from the lookup state machine.
				1075	// We have advanced through the string until it is certain that no
				1076	// longer match is possible, no matter what characters follow.
				1077	break;
				1078	}
				1079
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1080	// Move (backwards) to the next character to process.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1081	// If this is a beginning-of-input loop iteration, don't advance
				1082	// the input position. The next iteration will be processing the
				1083	// first real input character.
				1084	if (mode == RBBI_RUN) {
				1085	c = UTEXT_PREVIOUS32(fText);
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1086	} else {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1087	if (mode == RBBI_START) {
				1088	mode = RBBI_RUN;
				1089	}
				1090	}
				1091	}
				1092
				1093	// The state machine is done. Check whether it found a match...
				1094
				1095	// If the iterator failed to advance in the match engine, force it ahead by one.
				1096	// (This really indicates a defect in the break rules. They should always match
				1097	// at least one character.)
				1098	if (result == initialPosition) {
				1099	UTEXT_SETNATIVEINDEX(fText, initialPosition);
				1100	UTEXT_PREVIOUS32(fText);
				1101	result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
				1102	}
				1103
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1104	#ifdef RBBI_DEBUG
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1105	if (gTrace) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1106	RBBIDebugPrintf("result = %d\n\n", result);
				1107	}
				1108	#endif
				1109	return result;
				1110	}
				1111
				1112
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1113	//-------------------------------------------------------------------------------
				1114	//
				1115	// getRuleStatus() Return the break rule tag associated with the current
				1116	// iterator position. If the iterator arrived at its current
				1117	// position by iterating forwards, the value will have been
				1118	// cached by the handleNext() function.
				1119	//
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1120	//-------------------------------------------------------------------------------
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1121
				1122	int32_t RuleBasedBreakIterator::getRuleStatus() const {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1123
				1124	// fLastRuleStatusIndex indexes to the start of the appropriate status record
				1125	// (the number of status values.)
				1126	// This function returns the last (largest) of the array of status values.
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1127	int32_t idx = fRuleStatusIndex + fData->fRuleStatusTable[fRuleStatusIndex];
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1128	int32_t tagVal = fData->fRuleStatusTable[idx];
				1129
				1130	return tagVal;
				1131	}
				1132
				1133
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1134	int32_t RuleBasedBreakIterator::getRuleStatusVec(
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1135	int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1136	if (U_FAILURE(status)) {
				1137	return 0;
				1138	}
				1139
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1140	int32_t numVals = fData->fRuleStatusTable[fRuleStatusIndex];
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1141	int32_t numValsToCopy = numVals;
				1142	if (numVals > capacity) {
				1143	status = U_BUFFER_OVERFLOW_ERROR;
				1144	numValsToCopy = capacity;
				1145	}
				1146	int i;
				1147	for (i=0; i<numValsToCopy; i++) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1148	fillInVec[i] = fData->fRuleStatusTable[fRuleStatusIndex + i + 1];
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1149	}
				1150	return numVals;
				1151	}
				1152
				1153
				1154
				1155	//-------------------------------------------------------------------------------
				1156	//
				1157	// getBinaryRules Access to the compiled form of the rules,
				1158	// for use by build system tools that save the data
				1159	// for standard iterator types.
				1160	//
				1161	//-------------------------------------------------------------------------------
				1162	const uint8_t *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) {
				1163	const uint8_t *retPtr = NULL;
				1164	length = 0;
				1165
				1166	if (fData != NULL) {
				1167	retPtr = (const uint8_t *)fData->fHeader;
				1168	length = fData->fHeader->fLength;
				1169	}
				1170	return retPtr;
				1171	}
				1172
				1173
				1174	BreakIterator * RuleBasedBreakIterator::createBufferClone(void * /stackBuffer/,
				1175	int32_t &bufferSize,
				1176	UErrorCode &status)
				1177	{
				1178	if (U_FAILURE(status)){
				1179	return NULL;
				1180	}
				1181
				1182	if (bufferSize == 0) {
				1183	bufferSize = 1; // preflighting for deprecated functionality
				1184	return NULL;
				1185	}
				1186
				1187	BreakIterator *clonedBI = clone();
				1188	if (clonedBI == NULL) {
				1189	status = U_MEMORY_ALLOCATION_ERROR;
				1190	} else {
				1191	status = U_SAFECLONE_ALLOCATED_WARNING;
				1192	}
				1193	return (RuleBasedBreakIterator *)clonedBI;
				1194	}
				1195
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1196	U_NAMESPACE_END
				1197
				1198
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1199	static icu::UStack *gLanguageBreakFactories = nullptr;
				1200	static const icu::UnicodeString *gEmptyString = nullptr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1201	static icu::UInitOnce gLanguageBreakFactoriesInitOnce = U_INITONCE_INITIALIZER;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1202	static icu::UInitOnce gRBBIInitOnce = U_INITONCE_INITIALIZER;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1203
				1204	/**
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1205	* Release all static memory held by breakiterator.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1206	*/
				1207	U_CDECL_BEGIN
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1208	static UBool U_CALLCONV rbbi_cleanup(void) {
				1209	delete gLanguageBreakFactories;
				1210	gLanguageBreakFactories = nullptr;
				1211	delete gEmptyString;
				1212	gEmptyString = nullptr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1213	gLanguageBreakFactoriesInitOnce.reset();
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1214	gRBBIInitOnce.reset();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1215	return TRUE;
				1216	}
				1217	U_CDECL_END
				1218
				1219	U_CDECL_BEGIN
				1220	static void U_CALLCONV _deleteFactory(void *obj) {
				1221	delete (icu::LanguageBreakFactory *) obj;
				1222	}
				1223	U_CDECL_END
				1224	U_NAMESPACE_BEGIN
				1225
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1226	static void U_CALLCONV rbbiInit() {
				1227	gEmptyString = new UnicodeString();
				1228	ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
				1229	}
				1230
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1231	static void U_CALLCONV initLanguageFactories() {
				1232	UErrorCode status = U_ZERO_ERROR;
				1233	U_ASSERT(gLanguageBreakFactories == NULL);
				1234	gLanguageBreakFactories = new UStack(_deleteFactory, NULL, status);
				1235	if (gLanguageBreakFactories != NULL && U_SUCCESS(status)) {
				1236	ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status);
				1237	gLanguageBreakFactories->push(builtIn, status);
				1238	#ifdef U_LOCAL_SERVICE_HOOK
				1239	LanguageBreakFactory extra = (LanguageBreakFactory )uprv_svc_hook("languageBreakFactory", &status);
				1240	if (extra != NULL) {
				1241	gLanguageBreakFactories->push(extra, status);
				1242	}
				1243	#endif
				1244	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1245	ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1246	}
				1247
				1248
				1249	static const LanguageBreakEngine*
				1250	getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType)
				1251	{
				1252	umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
				1253	if (gLanguageBreakFactories == NULL) {
				1254	return NULL;
				1255	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1256
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1257	int32_t i = gLanguageBreakFactories->size();
				1258	const LanguageBreakEngine *lbe = NULL;
				1259	while (--i >= 0) {
				1260	LanguageBreakFactory factory = (LanguageBreakFactory )(gLanguageBreakFactories->elementAt(i));
				1261	lbe = factory->getEngineFor(c, breakType);
				1262	if (lbe != NULL) {
				1263	break;
				1264	}
				1265	}
				1266	return lbe;
				1267	}
				1268
				1269
				1270	//-------------------------------------------------------------------------------
				1271	//
				1272	// getLanguageBreakEngine Find an appropriate LanguageBreakEngine for the
				1273	// the character c.
				1274	//
				1275	//-------------------------------------------------------------------------------
				1276	const LanguageBreakEngine *
				1277	RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
				1278	const LanguageBreakEngine *lbe = NULL;
				1279	UErrorCode status = U_ZERO_ERROR;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1280
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1281	if (fLanguageBreakEngines == NULL) {
				1282	fLanguageBreakEngines = new UStack(status);
				1283	if (fLanguageBreakEngines == NULL \|\| U_FAILURE(status)) {
				1284	delete fLanguageBreakEngines;
				1285	fLanguageBreakEngines = 0;
				1286	return NULL;
				1287	}
				1288	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1289
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1290	int32_t i = fLanguageBreakEngines->size();
				1291	while (--i >= 0) {
				1292	lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
				1293	if (lbe->handles(c, fBreakType)) {
				1294	return lbe;
				1295	}
				1296	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1297
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1298	// No existing dictionary took the character. See if a factory wants to
				1299	// give us a new LanguageBreakEngine for this character.
				1300	lbe = getLanguageBreakEngineFromFactory(c, fBreakType);
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1301
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1302	// If we got one, use it and push it on our stack.
				1303	if (lbe != NULL) {
				1304	fLanguageBreakEngines->push((void *)lbe, status);
				1305	// Even if we can't remember it, we can keep looking it up, so
				1306	// return it even if the push fails.
				1307	return lbe;
				1308	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1309
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1310	// No engine is forthcoming for this character. Add it to the
				1311	// reject set. Create the reject break engine if needed.
				1312	if (fUnhandledBreakEngine == NULL) {
				1313	fUnhandledBreakEngine = new UnhandledEngine(status);
				1314	if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) {
				1315	status = U_MEMORY_ALLOCATION_ERROR;
				1316	}
				1317	// Put it last so that scripts for which we have an engine get tried
				1318	// first.
				1319	fLanguageBreakEngines->insertElementAt(fUnhandledBreakEngine, 0, status);
				1320	// If we can't insert it, or creation failed, get rid of it
				1321	if (U_FAILURE(status)) {
				1322	delete fUnhandledBreakEngine;
				1323	fUnhandledBreakEngine = 0;
				1324	return NULL;
				1325	}
				1326	}
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1327
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1328	// Tell the reject engine about the character; at its discretion, it may
				1329	// add more than just the one character.
				1330	fUnhandledBreakEngine->handleCharacter(c, fBreakType);
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1331
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1332	return fUnhandledBreakEngine;
				1333	}
				1334
				1335
				1336
				1337	/*int32_t RuleBasedBreakIterator::getBreakType() const {
				1338	return fBreakType;
				1339	}*/
				1340
				1341	void RuleBasedBreakIterator::setBreakType(int32_t type) {
				1342	fBreakType = type;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	1343	}
				1344
				1345	void RuleBasedBreakIterator::dumpCache() {
				1346	fBreakCache->dumpCache();
				1347	}
				1348
				1349	/**
				1350	* Returns the description used to create this iterator
				1351	*/
				1352
				1353	const UnicodeString&
				1354	RuleBasedBreakIterator::getRules() const {
				1355	if (fData != NULL) {
				1356	return fData->getRuleSourceString();
				1357	} else {
				1358	umtx_initOnce(gRBBIInitOnce, &rbbiInit);
				1359	return *gEmptyString;
				1360	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1361	}
				1362
				1363	U_NAMESPACE_END
				1364
				1365	#endif /* #if !UCONFIG_NO_BREAK_ITERATION */