1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 4 *************************************************************************** 5 * Copyright (C) 1999-2016 International Business Machines Corporation * 6 * and others. All rights reserved. * 7 *************************************************************************** 9 ********************************************************************** 10 * Date Name Description 11 * 10/22/99 alan Creation. 12 * 11/11/99 rgillam Complete port from Java. 13 ********************************************************************** 21 #if U_SHOW_CPLUSPLUS_API 28 #if !UCONFIG_NO_BREAK_ITERATION 40 classLanguageBreakEngine;
47 #ifndef U_HIDE_INTERNAL_API 103 int32_t* foundBreaks, int32_t foundBreaksCapacity,
106 #endif/* U_HIDE_INTERNAL_API */ 128 #ifndef U_HIDE_INTERNAL_API 130 #endif/* U_HIDE_INTERNAL_API */ 136 RBBIDataWrapper *fData =
nullptr;
149 int32_t fPosition = 0;
154 int32_t fRuleStatusIndex = 0;
160 BreakCache *fBreakCache =
nullptr;
166 class DictionaryCache;
167 DictionaryCache *fDictionaryCache =
nullptr;
175 UStack *fLanguageBreakEngines =
nullptr;
183 UnhandledEngine *fUnhandledBreakEngine =
nullptr;
189 uint32_t fDictionaryCharCount = 0;
213 int32_t *fLookAheadMatches =
nullptr;
218 UBool fIsPhraseBreaking =
false;
220 //======================================================================= 222 //======================================================================= 249 friend class RBBIRuleBuilder;
391 //======================================================================= 392 // BreakIterator overrides 393 //======================================================================= 498 virtual int32_t
next(int32_t n)
override;
636 #ifndef U_FORCE_HIDE_DEPRECATED_API 666 #endif// U_FORCE_HIDE_DEPRECATED_API 716 //======================================================================= 718 //======================================================================= 727 int32_t handleSafePrevious(int32_t fromPosition);
739 int32_t handleNext();
742 * Templatized version of handleNext() and handleSafePrevious(). 744 * There will be exactly four instantiations, two each for 8 and 16 bit tables, 745 * two each for 8 and 16 bit trie. 746 * Having separate instantiations for the table types keeps conditional tests of 747 * the table type out of the inner loops, at the expense of replicated code. 749 * The template parameter for the Trie access function is a value, not a type. 750 * Doing it this way, the compiler will inline the Trie function in the 751 * expanded functions. (Both the 8 and 16 bit access functions have the same type 757 template<
typename RowType, PTrieFunc trieFunc>
758 int32_t handleSafePrevious(int32_t fromPosition);
760 template<
typename RowType, PTrieFunc trieFunc>
761 int32_t handleNext();
770 const LanguageBreakEngine *getLanguageBreakEngine(
UChar32 c,
const char* locale);
773 #ifndef U_HIDE_INTERNAL_API 785 #endif/* U_HIDE_INTERNAL_API */ 787 #ifndef U_HIDE_INTERNAL_API 799 #endif/* U_HIDE_INTERNAL_API */ 806 #endif/* #if !UCONFIG_NO_BREAK_ITERATION */ 808 #endif/* U_SHOW_CPLUSPLUS_API */ The BreakIterator class implements methods for finding the location of boundaries in text.
virtual U_COMMON_API bool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
Abstract class that defines an API for iteration on text objects.
The ExternalBreakEngine class defines an abstract interface for the host environment to provide a low ...
virtual bool isFor(UChar32 c, const char *locale) const =0
virtual int32_t fillBreaks(UText *text, int32_t start, int32_t end, int32_t *foundBreaks, int32_t foundBreaksCapacity, UErrorCode &status) const =0
virtual bool handles(UChar32 c) const =0
virtual ~ExternalBreakEngine()
destructor
A subclass of BreakIterator whose behavior is specified using a list of rules.
virtual int32_t first() override
Sets the current iteration position to the beginning of the text, position zero.
virtual int32_t hashCode() const
Compute a hash code for this BreakIterator.
virtual int32_t following(int32_t offset) override
Sets the iterator to refer to the first boundary position following the specified position.
virtual bool operator==(const BreakIterator &that) const override
Equality operator.
virtual int32_t current() const override
Returns the current iteration position.
static UClassID getStaticClassID()
Returns the class ID for this class.
virtual RuleBasedBreakIterator & refreshInputText(UText *input, UErrorCode &status) override
Set the subject text string upon which the break iterator is operating without changing any other asp...
RuleBasedBreakIterator & operator=(const RuleBasedBreakIterator &that)
Assignment operator.
virtual UClassID getDynamicClassID() const override
Returns a unique class ID POLYMORPHICALLY.
void dumpTables()
Debugging function only.
virtual int32_t next() override
Advances the iterator to the next boundary position.
virtual int32_t getRuleStatus() const override
Return the status tag from the break rule that determined the boundary at the current iteration posit...
virtual RuleBasedBreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status) override
Deprecated functionality.
virtual UBool isBoundary(int32_t offset) override
Returns true if the specified position is a boundary position.
bool operator!=(const BreakIterator &that) const
Not-equal operator.
virtual RuleBasedBreakIterator * clone() const override
Returns a newly-constructed RuleBasedBreakIterator with the same behavior, and iterating over the sam...
void dumpCache()
Debugging function only.
static void registerExternalBreakEngine(ExternalBreakEngine *toAdopt, UErrorCode &status)
Register a new external break engine.
RuleBasedBreakIterator()
Default constructor.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override
Get the status (tag) values from the break rule(s) that determined the boundary at the current iterat...
virtual const uint8_t * getBinaryRules(uint32_t &length)
Return the binary form of compiled break rules, which can then be used to create a new break iterator...
virtual int32_t last() override
Sets the current iteration position to the end of the text.
virtual int32_t preceding(int32_t offset) override
Sets the iterator to refer to the last boundary position before the specified position.
RuleBasedBreakIterator(const RuleBasedBreakIterator &that)
Copy constructor.
virtual void adoptText(CharacterIterator *newText) override
Set the iterator to analyze a new piece of text.
virtual UText * getUText(UText *fillIn, UErrorCode &status) const override
Get a UText for the text being analyzed.
virtual int32_t previous() override
Moves the iterator backwards, to the last boundary preceding this one.
RuleBasedBreakIterator(UDataMemory *image, UErrorCode &status)
This constructor uses the udata interface to create a BreakIterator whose internal tables live in a m...
virtual int32_t next(int32_t n) override
Advances the iterator either forward or backward the specified number of steps.
virtual CharacterIterator & getText() const override
virtual const UnicodeString & getRules() const
Returns the description used to create this iterator.
virtual void setText(UText *text, UErrorCode &status) override
Reset the break iterator to operate over the text represented by the UText.
virtual ~RuleBasedBreakIterator()
Destructor.
RuleBasedBreakIterator(const UnicodeString &rules, UParseError &parseError, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
virtual void setText(const UnicodeString &newText) override
Set the iterator to analyze a new piece of text.
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
UObject is the common ICU "boilerplate" class.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
C API: Parse Error Information.
C++ API: String Character Iterator.
Immutable Unicode code point trie structure.
A UParseError struct is used to return detailed information about parsing errors.
C API: Data loading interface.
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
int8_t UBool
The ICU boolean type, a signed-byte integer.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
#define UTEXT_INITIALIZER
initializer to be used with local (stack) instances of a UText struct.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
@ U_ZERO_ERROR
No error, no warning.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.