Movatterモバイル変換


[0]ホーム

URL:


ICU 78.1
rbbi.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 1999-2016 International Business Machines Corporation *
6 * and others. All rights reserved. *
7 ***************************************************************************
8 
9 **********************************************************************
10 * Date Name Description
11 * 10/22/99 alan Creation.
12 * 11/11/99 rgillam Complete port from Java.
13 **********************************************************************
14 */
15 
16 #ifndef RBBI_H
17 #define RBBI_H
18 
19 #include "unicode/utypes.h"
20 
21 #if U_SHOW_CPLUSPLUS_API
22 
28 #if !UCONFIG_NO_BREAK_ITERATION
29 
30 #include "unicode/brkiter.h"
31 #include "unicode/udata.h"
32 #include "unicode/parseerr.h"
33 #include "unicode/schriter.h"
34 
35 structUCPTrie;
36 
37 U_NAMESPACE_BEGIN
38 
40 classLanguageBreakEngine;
41 structRBBIDataHeader;
42 classRBBIDataWrapper;
43 classUnhandledEngine;
44 classUStack;
45 
46 
47 #ifndef U_HIDE_INTERNAL_API
// ExternalBreakEngine: abstract interface derived from UObject. Every
// operation below is pure virtual, so a concrete subclass has to implement
// all of them. The whole class is only declared when U_HIDE_INTERNAL_API is
// not defined, i.e. it is part of the internal API.
60 classExternalBreakEngine :publicUObject {
61 public:
// Virtual destructor with an empty body, so subclasses can be destroyed
// through an ExternalBreakEngine pointer.
66 virtual~ExternalBreakEngine() {}
67 
// NOTE(review): presumably reports whether this engine applies to code
// point `c` in the given `locale` -- confirm against the ICU API docs.
77 virtualboolisFor(UChar32 c,constchar* locale)const = 0;
78 
// NOTE(review): presumably reports whether this engine can process code
// point `c` -- confirm against the ICU API docs.
87 virtualboolhandles(UChar32 c)const = 0;
88 
// Takes a UText, a [start, end] pair of int32_t offsets, an output array
// `foundBreaks` with its capacity, and a UErrorCode passed by reference.
// NOTE(review): the int32_t result is presumably the number of breaks
// found; the exact contract (inclusive/exclusive `end`, overflow handling)
// is not visible here -- confirm before relying on it.
102 virtual int32_tfillBreaks(UText* text, int32_t start, int32_t end,
103  int32_t* foundBreaks, int32_t foundBreaksCapacity,
104 UErrorCode& status)const = 0;
105 };
106 #endif/* U_HIDE_INTERNAL_API */
107 
108 
// RuleBasedBreakIterator: a subclass of BreakIterator whose behavior is
// specified using a list of rules. Exported with U_COMMON_API. The class is
// not declared `final`; the keyword is only present as a comment.
120 classU_COMMON_APIRuleBasedBreakIterator/*final*/ :publicBreakIterator {
121 
122 private:
// UText member, initialized with UTEXT_INITIALIZER.
126 UText fText =UTEXT_INITIALIZER;
127 
// fData is public while the internal API is visible; when
// U_HIDE_INTERNAL_API is defined the `public:` label disappears and fData
// stays in the private section opened above.
128 #ifndef U_HIDE_INTERNAL_API
129 public:
130 #endif/* U_HIDE_INTERNAL_API */
136  RBBIDataWrapper *fData =nullptr;
137 
138 private:
// Error code member; starts out as U_ZERO_ERROR.
143 UErrorCode fErrorCode =U_ZERO_ERROR;
144 
// NOTE(review): presumably the current iteration position -- confirm.
149  int32_t fPosition = 0;
150 
// NOTE(review): presumably an index into the rule status data for the
// current boundary -- confirm.
154  int32_t fRuleStatusIndex = 0;
155 
// Nested class, forward-declared only; held through a pointer that starts
// out null.
159 classBreakCache;
160  BreakCache *fBreakCache =nullptr;
161 
// Nested class, forward-declared only; held through a pointer that starts
// out null.
166 classDictionaryCache;
167  DictionaryCache *fDictionaryCache =nullptr;
168 
175  UStack *fLanguageBreakEngines =nullptr;
176 
183  UnhandledEngine *fUnhandledBreakEngine =nullptr;
184 
189  uint32_t fDictionaryCharCount = 0;
190 
// Initially points at the fSCharIter member declared just below. Only the
// address is taken here, so referring to the later member is fine, but the
// relative order of these two declarations should not be changed casually.
196 CharacterIterator *fCharIter = &fSCharIter;
197 
// Character iterator member, constructed over an empty UTF-16 string of
// length 0.
203 UCharCharacterIterator fSCharIter {u"", 0};
204 
208 bool fDone =false;
209 
213  int32_t *fLookAheadMatches =nullptr;
214 
218 UBool fIsPhraseBreaking =false;
219 
220 //=======================================================================
221 // constructors
222 //=======================================================================
223 
// The three constructors below are private; the friend declarations that
// follow give RBBIRuleBuilder and BreakIterator access to them.
// NOTE(review): ownership of `data` / `image` is not visible here --
// confirm in the implementation before changing callers.
233 RuleBasedBreakIterator(RBBIDataHeader* data,UErrorCode &status);
234 
247 RuleBasedBreakIterator(UDataMemory* image,UBool isPhraseBreaking,UErrorCode &status);
248 
249 friendclassRBBIRuleBuilder;
250 friendclassBreakIterator;
251 
// Private constructor taking the error code by pointer rather than by
// reference, which keeps it distinct from the public overloads.
257 RuleBasedBreakIterator(UErrorCode *status);
258 
259 public:
260 
// Default constructor.
266 RuleBasedBreakIterator();
267 
// Copy constructor.
274 RuleBasedBreakIterator(constRuleBasedBreakIterator& that);
275 
// Construct a RuleBasedBreakIterator from a set of rules supplied as a
// string. `parseError` and `status` are passed by reference.
284 RuleBasedBreakIterator(constUnicodeString &rules,
285 UParseError &parseError,
286 UErrorCode &status);
287 
// Construct a RuleBasedBreakIterator from a set of precompiled binary
// rules: `compiledRules` points at the bytes, `ruleLength` is their length.
311 RuleBasedBreakIterator(const uint8_t *compiledRules,
312  uint32_t ruleLength,
313 UErrorCode &status);
314 
// Creates the iterator through the udata interface from a UDataMemory
// image.
327 RuleBasedBreakIterator(UDataMemory* image,UErrorCode &status);
328 
// Destructor.
333 virtual~RuleBasedBreakIterator();
334 
// Assignment operator.
342 RuleBasedBreakIterator&operator=(constRuleBasedBreakIterator& that);
343 
// Equality operator; overrides the BreakIterator pure virtual.
352 virtualbooloperator==(constBreakIterator& that)const override;
353 
// Not-equal operator; defined inline as the negation of operator==.
361 inlinebooloperator!=(constBreakIterator& that) const{
362 return !operator==(that);
363  }
364 
// Returns a newly-constructed RuleBasedBreakIterator with the same
// behavior. The return type is the derived class, not BreakIterator*.
375 virtualRuleBasedBreakIterator*clone()const override;
376 
// Compute a hash code for this BreakIterator.
382 virtual int32_thashCode()const;
383 
// Returns the description used to create this iterator.
389 virtualconstUnicodeString&getRules()const;
390 
391 //=======================================================================
392 // BreakIterator overrides
393 //=======================================================================
394 
// Returns a reference to a CharacterIterator.
// NOTE(review): presumably the text being iterated over -- confirm.
419 virtualCharacterIterator&getText()const override;
420 
// Get a UText for the text being analyzed.
435 virtualUText *getUText(UText *fillIn,UErrorCode &status)const override;
436 
// Set the iterator to analyze a new piece of text, supplied as a
// CharacterIterator.
444 virtualvoidadoptText(CharacterIterator* newText)override;
445 
// Set the iterator to analyze a new piece of text, supplied as a
// UnicodeString.
457 virtualvoidsetText(constUnicodeString& newText)override;
458 
// Reset the break iterator to operate over the text represented by the
// UText.
472 virtualvoidsetText(UText *text,UErrorCode &status)override;
473 
// Sets the current iteration position to the beginning of the text,
// position zero.
479 virtual int32_tfirst()override;
480 
// Sets the current iteration position to the end of the text.
486 virtual int32_tlast()override;
487 
// Advances the iterator either forward or backward the specified number of
// steps.
498 virtual int32_tnext(int32_t n)override;
499 
// Advances the iterator to the next boundary position.
505 virtual int32_tnext()override;
506 
// Moves the iterator backwards, to the last boundary preceding this one.
512 virtual int32_tprevious()override;
513 
// Sets the iterator to refer to the first boundary position following the
// specified position.
521 virtual int32_tfollowing(int32_t offset)override;
522 
// Sets the iterator to refer to the last boundary position before the
// specified position.
530 virtual int32_tpreceding(int32_t offset)override;
531 
// Returns true if the specified position is a boundary position. Not a
// const member function.
540 virtualUBoolisBoundary(int32_t offset)override;
541 
// Returns the current iteration position.
550 virtual int32_tcurrent()const override;
551 
// Return the status tag from the break rule that determined the boundary
// at the current iteration position.
583 virtual int32_tgetRuleStatus()const override;
584 
// Get the status (tag) values from the break rule(s) that determined the
// boundary at the current iteration position. `fillInVec` is the output
// array and `capacity` its size.
608 virtual int32_tgetRuleStatusVec(int32_t *fillInVec, int32_t capacity,UErrorCode &status)override;
609 
// Returns a unique class ID POLYMORPHICALLY.
621 virtualUClassIDgetDynamicClassID()const override;
622 
// Returns the class ID for this class.
634 staticUClassID U_EXPORT2getStaticClassID();
635 
// Deprecated functionality; only declared when U_FORCE_HIDE_DEPRECATED_API
// is not defined.
636 #ifndef U_FORCE_HIDE_DEPRECATED_API
663 virtualRuleBasedBreakIterator *createBufferClone(void *stackBuffer,
664  int32_t &BufferSize,
665 UErrorCode &status)override;
666 #endif// U_FORCE_HIDE_DEPRECATED_API
667 
// Return the binary form of compiled break rules, which can then be used
// to create a new break iterator. `length` is an output parameter.
// NOTE(review): lifetime/ownership of the returned buffer is not visible
// here -- confirm.
685 virtualconst uint8_t *getBinaryRules(uint32_t &length);
686 
// Set the subject text string upon which the break iterator is operating.
// Returns a reference to a RuleBasedBreakIterator.
712 virtualRuleBasedBreakIterator &refreshInputText(UText *input,UErrorCode &status)override;
713 
714 
715 private:
716 //=======================================================================
717 // implementation
718 //=======================================================================
// Non-template entry points. Template overloads with the same names are
// declared further down.
// NOTE(review): these presumably select one of the template
// instantiations at run time -- confirm in the implementation.
727  int32_t handleSafePrevious(int32_t fromPosition);
728 
739  int32_t handleNext();
740 
741 /*
742  * Templatized version of handleNext() and handleSafePrevious().
743  *
744  * There will be exactly four instantiations, two each for 8 and 16 bit tables,
745  * two each for 8 and 16 bit trie.
746  * Having separate instantiations for the table types keeps conditional tests of
747  * the table type out of the inner loops, at the expense of replicated code.
748  *
749  * The template parameter for the Trie access function is a value, not a type.
750  * Doing it this way, the compiler will inline the Trie function in the
751  * expanded functions. (Both the 8 and 16 bit access functions have the same type
752  * signature)
753  */
754 
// Pointer-to-function type for the trie access function: takes a
// const UCPTrie* and a UChar32, returns uint16_t.
755 typedef uint16_t (*PTrieFunc)(constUCPTrie *,UChar32);
756 
757 template<typename RowType, PTrieFunc trieFunc>
758  int32_t handleSafePrevious(int32_t fromPosition);
759 
760 template<typename RowType, PTrieFunc trieFunc>
761  int32_t handleNext();
762 
763 
// NOTE(review): presumably looks up the LanguageBreakEngine responsible
// for code point `c` in `locale`; behavior when none matches is not
// visible here -- confirm.
770 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c,constchar* locale);
771 
772 public:
773 #ifndef U_HIDE_INTERNAL_API
// Debugging function only.
778 voiddumpCache();
779 
// Debugging function only.
784 voiddumpTables();
785 #endif/* U_HIDE_INTERNAL_API */
786 
787 #ifndef U_HIDE_INTERNAL_API
// Register a new external break engine. Static member; internal API.
// NOTE(review): the parameter name `toAdopt` suggests ownership is
// transferred to ICU -- confirm before freeing the engine in callers.
797 staticvoid U_EXPORT2registerExternalBreakEngine(
798 ExternalBreakEngine* toAdopt,UErrorCode& status);
799 #endif/* U_HIDE_INTERNAL_API */
800 
801 };
802 
803 
804 U_NAMESPACE_END
805 
806 #endif/* #if !UCONFIG_NO_BREAK_ITERATION */
807 
808 #endif/* U_SHOW_CPLUSPLUS_API */
809 
810 #endif
brkiter.h
C++ API: Break Iterator.
icu::BreakIterator
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition:brkiter.h:106
icu::BreakIterator::operator==
virtual U_COMMON_API bool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
icu::CharacterIterator
Abstract class that defines an API for iteration on text objects.
Definition:chariter.h:361
icu::ExternalBreakEngine
The ExternalBreakEngine class defines an abstract interface for the host environment to provide a low ...
Definition:rbbi.h:60
icu::ExternalBreakEngine::isFor
virtual bool isFor(UChar32 c, const char *locale) const =0
icu::ExternalBreakEngine::fillBreaks
virtual int32_t fillBreaks(UText *text, int32_t start, int32_t end, int32_t *foundBreaks, int32_t foundBreaksCapacity, UErrorCode &status) const =0
icu::ExternalBreakEngine::handles
virtual bool handles(UChar32 c) const =0
icu::ExternalBreakEngine::~ExternalBreakEngine
virtual ~ExternalBreakEngine()
destructor
Definition:rbbi.h:66
icu::RuleBasedBreakIterator
A subclass of BreakIterator whose behavior is specified using a list of rules.
Definition:rbbi.h:120
icu::RuleBasedBreakIterator::first
virtual int32_t first() override
Sets the current iteration position to the beginning of the text, position zero.
icu::RuleBasedBreakIterator::hashCode
virtual int32_t hashCode() const
Compute a hash code for this BreakIterator.
icu::RuleBasedBreakIterator::following
virtual int32_t following(int32_t offset) override
Sets the iterator to refer to the first boundary position following the specified position.
icu::RuleBasedBreakIterator::operator==
virtual bool operator==(const BreakIterator &that) const override
Equality operator.
icu::RuleBasedBreakIterator::current
virtual int32_t current() const override
Returns the current iteration position.
icu::RuleBasedBreakIterator::getStaticClassID
static UClassID getStaticClassID()
Returns the class ID for this class.
icu::RuleBasedBreakIterator::refreshInputText
virtual RuleBasedBreakIterator & refreshInputText(UText *input, UErrorCode &status) override
Set the subject text string upon which the break iterator is operating without changing any other asp...
icu::RuleBasedBreakIterator::operator=
RuleBasedBreakIterator & operator=(const RuleBasedBreakIterator &that)
Assignment operator.
icu::RuleBasedBreakIterator::getDynamicClassID
virtual UClassID getDynamicClassID() const override
Returns a unique class ID POLYMORPHICALLY.
icu::RuleBasedBreakIterator::dumpTables
void dumpTables()
Debugging function only.
icu::RuleBasedBreakIterator::next
virtual int32_t next() override
Advances the iterator to the next boundary position.
icu::RuleBasedBreakIterator::getRuleStatus
virtual int32_t getRuleStatus() const override
Return the status tag from the break rule that determined the boundary at the current iteration posit...
icu::RuleBasedBreakIterator::createBufferClone
virtual RuleBasedBreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status) override
Deprecated functionality.
icu::RuleBasedBreakIterator::isBoundary
virtual UBool isBoundary(int32_t offset) override
Returns true if the specified position is a boundary position.
icu::RuleBasedBreakIterator::operator!=
bool operator!=(const BreakIterator &that) const
Not-equal operator.
Definition:rbbi.h:361
icu::RuleBasedBreakIterator::clone
virtual RuleBasedBreakIterator * clone() const override
Returns a newly-constructed RuleBasedBreakIterator with the same behavior, and iterating over the sam...
icu::RuleBasedBreakIterator::dumpCache
void dumpCache()
Debugging function only.
icu::RuleBasedBreakIterator::registerExternalBreakEngine
static void registerExternalBreakEngine(ExternalBreakEngine *toAdopt, UErrorCode &status)
Register a new external break engine.
icu::RuleBasedBreakIterator::RuleBasedBreakIterator
RuleBasedBreakIterator()
Default constructor.
icu::RuleBasedBreakIterator::getRuleStatusVec
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override
Get the status (tag) values from the break rule(s) that determined the boundary at the current iterat...
icu::RuleBasedBreakIterator::getBinaryRules
virtual const uint8_t * getBinaryRules(uint32_t &length)
Return the binary form of compiled break rules, which can then be used to create a new break iterator...
icu::RuleBasedBreakIterator::last
virtual int32_t last() override
Sets the current iteration position to the end of the text.
icu::RuleBasedBreakIterator::preceding
virtual int32_t preceding(int32_t offset) override
Sets the iterator to refer to the last boundary position before the specified position.
icu::RuleBasedBreakIterator::RuleBasedBreakIterator
RuleBasedBreakIterator(const RuleBasedBreakIterator &that)
Copy constructor.
icu::RuleBasedBreakIterator::adoptText
virtual void adoptText(CharacterIterator *newText) override
Set the iterator to analyze a new piece of text.
icu::RuleBasedBreakIterator::getUText
virtual UText * getUText(UText *fillIn, UErrorCode &status) const override
Get a UText for the text being analyzed.
icu::RuleBasedBreakIterator::previous
virtual int32_t previous() override
Moves the iterator backwards, to the last boundary preceding this one.
icu::RuleBasedBreakIterator::RuleBasedBreakIterator
RuleBasedBreakIterator(UDataMemory *image, UErrorCode &status)
This constructor uses the udata interface to create a BreakIterator whose internal tables live in a m...
icu::RuleBasedBreakIterator::next
virtual int32_t next(int32_t n) override
Advances the iterator either forward or backward the specified number of steps.
icu::RuleBasedBreakIterator::getText
virtual CharacterIterator & getText() const override
icu::RuleBasedBreakIterator::getRules
virtual const UnicodeString & getRules() const
Returns the description used to create this iterator.
icu::RuleBasedBreakIterator::setText
virtual void setText(UText *text, UErrorCode &status) override
Reset the break iterator to operate over the text represented by the UText.
icu::RuleBasedBreakIterator::~RuleBasedBreakIterator
virtual ~RuleBasedBreakIterator()
Destructor.
icu::RuleBasedBreakIterator::RuleBasedBreakIterator
RuleBasedBreakIterator(const UnicodeString &rules, UParseError &parseError, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
icu::RuleBasedBreakIterator::RuleBasedBreakIterator
RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
icu::RuleBasedBreakIterator::setText
virtual void setText(const UnicodeString &newText) override
Set the iterator to analyze a new piece of text.
icu::UCharCharacterIterator
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
Definition:uchriter.h:38
icu::UObject
UObject is the common ICU "boilerplate" class.
Definition:uobject.h:222
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition:unistr.h:303
parseerr.h
C API: Parse Error Information.
schriter.h
C++ API: String Character Iterator.
UCPTrie
Immutable Unicode code point trie structure.
Definition:ucptrie.h:59
UParseError
A UParseError struct is used to return detailed information about parsing errors.
Definition:parseerr.h:58
UText
UText struct.
Definition:utext.h:1328
udata.h
C API: Data loading interface.
UDataMemory
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
Definition:udata.h:161
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition:umachine.h:449
UBool
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition:umachine.h:269
UClassID
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition:uobject.h:96
UTEXT_INITIALIZER
#define UTEXT_INITIALIZER
initializer to be used with local (stack) instances of a UText struct.
Definition:utext.h:1558
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition:utypes.h:509
U_ZERO_ERROR
@ U_ZERO_ERROR
No error, no warning.
Definition:utypes.h:544
U_COMMON_API
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition:utypes.h:315

Generated by doxygen 1.9.1
[8]ページ先頭

©2009-2025 Movatter.jp