Movatterモバイル変換


[0]ホーム

URL:


ICU 77.1  77.1
normalizer2.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2009-2013, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: normalizer2.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2009nov22
16 * created by: Markus W. Scherer
17 */
18 
19 #ifndef __NORMALIZER2_H__
20 #define __NORMALIZER2_H__
21 
27 #include "unicode/utypes.h"
28 
29 #if U_SHOW_CPLUSPLUS_API
30 
31 #if !UCONFIG_NO_NORMALIZATION
32 
33 #include "unicode/stringpiece.h"
34 #include "unicode/uniset.h"
35 #include "unicode/unistr.h"
36 #include "unicode/unorm2.h"
37 
38 U_NAMESPACE_BEGIN
39 
40 class ByteSink;
41 
85 class U_COMMON_API Normalizer2 : public UObject {
86 public:
91 ~Normalizer2();
92 
104 static const Normalizer2 *
105 getNFCInstance(UErrorCode &errorCode);
106 
118 static const Normalizer2 *
119 getNFDInstance(UErrorCode &errorCode);
120 
132 static const Normalizer2 *
133 getNFKCInstance(UErrorCode &errorCode);
134 
146 static const Normalizer2 *
147 getNFKDInstance(UErrorCode &errorCode);
148 
163 static const Normalizer2 *
164 getNFKCCasefoldInstance(UErrorCode &errorCode);
165 
180 static const Normalizer2 *
181 getNFKCSimpleCasefoldInstance(UErrorCode &errorCode);
182 
204 static const Normalizer2 *
205 getInstance(const char *packageName,
206 const char *name,
207 UNormalization2Mode mode,
208 UErrorCode &errorCode);
209 
220 UnicodeString
221 normalize(const UnicodeString &src, UErrorCode &errorCode) const {
222 UnicodeString result;
223  normalize(src, result, errorCode);
224 return result;
225  }
239 virtual UnicodeString &
240 normalize(const UnicodeString &src,
241 UnicodeString &dest,
242 UErrorCode &errorCode) const = 0;
243 
266 virtual void
267 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
268 Edits *edits, UErrorCode &errorCode) const;
269 
284 virtual UnicodeString &
285 normalizeSecondAndAppend(UnicodeString &first,
286 const UnicodeString &second,
287 UErrorCode &errorCode) const = 0;
302 virtual UnicodeString &
303 append(UnicodeString &first,
304 const UnicodeString &second,
305 UErrorCode &errorCode) const = 0;
306 
320 virtual UBool
321 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
322 
347 virtual UBool
348 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
349 
365 virtual UChar32
366 composePair(UChar32 a, UChar32 b) const;
367 
376 virtual uint8_t
377 getCombiningClass(UChar32 c) const;
378 
393 virtual UBool
394 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
414 virtual UBool
415 isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
416 
417 
433 virtual UNormalizationCheckResult
434 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
435 
458 virtual int32_t
459 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
460 
474 virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
475 
490 virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
491 
505 virtual UBool isInert(UChar32 c) const = 0;
506 };
507 
519 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
520 public:
531 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
532  norm2(n2), set(filterSet) {}
533 
538 ~FilteredNormalizer2();
539 
553 virtual UnicodeString &
554 normalize(const UnicodeString &src,
555 UnicodeString &dest,
556 UErrorCode &errorCode) const override;
557 
580 virtual void
581 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
582 Edits *edits, UErrorCode &errorCode) const override;
583 
598 virtual UnicodeString &
599 normalizeSecondAndAppend(UnicodeString &first,
600 const UnicodeString &second,
601 UErrorCode &errorCode) const override;
616 virtual UnicodeString &
617 append(UnicodeString &first,
618 const UnicodeString &second,
619 UErrorCode &errorCode) const override;
620 
632 virtual UBool
633 getDecomposition(UChar32 c, UnicodeString &decomposition) const override;
634 
646 virtual UBool
647 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override;
648 
659 virtual UChar32
660 composePair(UChar32 a, UChar32 b) const override;
661 
670 virtual uint8_t
671 getCombiningClass(UChar32 c) const override;
672 
684 virtual UBool
685 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override;
705 virtual UBool
706 isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const override;
718 virtual UNormalizationCheckResult
719 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override;
731 virtual int32_t
732 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override;
733 
742 virtual UBool hasBoundaryBefore(UChar32 c) const override;
743 
752 virtual UBool hasBoundaryAfter(UChar32 c) const override;
753 
761 virtual UBool isInert(UChar32 c) const override;
762 private:
763 UnicodeString &
764  normalize(const UnicodeString &src,
765 UnicodeString &dest,
766 USetSpanCondition spanCondition,
767 UErrorCode &errorCode) const;
768 
769 void
770  normalizeUTF8(uint32_t options, const char *src, int32_t length,
771 ByteSink &sink, Edits *edits,
772 USetSpanCondition spanCondition,
773 UErrorCode &errorCode) const;
774 
775 UnicodeString &
776  normalizeSecondAndAppend(UnicodeString &first,
777 const UnicodeString &second,
778 UBool doNormalize,
779 UErrorCode &errorCode) const;
780 
781 const Normalizer2 &norm2;
782 const UnicodeSet &set;
783 };
784 
785 U_NAMESPACE_END
786 
787 #endif // !UCONFIG_NO_NORMALIZATION
788 
789 #endif /* U_SHOW_CPLUSPLUS_API */
790 
791 #endif // __NORMALIZER2_H__
icu::ByteSink
A ByteSink can be filled with bytes.
Definition:bytestream.h:53
icu::Edits
Records lengths of string edits but not replacement text.
Definition:edits.h:80
icu::FilteredNormalizer2
Normalization filtered by a UnicodeSet.
Definition:normalizer2.h:519
icu::FilteredNormalizer2::normalize
virtual UnicodeString & normalize(const UnicodeString &src, UnicodeString &dest, UErrorCode &errorCode) const override
Writes the normalized form of the source string to the destination string (replacing its contents) an...
icu::FilteredNormalizer2::isNormalizedUTF8
virtual UBool isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const override
Tests if the UTF-8 string is normalized.
icu::FilteredNormalizer2::~FilteredNormalizer2
~FilteredNormalizer2()
Destructor.
icu::FilteredNormalizer2::normalizeSecondAndAppend
virtual UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const override
Appends the normalized form of the second string to the first string (merging them at the boundary) a...
icu::FilteredNormalizer2::quickCheck
virtual UNormalizationCheckResult quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override
Tests if the string is normalized.
icu::FilteredNormalizer2::isInert
virtual UBool isInert(UChar32 c) const override
Tests if the character is normalization-inert.
icu::FilteredNormalizer2::composePair
virtual UChar32 composePair(UChar32 a, UChar32 b) const override
Performs pairwise composition of a & b and returns the composite if there is one.
icu::FilteredNormalizer2::getDecomposition
virtual UBool getDecomposition(UChar32 c, UnicodeString &decomposition) const override
Gets the decomposition mapping of c.
icu::FilteredNormalizer2::isNormalized
virtual UBool isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override
Tests if the string is normalized.
icu::FilteredNormalizer2::getRawDecomposition
virtual UBool getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override
Gets the raw decomposition mapping of c.
icu::FilteredNormalizer2::getCombiningClass
virtual uint8_t getCombiningClass(UChar32 c) const override
Gets the combining class of c.
icu::FilteredNormalizer2::normalizeUTF8
virtual void normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode) const override
Normalizes a UTF-8 string and optionally records how source substrings relate to changed and unchange...
icu::FilteredNormalizer2::hasBoundaryBefore
virtual UBool hasBoundaryBefore(UChar32 c) const override
Tests if the character always has a normalization boundary before it, regardless of context.
icu::FilteredNormalizer2::append
virtual UnicodeString & append(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const override
Appends the second string to the first string (merging them at the boundary) and returns the first st...
icu::FilteredNormalizer2::FilteredNormalizer2
FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet)
Constructs a filtered normalizer wrapping any Normalizer2 instance and a filter set.
Definition:normalizer2.h:531
icu::FilteredNormalizer2::hasBoundaryAfter
virtual UBool hasBoundaryAfter(UChar32 c) const override
Tests if the character always has a normalization boundary after it, regardless of context.
icu::FilteredNormalizer2::spanQuickCheckYes
virtual int32_t spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override
Returns the end of the normalized substring of the input string.
icu::Normalizer2
Unicode normalization functionality for standard Unicode normalization or for using custom mapping ta...
Definition:normalizer2.h:85
icu::Normalizer2::getDecomposition
virtual UBool getDecomposition(UChar32 c, UnicodeString &decomposition) const =0
Gets the decomposition mapping of c.
icu::Normalizer2::spanQuickCheckYes
virtual int32_t spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const =0
Returns the end of the normalized substring of the input string.
icu::Normalizer2::isNormalized
virtual UBool isNormalized(const UnicodeString &s, UErrorCode &errorCode) const =0
Tests if the string is normalized.
icu::Normalizer2::hasBoundaryBefore
virtual UBool hasBoundaryBefore(UChar32 c) const =0
Tests if the character always has a normalization boundary before it, regardless of context.
icu::Normalizer2::getNFKCInstance
static const Normalizer2 * getNFKCInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for Unicode NFKC normalization.
icu::Normalizer2::~Normalizer2
~Normalizer2()
Destructor.
icu::Normalizer2::composePair
virtual UChar32 composePair(UChar32 a, UChar32 b) const
Performs pairwise composition of a & b and returns the composite if there is one.
icu::Normalizer2::getNFCInstance
static const Normalizer2 * getNFCInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for Unicode NFC normalization.
icu::Normalizer2::normalize
virtual UnicodeString & normalize(const UnicodeString &src, UnicodeString &dest, UErrorCode &errorCode) const =0
Writes the normalized form of the source string to the destination string (replacing its contents) an...
icu::Normalizer2::getInstance
static const Normalizer2 * getInstance(const char *packageName, const char *name, UNormalization2Mode mode, UErrorCode &errorCode)
Returns a Normalizer2 instance which uses the specified data file (packageName/name similar to ucnv_o...
icu::Normalizer2::append
virtual UnicodeString & append(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const =0
Appends the second string to the first string (merging them at the boundary) and returns the first st...
icu::Normalizer2::normalizeSecondAndAppend
virtual UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const =0
Appends the normalized form of the second string to the first string (merging them at the boundary) a...
icu::Normalizer2::normalize
UnicodeString normalize(const UnicodeString &src, UErrorCode &errorCode) const
Returns the normalized form of the source string.
Definition:normalizer2.h:221
icu::Normalizer2::quickCheck
virtual UNormalizationCheckResult quickCheck(const UnicodeString &s, UErrorCode &errorCode) const =0
Tests if the string is normalized.
icu::Normalizer2::hasBoundaryAfter
virtual UBool hasBoundaryAfter(UChar32 c) const =0
Tests if the character always has a normalization boundary after it, regardless of context.
icu::Normalizer2::getCombiningClass
virtual uint8_t getCombiningClass(UChar32 c) const
Gets the combining class of c.
icu::Normalizer2::getNFKCSimpleCasefoldInstance
static const Normalizer2 * getNFKCSimpleCasefoldInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization which is equi...
icu::Normalizer2::isInert
virtual UBool isInert(UChar32 c) const =0
Tests if the character is normalization-inert.
icu::Normalizer2::getNFKCCasefoldInstance
static const Normalizer2 * getNFKCCasefoldInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for Unicode toNFKC_Casefold() normalization which is equivalent to app...
icu::Normalizer2::normalizeUTF8
virtual void normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode) const
Normalizes a UTF-8 string and optionally records how source substrings relate to changed and unchange...
icu::Normalizer2::isNormalizedUTF8
virtual UBool isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const
Tests if the UTF-8 string is normalized.
icu::Normalizer2::getRawDecomposition
virtual UBool getRawDecomposition(UChar32 c, UnicodeString &decomposition) const
Gets the raw decomposition mapping of c.
icu::Normalizer2::getNFDInstance
static const Normalizer2 * getNFDInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for Unicode NFD normalization.
icu::Normalizer2::getNFKDInstance
static const Normalizer2 * getNFKDInstance(UErrorCode &errorCode)
Returns a Normalizer2 instance for Unicode NFKD normalization.
icu::StringPiece
A string-like object that points to a sized piece of memory.
Definition:stringpiece.h:61
icu::UObject
UObject is the common ICU "boilerplate" class.
Definition:uobject.h:223
icu::UnicodeSet
A mutable set of Unicode characters and multicharacter strings.
Definition:uniset.h:285
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition:unistr.h:296
stringpiece.h
C++ API: StringPiece: Read-only byte string wrapper class.
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition:umachine.h:427
UBool
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition:umachine.h:247
uniset.h
C++ API: Unicode Set.
unistr.h
C++ API: Unicode String.
unorm2.h
C API: New API for Unicode Normalization.
UNormalizationCheckResult
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition:unorm2.h:97
UNormalization2Mode
UNormalization2Mode
Constants for normalization modes.
Definition:unorm2.h:48
USetSpanCondition
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition:uset.h:186
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition:utypes.h:430
U_COMMON_API
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition:utypes.h:315

Generated by doxygen 1.9.1
[8]ページ先頭

©2009-2025 Movatter.jp