Movatterモバイル変換

A mutable set of Unicode characters and multicharacter strings.More...

#include <uniset.h>

Inheritance diagram for icu::UnicodeSet:

Public Types
enum	{MIN_VALUE = 0,MAX_VALUE = 0x10ffff }

enum	ESerialization {kSerialized }

Public Member Functions
UBool	isBogus () const
	Determine if this object contains a valid set.More...

void	setToBogus ()
	Make thisUnicodeSet object invalid.More...

	UnicodeSet ()
	Constructs an empty set.More...

	UnicodeSet (UChar32 start,UChar32 end)
	Constructs a set containing the given range.More...

	UnicodeSet (const uint16_t buffer[], int32_t bufferLen,ESerialization serialization,UErrorCode &status)
	Constructs a set from the output ofserialize().More...

	UnicodeSet (constUnicodeString &pattern,UErrorCode &status)
	Constructs a set from the given pattern.More...

	UnicodeSet (constUnicodeString &pattern, uint32_t options, constSymbolTable *symbols,UErrorCode &status)
	Constructs a set from the given pattern.More...

	UnicodeSet (constUnicodeString &pattern,ParsePosition &pos, uint32_t options, constSymbolTable *symbols,UErrorCode &status)
	Constructs a set from the given pattern.More...

	UnicodeSet (constUnicodeSet &o)
	Constructs a set that is identical to the givenUnicodeSet.More...

virtual	~UnicodeSet ()
	Destructs the set.More...

UnicodeSet &	operator= (constUnicodeSet &o)
	Assigns this object to be a copy of another.More...

virtual bool	operator== (constUnicodeSet &o) const
	Compares the specified object with this set for equality.More...

bool	operator!= (constUnicodeSet &o) const
	Compares the specified object with this set for inequality.More...

virtualUnicodeSet *	clone () const override
	Returns a copy of this object.More...

virtual int32_t	hashCode () const
	Returns the hash code value for this set.More...

USet *	toUSet ()
	Produce a USet * pointer for thisUnicodeSet.More...

constUSet *	toUSet () const
	Produce a const USet * pointer for thisUnicodeSet.More...

UBool	isFrozen () const
	Determines whether the set has been frozen (made immutable) or not.More...

UnicodeSet *	freeze ()
	Freeze the set (make it immutable).More...

UnicodeSet *	cloneAsThawed () const
	Clone the set and make the clone mutable.More...

UnicodeSet &	set (UChar32 start,UChar32 end)
	Make this object represent the range`start - end`.More...

UnicodeSet &	applyPattern (constUnicodeString &pattern,UErrorCode &status)
	Modifies this set to represent the set specified by the given pattern, ignoring Unicode Pattern_White_Space characters.More...

UnicodeSet &	applyPattern (constUnicodeString &pattern, uint32_t options, constSymbolTable *symbols,UErrorCode &status)
	Modifies this set to represent the set specified by the given pattern, optionally ignoring Unicode Pattern_White_Space characters.More...

UnicodeSet &	applyPattern (constUnicodeString &pattern,ParsePosition &pos, uint32_t options, constSymbolTable *symbols,UErrorCode &status)
	Parses the given pattern, starting at the given position.More...

virtualUnicodeString &	toPattern (UnicodeString &result,UBool escapeUnprintable=false) const override
	Returns a string representation of this set.More...

UnicodeSet &	applyIntPropertyValue (UProperty prop, int32_t value,UErrorCode &ec)
	Modifies this set to contain those code points which have the given value for the given binary or enumerated property, as returned by u_getIntPropertyValue.More...

UnicodeSet &	applyPropertyAlias (constUnicodeString &prop, constUnicodeString &value,UErrorCode &ec)
	Modifies this set to contain those code points which have the given value for the given property.More...

virtual int32_t	size () const
	Returns the number of elements in this set (its cardinality).More...

virtualUBool	isEmpty () const
	Returns`true` if this set contains no elements.More...

UBool	hasStrings () const

virtualUBool	contains (UChar32 c) const override
	Returns true if this set contains the given character.More...

virtualUBool	contains (UChar32 start,UChar32 end) const
	Returns true if this set contains every character of the given range.More...

UBool	contains (constUnicodeString &s) const
	Returns`true` if this set contains the given multicharacter string.More...

virtualUBool	containsAll (constUnicodeSet &c) const
	Returns true if this set contains all the characters and strings of the given set.More...

UBool	containsAll (constUnicodeString &s) const
	Returns true if this set contains all the characters of the given string.More...

UBool	containsNone (UChar32 start,UChar32 end) const
	Returns true if this set contains none of the characters of the given range.More...

UBool	containsNone (constUnicodeSet &c) const
	Returns true if this set contains none of the characters and strings of the given set.More...

UBool	containsNone (constUnicodeString &s) const
	Returns true if this set contains none of the characters of the given string.More...

UBool	containsSome (UChar32 start,UChar32 end) const
	Returns true if this set contains one or more of the characters in the given range.More...

UBool	containsSome (constUnicodeSet &s) const
	Returns true if this set contains one or more of the characters and strings of the given set.More...

UBool	containsSome (constUnicodeString &s) const
	Returns true if this set contains one or more of the characters of the given string.More...

int32_t	span (const char16_t *s, int32_t length,USetSpanCondition spanCondition) const
	Returns the length of the initial substring of the input string which consists only of characters and strings that are contained in this set (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (USET_SPAN_NOT_CONTAINED).More...

int32_t	span (constUnicodeString &s, int32_t start,USetSpanCondition spanCondition) const
	Returns the end of the substring of the input string according to the USetSpanCondition.More...

int32_t	spanBack (const char16_t *s, int32_t length,USetSpanCondition spanCondition) const
	Returns the start of the trailing substring of the input string which consists only of characters and strings that are contained in this set (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (USET_SPAN_NOT_CONTAINED).More...

int32_t	spanBack (constUnicodeString &s, int32_t limit,USetSpanCondition spanCondition) const
	Returns the start of the substring of the input string according to the USetSpanCondition.More...

int32_t	spanUTF8 (const char *s, int32_t length,USetSpanCondition spanCondition) const
	Returns the length of the initial substring of the input string which consists only of characters and strings that are contained in this set (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (USET_SPAN_NOT_CONTAINED).More...

int32_t	spanBackUTF8 (const char *s, int32_t length,USetSpanCondition spanCondition) const
	Returns the start of the trailing substring of the input string which consists only of characters and strings that are contained in this set (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (USET_SPAN_NOT_CONTAINED).More...

virtualUMatchDegree	matches (constReplaceable &text, int32_t &offset, int32_t limit,UBool incremental) override
	ImplementUnicodeMatcher::matches()More...

virtual void	addMatchSetTo (UnicodeSet &toUnionTo) const override
	Implementation ofUnicodeMatcher API.More...

int32_t	indexOf (UChar32 c) const
	Returns the index of the given character within this set, where the set is ordered by ascending code point.More...

UChar32	charAt (int32_t index) const
	Returns the character at the given index within this set, where the set is ordered by ascending code point.More...

U_HEADER_NESTED_NAMESPACE::USetCodePoints	codePoints () const
	Returns a C++ "range" for iterating over the code points of this set.More...

U_HEADER_NESTED_NAMESPACE::USetRanges	ranges () const
	Returns a C++ "range" for iterating over the code point ranges of this set.More...

U_HEADER_NESTED_NAMESPACE::USetStrings	strings () const
	Returns a C++ "range" for iterating over the empty and multi-character strings of this set.More...

U_HEADER_NESTED_NAMESPACE::USetElementIterator	begin () const
	Returns a C++ iterator for iterating over all of the elements of this set.More...

U_HEADER_NESTED_NAMESPACE::USetElementIterator	end () const

virtualUnicodeSet &	add (UChar32 start,UChar32 end)
	Adds the specified range to this set if it is not already present.More...

UnicodeSet &	add (UChar32 c)
	Adds the specified character to this set if it is not already present.More...

UnicodeSet &	add (constUnicodeString &s)
	Adds the specified multicharacter to this set if it is not already present.More...

UnicodeSet &	addAll (constUnicodeString &s)
	Adds each of the characters in this string to the set.More...

UnicodeSet &	retainAll (constUnicodeString &s)
	Retains EACH of the characters in this string.More...

UnicodeSet &	complementAll (constUnicodeString &s)
	Complement EACH of the characters in this string.More...

UnicodeSet &	removeAll (constUnicodeString &s)
	Remove EACH of the characters in this string.More...

virtualUnicodeSet &	retain (UChar32 start,UChar32 end)
	Retain only the elements in this set that are contained in the specified range.More...

UnicodeSet &	retain (UChar32 c)
	Retain the specified character from this set if it is present.More...

UnicodeSet &	retain (constUnicodeString &s)
	Retains only the specified string from this set if it is present.More...

virtualUnicodeSet &	remove (UChar32 start,UChar32 end)
	Removes the specified range from this set if it is present.More...

UnicodeSet &	remove (UChar32 c)
	Removes the specified character from this set if it is present.More...

UnicodeSet &	remove (constUnicodeString &s)
	Removes the specified string from this set if it is present.More...

virtualUnicodeSet &	complement ()
	This is equivalent to`complement(MIN_VALUE, MAX_VALUE)`.More...

virtualUnicodeSet &	complement (UChar32 start,UChar32 end)
	Complements the specified range in this set.More...

UnicodeSet &	complement (UChar32 c)
	Complements the specified character in this set.More...

UnicodeSet &	complement (constUnicodeString &s)
	Complement the specified string in this set.More...

virtualUnicodeSet &	addAll (constUnicodeSet &c)
	Adds all of the elements in the specified set to this set if they're not already present.More...

virtualUnicodeSet &	retainAll (constUnicodeSet &c)
	Retains only the elements in this set that are contained in the specified set.More...

virtualUnicodeSet &	removeAll (constUnicodeSet &c)
	Removes from this set all of its elements that are contained in the specified set.More...

virtualUnicodeSet &	complementAll (constUnicodeSet &c)
	Complements in this set all elements contained in the specified set.More...

virtualUnicodeSet &	clear ()
	Removes all of the elements from this set.More...

UnicodeSet &	closeOver (int32_t attribute)
	Close this set over the given attribute.More...

virtualUnicodeSet &	removeAllStrings ()
	Remove all strings from this set.More...

virtual int32_t	getRangeCount () const
	Iteration method that returns the number of ranges contained in this set.More...

virtualUChar32	getRangeStart (int32_t index) const
	Iteration method that returns the first character in the specified range of this set.More...

virtualUChar32	getRangeEnd (int32_t index) const
	Iteration method that returns the last character in the specified range of this set.More...

int32_t	serialize (uint16_t *dest, int32_t destCapacity,UErrorCode &ec) const
	Serializes this set into an array of 16-bit integers.More...

virtualUnicodeSet &	compact ()
	Reallocate this object's internal structures to take up the least possible space, without changing this object's value.More...

virtualUClassID	getDynamicClassID () const override
	ImplementUnicodeFunctor API.More...

Public Member Functions inherited fromicu::UnicodeFilter
virtual	~UnicodeFilter ()
	Destructor.More...

virtualUnicodeMatcher *	toMatcher () const override
	UnicodeFunctor API.More...

virtual void	setData (const TransliterationRuleData *) override
	UnicodeFunctor API.More...

Public Member Functions inherited fromicu::UnicodeFunctor
virtual	~UnicodeFunctor ()
	Destructor.More...

virtualUnicodeReplacer *	toReplacer () const
	Cast 'this' to a UnicodeReplacer* pointer and return the pointer, or null if this is not a UnicodeReplacer*.More...

Public Member Functions inherited fromicu::UObject
virtual	~UObject ()
	Destructor.More...

Public Member Functions inherited fromicu::UnicodeMatcher
virtual	~UnicodeMatcher ()
	Destructor.More...

Static Public Member Functions
staticUnicodeSet *	fromUSet (USet *uset)
	Get aUnicodeSet pointer from a USet.More...

static constUnicodeSet *	fromUSet (constUSet *uset)
	Get aUnicodeSet pointer from a const USet.More...

staticUBool	resemblesPattern (constUnicodeString &pattern, int32_t pos)
	Return true if the given position, in the given pattern, appears to be the start of aUnicodeSet pattern.More...

staticUnicodeSet *	createFrom (constUnicodeString &s)
	Makes a set from a multicharacter string.More...

staticUnicodeSet *	createFromAll (constUnicodeString &s)
	Makes a set from each of the characters in the string.More...

staticUClassID	getStaticClassID ()
	Return the class ID for this class.More...

Static Public Member Functions inherited fromicu::UnicodeFilter
staticUClassID	getStaticClassID ()
	ICU "poor man's RTTI", returns a UClassID for this class.More...

Static Public Member Functions inherited fromicu::UnicodeFunctor
staticUClassID	getStaticClassID ()
	Return the class ID for this class.More...

Friends
class	USetAccess

class	RBBIRuleScanner

class	UnicodeSetIterator

Detailed Description

A mutable set of Unicode characters and multicharacter strings.

Objects of this class represent character classes used in regular expressions. A character class specifies a subset of Unicode code points. Legal code points are U+0000 to U+10FFFF, inclusive.

The UnicodeSet class is not designed to be subclassed.

UnicodeSet supports two APIs. The first is the operand API that allows the caller to modify the value of a UnicodeSet object. It conforms to Java 2's java.util.Set interface, although UnicodeSet does not actually implement that interface. All methods of Set are supported, with the modification that they take a character range or single character instead of an Object, and they take a UnicodeSet instead of a Collection. The operand API may be thought of in terms of boolean logic: a boolean OR is implemented by add, a boolean AND is implemented by retain, a boolean XOR is implemented by complement taking an argument, and a boolean NOT is implemented by complement with no argument. In terms of traditional set theory function names, add is a union, retain is an intersection, remove is an asymmetric difference, and complement with no argument is a set complement with respect to the superset range MIN_VALUE-MAX_VALUE.

The second API is the applyPattern()/toPattern() API from the java.text.Format-derived classes. Unlike the methods that add characters, add categories, and control the logic of the set, the method applyPattern() sets all attributes of a UnicodeSet at once, based on a string pattern.

Pattern syntax

Patterns are accepted by the constructors and the applyPattern() methods and returned by the toPattern() method. These patterns follow a syntax similar to that employed by version 8 regular expression character classes. Here are some simple examples:

[] No characters
[a] The character 'a'
[ae] The characters 'a' and 'e'
[a-e] The characters 'a' through 'e' inclusive, in Unicode code point order
[\u4E01] The character U+4E01
[a{ab}{ac}] The character 'a' and the multicharacter strings "ab" and "ac"
[\p{Lu}] All characters in the general category Uppercase Letter

Any character may be preceded by a backslash in order to remove any special meaning. White space characters, as defined by UCharacter.isWhitespace(), are ignored, unless they are escaped.

Property patterns specify a set of characters having a certain property as defined by the Unicode standard. Both the POSIX-like "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized. For a complete list of supported property patterns, see the User's Guide for UnicodeSet at https://unicode-org.github.io/icu/userguide/strings/unicodeset. Actual determination of property data is defined by the underlying Unicode database as implemented by UCharacter.

Patterns specify individual characters, ranges of characters, and Unicode property sets. When elements are concatenated, they specify their union. To complement a set, place a '^' immediately after the opening '['. Property patterns are inverted by modifying their delimiters; "[:^foo:]" and "\\P{foo}". In any other location, '^' has no special meaning.

Since ICU 70, "[^...]", "[:^foo:]", "\\P{foo}", and "[:binaryProperty=No:]" perform a “code point complement” (all code points minus the original set), removing all multicharacter strings, equivalent to .complement().removeAllStrings(). The complement() API function continues to perform a symmetric difference with all code points and thus retains all multicharacter strings.

Ranges are indicated by placing a '-' between two characters, as in "a-z". This specifies the range of all characters from the left to the right, in Unicode order. If the left character is greater than or equal to the right character it is a syntax error. If a '-' occurs as the first character after the opening '[' or '[^', or if it occurs as the last character before the closing ']', then it is taken as a literal. Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same set of three characters, 'a', 'b', and '-'.

Sets may be intersected using the '&' operator or the asymmetric set difference may be taken using the '-' operator, for example, "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters with values less than 4096. Operators ('&' and '-') have equal precedence and bind left-to-right. Thus "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]". This only really matters for difference; intersection is commutative.

`[a]`	The set containing 'a'
`[a-z]`	The set containing 'a' through 'z' and all letters in between, in Unicode order
`[^a-z]`	The set containing all characters but 'a' through 'z', that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
`[[pat1][pat2]]`	The union of sets specified by pat1 and pat2
`[[pat1]&[pat2]]`	The intersection of sets specified by pat1 and pat2
`[[pat1]-[pat2]]`	The asymmetric difference of sets specified by pat1 and pat2
`[:Lu:] or \p{Lu}`	The set of characters having the specified Unicode property; in this case, Unicode uppercase letters
`[:^Lu:] or \P{Lu}`	The set of characters not having the given Unicode property

Formal syntax

pattern := ('[' '^'? item* ']') | property
item := char | (char '-' char) | pattern-expr
pattern-expr := pattern | pattern-expr pattern | pattern-expr op pattern
op := '&' | '-'
special := '[' | ']' | '-'
char := any character that is not special | ('\' any character) | ('\u' hex hex hex hex)
hex := '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
property := a Unicode property set pattern

Legend:
a := b a may be replaced by b
a? zero or one instance of a
a* zero or more instances of a
a | b either a or b
'a' the literal string between the quotes

Note:

Most UnicodeSet methods do not take a UErrorCode parameter because there are usually very few opportunities for failure other than a shortage of memory, error codes in low-level C++ string methods would be inconvenient, and the error code as the last parameter (ICU convention) would prevent the use of default parameter values. Instead, such methods set the UnicodeSet into a "bogus" state (see isBogus()) if an error occurs.

Author: Alan Liu

Stable:: ICU 2.0

Definition at line285 of fileuniset.h.

Member Enumeration Documentation

◆ anonymous enum

anonymous enum

Enumerator
MIN_VALUE	Minimum value that can be stored in aUnicodeSet. Stable: ICU 2.4
MAX_VALUE	Maximum value that can be stored in aUnicodeSet. Stable: ICU 2.4

Enumerator

MIN_VALUE

Minimum value that can be stored in aUnicodeSet.

Stable:: ICU 2.4

MAX_VALUE

Maximum value that can be stored in aUnicodeSet.

Stable:: ICU 2.4

Definition at line358 of fileuniset.h.

◆ ESerialization

enumicu::UnicodeSet::ESerialization

Internal:: Do not use.

This API is for internal use only.

Definition at line398 of fileuniset.h.

Constructor & Destructor Documentation

◆ UnicodeSet()[1/7]

icu::UnicodeSet::UnicodeSet

(

)

Constructs an empty set.

Stable:: ICU 2.0

◆ UnicodeSet()[2/7]

icu::UnicodeSet::UnicodeSet	(	UChar32	start,
		UChar32	end
	)

Constructs a set containing the given range.

If end < start then an empty set is created.

Parameters

start	first character, inclusive, of range
end	last character, inclusive, of range

Stable:: ICU 2.4

◆ UnicodeSet()[3/7]

icu::UnicodeSet::UnicodeSet	(	const uint16_t	buffer[],
		int32_t	bufferLen,
		ESerialization	serialization,
		UErrorCode &	status
	)

Constructs a set from the output ofserialize().

Parameters

buffer	the 16 bit array
bufferLen	the original length returned fromserialize()
serialization	the value 'kSerialized'
status	error code

Internal:: Do not use. This API is for internal use only.

◆ UnicodeSet()[4/7]

icu::UnicodeSet::UnicodeSet	(	constUnicodeString &	pattern,
		UErrorCode &	status
	)

Constructs a set from the given pattern.

See the class description for the syntax of the pattern language.

Parameters

pattern	a string specifying what characters are in the set
status	returns`U_ILLEGAL_ARGUMENT_ERROR` if the pattern contains a syntax error.

Stable:: ICU 2.0

◆ UnicodeSet()[5/7]

icu::UnicodeSet::UnicodeSet	(	constUnicodeString &	pattern,
		uint32_t	options,
		constSymbolTable *	symbols,
		UErrorCode &	status
	)

Constructs a set from the given pattern.

See the class description for the syntax of the pattern language.

Parameters

pattern	a string specifying what characters are in the set
options	bitmask for options to apply to the pattern. Valid options are USET_IGNORE_SPACE and at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE. These case options are mutually exclusive.
symbols	a symbol table mapping variable names to values and stand-in characters to UnicodeSets; may be nullptr
status	returns`U_ILLEGAL_ARGUMENT_ERROR` if the pattern contains a syntax error.

Internal:: Do not use. This API is for internal use only.

◆ UnicodeSet()[6/7]

icu::UnicodeSet::UnicodeSet	(	constUnicodeString &	pattern,
		ParsePosition &	pos,
		uint32_t	options,
		constSymbolTable *	symbols,
		UErrorCode &	status
	)

Constructs a set from the given pattern.

See the class description for the syntax of the pattern language.

Parameters

pattern	a string specifying what characters are in the set
pos	on input, the position in pattern at which to start parsing. On output, the position after the last character parsed.
options	bitmask for options to apply to the pattern. Valid options are USET_IGNORE_SPACE and at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE. These case options are mutually exclusive.
symbols	a symbol table mapping variable names to values and stand-in characters to UnicodeSets; may be nullptr
status	input-output error code

Stable:: ICU 2.8

◆ UnicodeSet()[7/7]

icu::UnicodeSet::UnicodeSet

(

constUnicodeSet &

)

Constructs a set that is identical to the givenUnicodeSet.

Stable:: ICU 2.0

◆ ~UnicodeSet()

virtual icu::UnicodeSet::~UnicodeSet

(

)

virtual

Destructs the set.

Stable:: ICU 2.0

Member Function Documentation

◆ add()[1/3]

UnicodeSet& icu::UnicodeSet::add

(

constUnicodeString &

)

Adds the specified multicharacter to this set if it is not already present.

If this set already contains the multicharacter, the call leaves this set unchanged. Thus "ch" => {"ch"} A frozen set will not be modified.

Parameters

s	the source string

Returns: this object, for chaining

Stable:: ICU 2.4

◆ add()[2/3]

UnicodeSet& icu::UnicodeSet::add

(

UChar32

)

Adds the specified character to this set if it is not already present.

If this set already contains the specified character, the call leaves this set unchanged. A frozen set will not be modified.

Parameters

c	the character (code point)

Returns: this object, for chaining

Stable:: ICU 2.0

◆ add()[3/3]

virtualUnicodeSet& icu::UnicodeSet::add	(	UChar32	start,
		UChar32	end
	)

virtual

Adds the specified range to this set if it is not already present.

If this set already contains the specified range, the call leaves this set unchanged. If start > end then an empty range is added, leaving the set unchanged. This is equivalent to a boolean logic OR, or a set UNION. A frozen set will not be modified.

Parameters

start	first character, inclusive, of range to be added to this set.
end	last character, inclusive, of range to be added to this set.

Stable:: ICU 2.0

◆ addAll()[1/2]

virtualUnicodeSet& icu::UnicodeSet::addAll

(

constUnicodeSet &

)

virtual

Adds all of the elements in the specified set to this set if they're not already present.

This operation effectively modifies this set so that its value is the union of the two sets. The behavior of this operation is unspecified if the specified collection is modified while the operation is in progress. A frozen set will not be modified.

Parameters

c	set whose elements are to be added to this set.

See also: add(UChar32, UChar32)

Stable:: ICU 2.0

◆ addAll()[2/2]

UnicodeSet& icu::UnicodeSet::addAll

(

constUnicodeString &

)

Adds each of the characters in this string to the set.

Note: "ch" => {"c", "h"} If this set already contains any particular character, it has no effect on that character. A frozen set will not be modified.

Parameters

s	the source string

Returns: this object, for chaining

Stable:: ICU 2.4

◆ addMatchSetTo()

virtual void icu::UnicodeSet::addMatchSetTo

(

UnicodeSet &

toUnionTo

)

const

overridevirtual

Implementation ofUnicodeMatcher API.

Union the set of all characters that may be matched by this object into the given set.

Parameters

toUnionTo

the set into which to union the source characters

Stable:: ICU 2.4

Implementsicu::UnicodeMatcher.

◆ applyIntPropertyValue()

UnicodeSet& icu::UnicodeSet::applyIntPropertyValue	(	UProperty	prop,
		int32_t	value,
		UErrorCode &	ec
	)

Modifies this set to contain those code points which have the given value for the given binary or enumerated property, as returned by u_getIntPropertyValue.

Prior contents of this set are lost. A frozen set will not be modified.

Parameters

prop	a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 or UCHAR_INT_START..UCHAR_INT_LIMIT-1 or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
value	a value in the range u_getIntPropertyMinValue(prop).. u_getIntPropertyMaxValue(prop), with one exception. If prop is UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but rather a mask value produced byU_GET_GC_MASK(). This allows grouped categories such as [:L:] to be represented.
ec	error code input/output parameter

Returns: a reference to this set

Stable:: ICU 2.4

◆ applyPattern()[1/3]

UnicodeSet& icu::UnicodeSet::applyPattern	(	constUnicodeString &	pattern,
		ParsePosition &	pos,
		uint32_t	options,
		constSymbolTable *	symbols,
		UErrorCode &	status
	)

Parses the given pattern, starting at the given position.

The character at pattern.charAt(pos.getIndex()) must be '[', or the parse fails. Parsing continues until the corresponding closing ']'. If a syntax error is encountered between the opening and closing bracket, the parse fails. Upon return from a successful parse, the ParsePosition is updated to point to the character following the closing ']', and a StringBuffer containing a pairs list for the parsed pattern is returned. This method calls itself recursively to parse embedded subpatterns. Empties the set passed before applying the pattern. A frozen set will not be modified.

Parameters

pattern	the string containing the pattern to be parsed. The portion of the string from pos.getIndex(), which must be a '[', to the corresponding closing ']', is parsed.
pos	upon entry, the position at which to begin parsing. The character at pattern.charAt(pos.getIndex()) must be a '['. Upon return from a successful parse, pos.getIndex() is either the character after the closing ']' of the parsed pattern, or pattern.length() if the closing ']' is the last character of the pattern string.
options	bitmask for options to apply to the pattern. Valid options are USET_IGNORE_SPACE and at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE. These case options are mutually exclusive.
symbols	a symbol table mapping variable names to values and stand-ins to UnicodeSets; may be nullptr
status	returns`U_ILLEGAL_ARGUMENT_ERROR` if the pattern contains a syntax error.

Returns: a reference to this

Stable:: ICU 2.8

◆ applyPattern()[2/3]

UnicodeSet& icu::UnicodeSet::applyPattern	(	constUnicodeString &	pattern,
		UErrorCode &	status
	)

Modifies this set to represent the set specified by the given pattern, ignoring Unicode Pattern_White_Space characters.

See the class description for the syntax of the pattern language. A frozen set will not be modified.

Parameters

pattern	a string specifying what characters are in the set
status	returns`U_ILLEGAL_ARGUMENT_ERROR` if the pattern contains a syntax error. Empties the set passed before applying the pattern.

Returns: a reference to this

Stable:: ICU 2.0

◆ applyPattern()[3/3]

UnicodeSet& icu::UnicodeSet::applyPattern	(	constUnicodeString &	pattern,
		uint32_t	options,
		constSymbolTable *	symbols,
		UErrorCode &	status
	)

Modifies this set to represent the set specified by the given pattern, optionally ignoring Unicode Pattern_White_Space characters.

See the class description for the syntax of the pattern language. A frozen set will not be modified.

Parameters

pattern	a string specifying what characters are in the set
options	bitmask for options to apply to the pattern. Valid options are USET_IGNORE_SPACE and at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE. These case options are mutually exclusive.
symbols	a symbol table mapping variable names to values and stand-ins to UnicodeSets; may be nullptr
status	returns`U_ILLEGAL_ARGUMENT_ERROR` if the pattern contains a syntax error. Empties the set passed before applying the pattern.

Returns: a reference to this

Internal:: Do not use. This API is for internal use only.

◆ applyPropertyAlias()

UnicodeSet& icu::UnicodeSet::applyPropertyAlias	(	constUnicodeString &	prop,
		constUnicodeString &	value,
		UErrorCode &	ec
	)

Modifies this set to contain those code points which have the given value for the given property.

Prior contents of this set are lost. A frozen set will not be modified.

Parameters

prop

a property alias, either short or long. The name is matched loosely. See PropertyAliases.txt for names and a description of loose matching. If the value string is empty, then this string is interpreted as either a General_Category value alias, a Script value alias, a binary property alias, or a special ID. Special IDs are matched loosely and correspond to the following sets:

"ANY" = [\u0000-\U0010FFFF], "ASCII" = [\u0000-\u007F], "Assigned" = [:^Cn:].

Parameters

value	a value alias, either short or long. The name is matched loosely. See PropertyValueAliases.txt for names and a description of loose matching. In addition to aliases listed, numeric values and canonical combining classes may be expressed numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string may also be empty.
ec	error code input/output parameter

Returns: a reference to this set

Stable:: ICU 2.4

◆ begin()

U_HEADER_NESTED_NAMESPACE::USetElementIterator icu::UnicodeSet::begin

(

)

const

inline

Returns a C++ iterator for iterating over all of the elements of this set.

Convenient all-in-one iteration, but creates a std::u16string for each code point or string. (Similar to how Java UnicodeSet is an Iterable<String>.)

Code points are returned first, then empty and multi-character strings.

UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);

for (auto el :set) {

UnicodeString us(el);

std::string u8;

printf("set.element length %ld \"%s\"\n", (long)us.length(), us.toUTF8String(u8).c_str());

}

icu::UnicodeSet::UnicodeSet

UnicodeSet()

Constructs an empty set.

icu::UnicodeSet::set

UnicodeSet & set(UChar32 start, UChar32 end)

Make this object represent the range start - end.

Returns: an all-elements iterator.

Draft:: This API may be changed in the future versions and was introduced in ICU 77

See also: end; codePoints; ranges; strings

Definition at line1203 of fileuniset.h.

◆ charAt()

UChar32 icu::UnicodeSet::charAt

(

int32_t

index

)

const

Returns the character at the given index within this set, where the set is ordered by ascending code point.

If the index is out of range for characters, returns (UChar32)-1. The inverse of this method is indexOf().

For iteration, this is slower than UnicodeSetIterator or getRangeCount()/getRangeStart()/getRangeEnd(), because for each call it skips linearly over index characters in the ranges.

Parameters

index

an index from 0..size()-1

Returns: the character at the given index, or (UChar32)-1.

Stable:: ICU 2.4

◆ clear()

virtualUnicodeSet& icu::UnicodeSet::clear

(

)

virtual

Removes all of the elements from this set.

This set will be empty after this call returns. A frozen set will not be modified.

Stable:: ICU 2.0

◆ clone()

virtualUnicodeSet* icu::UnicodeSet::clone

(

)

const

overridevirtual

Returns a copy of this object.

All UnicodeFunctor objects have to support cloning in order to allow classes using UnicodeFunctors, such as Transliterator, to implement cloning. If this set is frozen, then the clone will be frozen as well. Use cloneAsThawed() for a mutable clone of a frozen set.

See also: cloneAsThawed

Stable:: ICU 2.0

Implementsicu::UnicodeFilter.

◆ cloneAsThawed()

UnicodeSet* icu::UnicodeSet::cloneAsThawed

(

)

const

Clone the set and make the clone mutable.

See the ICU4J Freezable interface for details.

Returns: the mutable clone

See also: freeze; isFrozen

Stable:: ICU 3.8

◆ closeOver()

UnicodeSet& icu::UnicodeSet::closeOver

(

int32_t

attribute

)

Close this set over the given attribute.

For the attribute USET_CASE_INSENSITIVE, the result is to modify this set so that:

For each character or string 'a' in this set, all strings or characters 'b' such that foldCase(a) == foldCase(b) are added to this set.
For each string 'e' in the resulting set, if e != foldCase(e), 'e' will be removed.

Example: [aq\u00DF{Bc}{bC}{Fi}] => [aAqQ\u00DF\uFB01{ss}{bc}{fi}]

(Here foldCase(x) refers to the operation u_strFoldCase, and a == b denotes that the contents are the same, not pointer comparison.)

A frozen set will not be modified.

Parameters

attribute

bitmask for attributes to close over. Valid options: At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE. These case options are mutually exclusive. Unrelated options bits are ignored.

Returns: a reference to this set.

Stable:: ICU 4.2

◆ codePoints()

U_HEADER_NESTED_NAMESPACE::USetCodePoints icu::UnicodeSet::codePoints

(

)

const

inline

Returns a C++ "range" for iterating over the code points of this set.

UnicodeSet set(u"[abcçカ🚴]", errorCode);

for (UChar32 c :set.codePoints()) {

printf("set.codePoint U+%04lx\n", (long)c);

}

icu::UnicodeSet::codePoints

U_HEADER_NESTED_NAMESPACE::USetCodePoints codePoints() const

Returns a C++ "range" for iterating over the code points of this set.

Definition:uniset.h:1123

UChar32

int32_t UChar32

Define UChar32 as a type for single Unicode code points.

Definition:umachine.h:427

Returns: a "range" object for iterating over the code points of this set.

Draft:: This API may be changed in the future versions and was introduced in ICU 76

See also: ranges; strings; begin; end

Definition at line1123 of fileuniset.h.

◆ compact()

virtualUnicodeSet& icu::UnicodeSet::compact

(

)

virtual

Reallocate this object's internal structures to take up the least possible space, without changing this object's value.

A frozen set will not be modified.

Stable:: ICU 2.4

◆ complement()[1/4]

virtualUnicodeSet& icu::UnicodeSet::complement

(

)

virtual

This is equivalent to complement(MIN_VALUE, MAX_VALUE).

Note: This performs a symmetric difference with all code points and thus retains all multicharacter strings. In order to achieve a “code point complement” (all code points minus this set), the easiest is to call .complement().removeAllStrings().

A frozen set will not be modified.

Stable:: ICU 2.0

◆ complement()[2/4]

UnicodeSet& icu::UnicodeSet::complement

(

constUnicodeString &

)

Complement the specified string in this set.

The string will be removed if it is in this set, or will be added if it is not in this set. A frozen set will not be modified.

Parameters

s	the string to complement

Returns: this object, for chaining

Stable:: ICU 2.4

◆ complement()[3/4]

UnicodeSet& icu::UnicodeSet::complement

(

UChar32

)

Complements the specified character in this set.

The character will be removed if it is in this set, or will be added if it is not in this set. A frozen set will not be modified.

Parameters

c	the character (code point)

Returns: this object, for chaining

Stable:: ICU 2.0

◆ complement()[4/4]

virtualUnicodeSet& icu::UnicodeSet::complement	(	UChar32	start,
		UChar32	end
	)

virtual

Complements the specified range in this set.

Any character in the range will be removed if it is in this set, or will be added if it is not in this set. If start > end then an empty range is complemented, leaving the set unchanged. This is equivalent to a boolean logic XOR. A frozen set will not be modified.

Parameters

start	first character, inclusive, of range
end	last character, inclusive, of range

Stable:: ICU 2.0

◆ complementAll()[1/2]

virtualUnicodeSet& icu::UnicodeSet::complementAll

(

constUnicodeSet &

)

virtual

Complements in this set all elements contained in the specified set.

Any character in the other set will be removed if it is in this set, or will be added if it is not in this set. A frozen set will not be modified.

Parameters

c	set that defines which elements will be xor'ed from this set.

Stable:: ICU 2.4

◆ complementAll()[2/2]

UnicodeSet& icu::UnicodeSet::complementAll

(

constUnicodeString &

)

Complement EACH of the characters in this string.

Note: "ch" == {"c", "h"} A frozen set will not be modified.

Parameters

s	the source string

Returns: this object, for chaining

Stable:: ICU 2.4

◆ contains()[1/3]

UBool icu::UnicodeSet::contains

(

constUnicodeString &

)

const

Returns true if this set contains the given multicharacter string.

Parameters

s	string to be checked for containment

Returns: true if this set contains the specified string

Stable:: ICU 2.4

◆ contains()[2/3]

virtualUBool icu::UnicodeSet::contains

(

UChar32

)

const

overridevirtual

Returns true if this set contains the given character.

This function works faster with a frozen set.

Parameters

c	character to be checked for containment

Returns: true if the test condition is met

Stable:: ICU 2.0

Implementsicu::UnicodeFilter.

◆ contains()[3/3]

virtualUBool icu::UnicodeSet::contains	(	UChar32	start,
		UChar32	end
	)		const

virtual

Returns true if this set contains every character of the given range.

Parameters

start	first character, inclusive, of the range
end	last character, inclusive, of the range

Returns: true if the test condition is met

Stable:: ICU 2.0

◆ containsAll()[1/2]

virtualUBool icu::UnicodeSet::containsAll

(

constUnicodeSet &

)

const

virtual

Returns true if this set contains all the characters and strings of the given set.

Parameters

c	set to be checked for containment

Returns: true if the test condition is met

Stable:: ICU 2.4

◆ containsAll()[2/2]

UBool icu::UnicodeSet::containsAll

(

constUnicodeString &

)

const

Returns true if this set contains all the characters of the given string.

Parameters

s	string containing characters to be checked for containment

Returns: true if the test condition is met

Stable:: ICU 2.4

◆ containsNone()[1/3]

UBool icu::UnicodeSet::containsNone

(

constUnicodeSet &

)

const

Returns true if this set contains none of the characters and strings of the given set.

Parameters

c	set to be checked for containment

Returns: true if the test condition is met

Stable:: ICU 2.4

◆ containsNone()[2/3]

UBool icu::UnicodeSet::containsNone

(

constUnicodeString &

)

const

Returns true if this set contains none of the characters of the given string.

Parameters

s	string containing characters to be checked for containment

Returns: true if the test condition is met

Stable:: ICU 2.4

◆ containsNone()[3/3]

UBool icu::UnicodeSet::containsNone	(	UChar32	start,
		UChar32	end
	)		const

Returns true if this set contains none of the characters of the given range.

Parameters

start	first character, inclusive, of the range
end	last character, inclusive, of the range

Returns: true if the test condition is met

Stable:: ICU 2.4

◆ containsSome()[1/3]

UBool icu::UnicodeSet::containsSome

(

constUnicodeSet &

)

const

inline

Returns true if this set contains one or more of the characters and strings of the given set.

Parameters

s	The set to be checked for containment

Returns: true if the condition is met

Stable:: ICU 2.4

Definition at line1856 of fileuniset.h.

◆ containsSome()[2/3]

UBool icu::UnicodeSet::containsSome

(

constUnicodeString &

)

const

inline

Returns true if this set contains one or more of the characters of the given string.

Parameters

s	string containing characters to be checked for containment

Returns: true if the condition is met

Stable:: ICU 2.4

Definition at line1860 of fileuniset.h.

◆ containsSome()[3/3]

UBool icu::UnicodeSet::containsSome	(	UChar32	start,
		UChar32	end
	)		const

inline

Returns true if this set contains one or more of the characters in the given range.

Parameters

start	first character, inclusive, of the range
end	last character, inclusive, of the range

Returns: true if the condition is met

Stable:: ICU 2.4

Definition at line1852 of fileuniset.h.

◆ createFrom()

staticUnicodeSet* icu::UnicodeSet::createFrom

(

constUnicodeString &

)

static

Makes a set from a multicharacter string.

Thus "ch" => {"ch"}

Parameters

s	the source string

Returns: a newly created set containing the given string. The caller owns the return object and is responsible for deleting it.

Stable:: ICU 2.4

◆ createFromAll()

staticUnicodeSet* icu::UnicodeSet::createFromAll

(

constUnicodeString &

)

static

Makes a set from each of the characters in the string.

Thus "ch" => {"c", "h"}

Parameters

s	the source string

Returns: a newly created set containing the given characters. The caller owns the return object and is responsible for deleting it.

Stable:: ICU 2.4

◆ end()

U_HEADER_NESTED_NAMESPACE::USetElementIterator icu::UnicodeSet::end

(

)

const

inline

Returns: an exclusive-end sentinel for iterating over all of the elements of this set.

Draft:: This API may be changed in the future versions and was introduced in ICU 77

See also: begin; codePoints; ranges; strings

Definition at line1215 of fileuniset.h.

◆ freeze()

UnicodeSet* icu::UnicodeSet::freeze

(

)

Freeze the set (make it immutable).

Once frozen, it cannot be unfrozen and is therefore thread-safe until it is deleted. See the ICU4J Freezable interface for details. Freezing the set may also make some operations faster, for example contains() and span(). A frozen set will not be modified. (It remains frozen.)

Returns: this set.

See also: isFrozen; cloneAsThawed

Stable:: ICU 3.8

◆ fromUSet()[1/2]

constUnicodeSet * icu::UnicodeSet::fromUSet

(

constUSet *

uset

)

inlinestatic

Get a UnicodeSet pointer from a const USet.

Parameters

uset	a const USet (the ICU plain C type for UnicodeSet)

Returns: the corresponding UnicodeSet pointer.

Stable:: ICU 4.2

Definition at line1872 of fileuniset.h.

◆ fromUSet()[2/2]

UnicodeSet * icu::UnicodeSet::fromUSet

(

USet *

uset

)

inlinestatic

Get a UnicodeSet pointer from a USet.

Parameters

uset	a USet (the ICU plain C type for UnicodeSet)

Returns: the corresponding UnicodeSet pointer.

Stable:: ICU 4.2

Definition at line1868 of fileuniset.h.

◆ getDynamicClassID()

virtualUClassID icu::UnicodeSet::getDynamicClassID

(

)

const

overridevirtual

Implement UnicodeFunctor API.

Returns: The class ID for this object. All objects of a given class have the same class ID. Objects of other classes have different class IDs.

Stable:: ICU 2.4

Implements icu::UnicodeFunctor.

◆ getRangeCount()

virtual int32_t icu::UnicodeSet::getRangeCount

(

)

const

virtual

Iteration method that returns the number of ranges contained in this set.

See also: getRangeStart; getRangeEnd

Stable:: ICU 2.4

◆ getRangeEnd()

virtualUChar32 icu::UnicodeSet::getRangeEnd

(

int32_t

index

)

const

virtual

Iteration method that returns the last character in the specified range of this set.

See also: getRangeCount; getRangeStart

Stable:: ICU 2.4

◆ getRangeStart()

virtualUChar32 icu::UnicodeSet::getRangeStart

(

int32_t

index

)

const

virtual

Iteration method that returns the first character in the specified range of this set.

See also: getRangeCount; getRangeEnd

Stable:: ICU 2.4

◆ getStaticClassID()

staticUClassID icu::UnicodeSet::getStaticClassID

(

)

static

Return the class ID for this class.

This is useful only for comparing to a return value from getDynamicClassID(). For example:

Base* polymorphic_pointer = createPolymorphicObject();
if (polymorphic_pointer->getDynamicClassID() ==
    Derived::getStaticClassID()) ...

Returns: The class ID for all objects of this class.

Stable:: ICU 2.0

◆ hashCode()

virtual int32_t icu::UnicodeSet::hashCode

(

)

const

virtual

Returns the hash code value for this set.

Returns: the hash code value for this set.

See also: Object::hashCode()

Stable:: ICU 2.0

◆ hasStrings()

UBool icu::UnicodeSet::hasStrings

(

)

const

Returns: true if this set contains multi-character strings or the empty string.

Stable:: ICU 70

◆ indexOf()

int32_t icu::UnicodeSet::indexOf

(

UChar32

)

const

Returns the index of the given character within this set, where the set is ordered by ascending code point.

If the character is not in this set, return -1. The inverse of this method is charAt().

Returns: an index from 0..size()-1, or -1

Stable:: ICU 2.4

◆ isBogus()

UBool icu::UnicodeSet::isBogus

(

)

const

inline

Determine if this object contains a valid set.

A bogus set has no value. It is different from an empty set. It can be used to indicate that no set value is available.

Returns: true if the set is bogus/invalid, false otherwise

See also: setToBogus()

Stable:: ICU 4.0

Definition at line1864 of fileuniset.h.

◆ isEmpty()

virtualUBool icu::UnicodeSet::isEmpty

(

)

const

virtual

Returns true if this set contains no elements.

Returns: true if this set contains no elements.

Stable:: ICU 2.0

◆ isFrozen()

UBool icu::UnicodeSet::isFrozen

(

)

const

inline

Determines whether the set has been frozen (made immutable) or not.

See the ICU4J Freezable interface for details.

Returns: true/false for whether the set has been frozen

See also: freeze; cloneAsThawed

Stable:: ICU 3.8

Definition at line1848 of fileuniset.h.

◆ matches()

virtualUMatchDegree icu::UnicodeSet::matches	(	constReplaceable &	text,
		int32_t &	offset,
		int32_t	limit,
		UBool	incremental
	)

overridevirtual

Implement UnicodeMatcher::matches()

Stable:: ICU 2.4

Reimplemented from icu::UnicodeFilter.

◆ operator!=()

bool icu::UnicodeSet::operator!=

(

constUnicodeSet &

)

const

inline

Compares the specified object with this set for inequality.

Returns true if the specified set is not equal to this set.

Stable:: ICU 2.0

Definition at line 1844 of file uniset.h.

References icu::operator==().

◆ operator=()

UnicodeSet& icu::UnicodeSet::operator=

(

constUnicodeSet &

)

Assigns this object to be a copy of another.

A frozen set will not be modified.

Stable:: ICU 2.0

◆ operator==()

virtual bool icu::UnicodeSet::operator==

(

constUnicodeSet &

)

const

virtual

Compares the specified object with this set for equality.

Returns true if the two sets have the same size, and every member of the specified set is contained in this set (or equivalently, every member of this set is contained in the specified set).

Parameters

o	set to be compared for equality with this set.

Returns: true if the specified set is equal to this set.

Stable:: ICU 2.0

◆ ranges()

U_HEADER_NESTED_NAMESPACE::USetRanges icu::UnicodeSet::ranges

(

)

const

inline

Returns a C++ "range" for iterating over the code point ranges of this set.

UnicodeSet set(u"[abcçカ🚴]", errorCode);

for (auto [start,end] :set.ranges()) {

printf("set.range U+%04lx..U+%04lx\n", (long)start, (long)end);

}

for (auto range :set.ranges()) {

for (UChar32 c : range) {

printf("set.range.c U+%04lx\n", (long)c);

}

}

icu::UnicodeSet::end

U_HEADER_NESTED_NAMESPACE::USetElementIterator end() const

Definition:uniset.h:1215

icu::UnicodeSet::ranges

U_HEADER_NESTED_NAMESPACE::USetRanges ranges() const

Returns a C++ "range" for iterating over the code point ranges of this set.

Definition:uniset.h:1149

Returns: a "range" object for iterating over the code point ranges of this set.

Draft:: This API may be changed in the future versions and was introduced in ICU 76

See also: codePoints; strings; begin; end

Definition at line1149 of fileuniset.h.

◆ remove()[1/3]

UnicodeSet& icu::UnicodeSet::remove

(

constUnicodeString &

)

Removes the specified string from this set if it is present.

The set will not contain the specified string once the call returns. A frozen set will not be modified.

Parameters

s	the source string

Returns: this object, for chaining

Stable:: ICU 2.4

◆ remove()[2/3]

UnicodeSet& icu::UnicodeSet::remove

(

UChar32

)

Removes the specified character from this set if it is present.

The set will not contain the specified character once the call returns. A frozen set will not be modified.

Parameters

c	the character (code point)

Returns: this object, for chaining

Stable:: ICU 2.0

◆ remove()[3/3]

virtualUnicodeSet& icu::UnicodeSet::remove	(	UChar32	start,
		UChar32	end
	)

virtual

Removes the specified range from this set if it is present.

The set will not contain the specified range once the call returns. If start > end then an empty range is removed, leaving the set unchanged. A frozen set will not be modified.

Parameters

start	first character, inclusive, of range to be removed from this set.
end	last character, inclusive, of range to be removed from this set.

Stable:: ICU 2.0

◆ removeAll()[1/2]

virtualUnicodeSet& icu::UnicodeSet::removeAll

(

constUnicodeSet &

)

virtual

Removes from this set all of its elements that are contained in the specified set.

This operation effectively modifies this set so that its value is the asymmetric set difference of the two sets. A frozen set will not be modified.

Parameters

c	set that defines which elements will be removed from this set.

Stable:: ICU 2.0

◆ removeAll()[2/2]

UnicodeSet& icu::UnicodeSet::removeAll

(

constUnicodeString &

)

Remove EACH of the characters in this string.

Note: "ch" == {"c", "h"} A frozen set will not be modified.

Parameters

s	the source string

Returns: this object, for chaining

Stable:: ICU 2.4

◆ removeAllStrings()

virtualUnicodeSet& icu::UnicodeSet::removeAllStrings

(

)

virtual

Remove all strings from this set.

Returns: a reference to this set.

Stable:: ICU 4.2

◆ resemblesPattern()

staticUBool icu::UnicodeSet::resemblesPattern	(	constUnicodeString &	pattern,
		int32_t	pos
	)

static

Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet pattern.

Stable:: ICU 2.4

◆ retain()[1/3]

UnicodeSet& icu::UnicodeSet::retain

(

constUnicodeString &

)

Retains only the specified string from this set if it is present.

Upon return this set will be empty if it did not contain s, or will only contain s if it did contain s. A frozen set will not be modified.

Parameters

s	the source string

Returns: this object, for chaining

Stable:: ICU 69

◆ retain()[2/3]

UnicodeSet& icu::UnicodeSet::retain

(

UChar32

)

Retain the specified character from this set if it is present.

A frozen set will not be modified.

Parameters

c	the character (code point)

Returns: this object, for chaining

Stable:: ICU 2.0

◆ retain()[3/3]

virtualUnicodeSet& icu::UnicodeSet::retain	(	UChar32	start,
		UChar32	end
	)

virtual

Retain only the elements in this set that are contained in the specified range.

If start > end then an empty range is retained, leaving the set empty. This is equivalent to a boolean logic AND, or a set INTERSECTION. A frozen set will not be modified.

Parameters

start	first character, inclusive, of range
end	last character, inclusive, of range

Stable:: ICU 2.0

◆ retainAll()[1/2]

virtualUnicodeSet& icu::UnicodeSet::retainAll

(

constUnicodeSet &

)

virtual

Retains only the elements in this set that are contained in the specified set.

In other words, removes from this set all of its elements that are not contained in the specified set. This operation effectively modifies this set so that its value is the intersection of the two sets. A frozen set will not be modified.

Parameters

c	set that defines which elements this set will retain.

Stable:: ICU 2.0

◆ retainAll()[2/2]

UnicodeSet& icu::UnicodeSet::retainAll

(

constUnicodeString &

)

Retains EACH of the characters in this string.

Note: "ch" == {"c", "h"} A frozen set will not be modified.

Parameters

s	the source string

Returns: this object, for chaining

Stable:: ICU 2.4

◆ serialize()

int32_t icu::UnicodeSet::serialize	(	uint16_t *	dest,
		int32_t	destCapacity,
		UErrorCode &	ec
	)		const

Serializes this set into an array of 16-bit integers.

Serialization (currently) only records the characters in the set; multicharacter strings are ignored.

The array has the following format (each line is one 16-bit integer):

length = (n+2*m) | (m!=0?0x8000:0)
bmpLength = n; present if m!=0
bmp[0]
bmp[1]
...
bmp[n-1]
supp-high[0]
supp-low[0]
supp-high[1]
supp-low[1]
...
supp-high[m-1]
supp-low[m-1]

The array starts with a header. After the header are n bmp code points, then m supplementary code points. Either n or m or both may be zero. n+2*m is always <= 0x7FFF.

If there are no supplementary characters (if m==0) then the header is one 16-bit integer, 'length', with value n.

If there are supplementary characters (if m!=0) then the header is two 16-bit integers. The first, 'length', has value (n+2*m)|0x8000. The second, 'bmpLength', has value n.

After the header the code points are stored in ascending order. Supplementary code points are stored as most significant 16 bits followed by least significant 16 bits.

Parameters

dest	pointer to buffer of destCapacity 16-bit integers. May be nullptr only if destCapacity is zero.
destCapacity	size of dest, or zero. Must not be negative.
ec	error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR if n+2m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if n+2m+(m!=0?2:1) > destCapacity.

Returns: the total length of the serialized format, including the header, that is, n+2*m+(m!=0?2:1), or 0 on error other than U_BUFFER_OVERFLOW_ERROR.

Stable:: ICU 2.4

◆ set()

UnicodeSet& icu::UnicodeSet::set	(	UChar32	start,
		UChar32	end
	)

Make this object represent the range start - end.

If start > end then this object is set to an empty range. A frozen set will not be modified.

Parameters

start	first character in the set, inclusive
end	last character in the set, inclusive

Stable:: ICU 2.4

◆ setToBogus()

void icu::UnicodeSet::setToBogus

(

)

Make this UnicodeSet object invalid.

The set will test true with isBogus().

A bogus set has no value. It is different from an empty set. It can be used to indicate that no set value is available.

This utility function is used throughout the UnicodeSet implementation to indicate that a UnicodeSet operation failed, and may be used in other functions, especially but not exclusively when such functions do not take a UErrorCode for simplicity.

See also: isBogus()

Stable:: ICU 4.0

◆ size()

virtual int32_t icu::UnicodeSet::size

(

)

const

virtual

Returns the number of elements in this set (its cardinality).

Note that the elements of a set may include both individual code points and strings.

This is slower than getRangeCount() because it counts the code points of all ranges.

Returns: the number of elements in this set (its cardinality).

Stable:: ICU 2.0

See also: getRangeCount

◆ span()[1/2]

int32_t icu::UnicodeSet::span	(	const char16_t *	s,
		int32_t	length,
		USetSpanCondition	spanCondition
	)		const

Returns the length of the initial substring of the input string which consists only of characters and strings that are contained in this set (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (USET_SPAN_NOT_CONTAINED).

See USetSpanCondition for details. Similar to the strspn() C library function. Unpaired surrogates are treated according to contains() of their surrogate code points. This function works faster with a frozen set and with a non-negative string length argument.

Parameters

s	start of the string
length	of the string; can be -1 for NUL-terminated
spanCondition	specifies the containment condition

Returns: the length of the initial substring according to the spanCondition; 0 if the start of the string does not fit the spanCondition

Stable:: ICU 3.8

See also: USetSpanCondition

◆ span()[2/2]

int32_t icu::UnicodeSet::span	(	constUnicodeString &	s,
		int32_t	start,
		USetSpanCondition	spanCondition
	)		const

inline

Returns the end of the substring of the input string according to the USetSpanCondition.

Same as start+span(s.getBuffer()+start, s.length()-start, spanCondition) after pinning start to 0<=start<=s.length().

Parameters

s	the string
start	the start index in the string for the span operation
spanCondition	specifies the containment condition

Returns: the exclusive end of the substring according to the spanCondition; the substring s.tempSubStringBetween(start, end) fulfills the spanCondition

Stable:: ICU 4.4

See also: USetSpanCondition

Definition at line 1884 of file uniset.h.

References icu::UnicodeString::getBuffer(), and icu::UnicodeString::length().

◆ spanBack()[1/2]

int32_t icu::UnicodeSet::spanBack	(	const char16_t *	s,
		int32_t	length,
		USetSpanCondition	spanCondition
	)		const

Returns the start of the trailing substring of the input string which consists only of characters and strings that are contained in this set (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (USET_SPAN_NOT_CONTAINED).

See USetSpanCondition for details. Unpaired surrogates are treated according to contains() of their surrogate code points. This function works faster with a frozen set and with a non-negative string length argument.

Parameters

s	start of the string
length	of the string; can be -1 for NUL-terminated
spanCondition	specifies the containment condition

Returns: the start of the trailing substring according to the spanCondition; the string length if the end of the string does not fit the spanCondition

Stable:: ICU 3.8

See also: USetSpanCondition

◆ spanBack()[2/2]

int32_t icu::UnicodeSet::spanBack	(	constUnicodeString &	s,
		int32_t	limit,
		USetSpanCondition	spanCondition
	)		const

inline

Returns the start of the substring of the input string according to the USetSpanCondition.

Same as spanBack(s.getBuffer(), limit, spanCondition) after pinning limit to 0<=limit<=s.length().

Parameters

s	the string
limit	the exclusive-end index in the string for the span operation (use s.length() or INT32_MAX for spanning back from the end of the string)
spanCondition	specifies the containment condition

Returns: the start of the substring according to the spanCondition; the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition

Stable:: ICU 4.4

See also: USetSpanCondition

Definition at line 1894 of file uniset.h.

References icu::UnicodeString::getBuffer(), and icu::UnicodeString::length().

◆ spanBackUTF8()

int32_t icu::UnicodeSet::spanBackUTF8	(	const char *	s,
		int32_t	length,
		USetSpanCondition	spanCondition
	)		const

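Returns the start of the trailing substring of the input UTF-8 string which consists only of characters and strings that are contained in this set (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (USET_SPAN_NOT_CONTAINED).

See USetSpanCondition for details. Malformed byte sequences are treated according to contains(0xfffd). This function works faster with a frozen set and with a non-negative string length argument.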

Parameters

s	start of the string (UTF-8)
length	of the string; can be -1 for NUL-terminated
spanCondition	specifies the containment condition

Returns: the start of the trailing substring according to the spanCondition; the string length if the end of the string does not fit the spanCondition

Stable:: ICU 3.8

See also: USetSpanCondition

◆ spanUTF8()

int32_t icu::UnicodeSet::spanUTF8	(	const char *	s,
		int32_t	length,
		USetSpanCondition	spanCondition
	)		const

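Returns the length of the initial substring of the input UTF-8 string which consists only of characters and strings that are contained in this set (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (USET_SPAN_NOT_CONTAINED).

See USetSpanCondition for details. Similar to the strspn() C library function. Malformed byte sequences are treated according to contains(0xfffd). This function works faster with a frozen set and with a non-negative string length argument.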

Parameters

s	start of the string (UTF-8)
length	of the string; can be -1 for NUL-terminated
spanCondition	specifies the containment condition

Returns: the length of the initial substring according to the spanCondition; 0 if the start of the string does not fit the spanCondition

Stable:: ICU 3.8

See also: USetSpanCondition

◆ strings()

U_HEADER_NESTED_NAMESPACE::USetStrings icu::UnicodeSet::strings

(

)

const

inline

Returns a C++ "range" for iterating over the empty and multi-character strings of this set.

Returns each string as a std::u16string_view without copying its contents.

UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);

for (auto s :set.strings()) {

UnicodeString us(s);

std::string u8;

printf("set.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str());

}

icu::UnicodeSet::strings

U_HEADER_NESTED_NAMESPACE::USetStrings strings() const

Returns a C++ "range" for iterating over the empty and multi-character strings of this set.

Definition:uniset.h:1173

Returns: a "range" object for iterating over the strings of this set.

Draft:: This API may be changed in the future versions and was introduced in ICU 76

See also: codePoints; ranges; begin; end

Definition at line1173 of fileuniset.h.

◆ toPattern()

virtualUnicodeString& icu::UnicodeSet::toPattern	(	UnicodeString &	result,
		UBool	escapeUnprintable =`false`
	)		const

overridevirtual

Returns a string representation of this set.

If the result of calling this function is passed to a UnicodeSet constructor, it will produce another set that is equal to this one. A frozen set will not be modified.

Parameters

result	the string to receive the rules. Previous contents will be deleted.
escapeUnprintable	if true then convert unprintable characters to their hex escape representations, \uxxxx or \Uxxxxxxxx. Unprintable characters are those other than U+000A, U+0020..U+007E.

Stable:: ICU 2.0

Implementsicu::UnicodeMatcher.

◆ toUSet()[1/2]

USet * icu::UnicodeSet::toUSet

(

)

inline

Produce a USet * pointer for this UnicodeSet.

USet is the plain C type for UnicodeSet.

Returns: a USet pointer for this UnicodeSet

Stable:: ICU 4.2

Definition at line 1876 of file uniset.h.

◆ toUSet()[2/2]

const USet * icu::UnicodeSet::toUSet

(

)

const

inline

Produce a const USet * pointer for this UnicodeSet.

USet is the plain C type for UnicodeSet.

Returns: a const USet pointer for this UnicodeSet

Stable: ICU 4.2

Definition at line 1880 of file uniset.h.

The documentation for this class was generated from the following file:

common/unicode/uniset.h

`[]`	No characters
`[a]`	The character 'a'
`[ae]`	The characters 'a' and 'e'
`[a-e]`	The characters 'a' through 'e' inclusive, in Unicode code point order
`[\u4E01]`	The character U+4E01
`[a{ab}{ac}]`	The character 'a' and the multicharacter strings "ab" and "ac"
`[\p{Lu}]`	All characters in the general category Uppercase Letter

`pattern :=`	`('[' '^'? item* ']') \| property`
`item :=`	`char \| (char '-' char) \| pattern-expr`
`pattern-expr :=`	`pattern \| pattern-expr pattern \| pattern-expr op pattern`
`op :=`	`'&' \| '-'`
`special :=`	`'[' \| ']' \| '-'`
`char :=`	any character that is not `special \| ('\'` any character `) \| ('\u' hex hex hex hex)`
`hex :=`	`'0' \| '1' \| '2' \| '3' \| '4' \| '5' \| '6' \| '7' \| '8' \| '9' \| 'A' \| 'B' \| 'C' \| 'D' \| 'E' \| 'F' \| 'a' \| 'b' \| 'c' \| 'd' \| 'e' \| 'f'`
`property :=`	a Unicode property set pattern

Movatterモバイル変換

Public Types

Public Member Functions

Static Public Member Functions

Friends

Detailed Description

Member Enumeration Documentation

◆ anonymous enum

◆ ESerialization

Constructor & Destructor Documentation

◆ UnicodeSet()[1/7]

◆ UnicodeSet()[2/7]

◆ UnicodeSet()[3/7]

◆ UnicodeSet()[4/7]

◆ UnicodeSet()[5/7]

◆ UnicodeSet()[6/7]

◆ UnicodeSet()[7/7]

◆ ~UnicodeSet()

Member Function Documentation

◆ add()[1/3]

◆ add()[2/3]

◆ add()[3/3]

◆ addAll()[1/2]

◆ addAll()[2/2]

◆ addMatchSetTo()

◆ applyIntPropertyValue()

◆ applyPattern()[1/3]

◆ applyPattern()[2/3]

◆ applyPattern()[3/3]

◆ applyPropertyAlias()

◆ begin()

◆ charAt()

◆ clear()

◆ clone()

◆ cloneAsThawed()

◆ closeOver()

◆ codePoints()

◆ compact()

◆ complement()[1/4]

◆ complement()[2/4]

◆ complement()[3/4]

◆ complement()[4/4]

◆ complementAll()[1/2]

◆ complementAll()[2/2]

◆ contains()[1/3]

◆ contains()[2/3]

◆ contains()[3/3]

◆ containsAll()[1/2]

◆ containsAll()[2/2]

◆ containsNone()[1/3]

◆ containsNone()[2/3]

◆ containsNone()[3/3]

◆ containsSome()[1/3]

◆ containsSome()[2/3]

◆ containsSome()[3/3]

◆ createFrom()

◆ createFromAll()

◆ end()

◆ freeze()

◆ fromUSet()[1/2]

◆ fromUSet()[2/2]

◆ getDynamicClassID()

◆ getRangeCount()

◆ getRangeEnd()

◆ getRangeStart()

◆ getStaticClassID()

◆ hashCode()

◆ hasStrings()

◆ indexOf()

◆ isBogus()

◆ isEmpty()

◆ isFrozen()

◆ matches()

◆ operator!=()

◆ operator=()

◆ operator==()

◆ ranges()

◆ remove()[1/3]

◆ remove()[2/3]

◆ remove()[3/3]