|
21 | 21 | #include"unicode/uscript.h"
|
22 | 22 | #include"unicode/ucharstrie.h"
|
23 | 23 | #include"unicode/bytestrie.h"
|
| 24 | +#include"unicode/rbbi.h" |
24 | 25 |
|
25 | 26 | #include"brkeng.h"
|
26 | 27 | #include"cmemory.h"
|
@@ -70,19 +71,21 @@ UnhandledEngine::~UnhandledEngine() {
|
70 | 71 | }
|
71 | 72 |
|
72 | 73 | UBool
|
73 |
| -UnhandledEngine::handles(UChar32 c)const { |
| 74 | +UnhandledEngine::handles(UChar32 c,constchar* locale)const { |
| 75 | + (void)locale;// Unused |
74 | 76 | returnfHandled &&fHandled->contains(c);
|
75 | 77 | }
|
76 | 78 |
|
77 | 79 | int32_t
|
78 | 80 | UnhandledEngine::findBreaks( UText *text,
|
79 |
| -int32_t/*startPos*/, |
| 81 | +int32_t startPos, |
80 | 82 | int32_t endPos,
|
81 | 83 | UVector32 &/*foundBreaks*/,
|
82 | 84 | UBool/* isPhraseBreaking*/,
|
83 | 85 | UErrorCode &status)const {
|
84 | 86 | if (U_FAILURE(status))return0;
|
85 |
| - UChar32 c =utext_current32(text); |
| 87 | +utext_setNativeIndex(text, startPos); |
| 88 | + UChar32 c =utext_current32(text); |
86 | 89 | while((int32_t)utext_getNativeIndex(text) < endPos &&fHandled->contains(c)) {
|
87 | 90 | utext_next32(text);// TODO: recast loop to work with post-increment operations.
|
88 | 91 | c =utext_current32(text);
|
@@ -120,49 +123,47 @@ ICULanguageBreakFactory::~ICULanguageBreakFactory() {
|
120 | 123 | }
|
121 | 124 | }
|
122 | 125 |
|
123 |
| -U_NAMESPACE_END |
124 |
| -U_CDECL_BEGIN |
125 |
| -staticvoid U_CALLCONV_deleteEngine(void *obj) { |
126 |
| -delete (const icu::LanguageBreakEngine *) obj; |
| 126 | +voidICULanguageBreakFactory::ensureEngines(UErrorCode& status) { |
| 127 | +static UMutexgBreakEngineMutex; |
| 128 | + Mutexm(&gBreakEngineMutex); |
| 129 | +if (fEngines ==nullptr) { |
| 130 | + LocalPointer<UStack>engines(newUStack(uprv_deleteUObject,nullptr, status), status); |
| 131 | +if (U_SUCCESS(status)) { |
| 132 | +fEngines = engines.orphan(); |
| 133 | + } |
| 134 | + } |
127 | 135 | }
|
128 |
| -U_CDECL_END |
129 |
| -U_NAMESPACE_BEGIN |
130 | 136 |
|
131 | 137 | const LanguageBreakEngine *
|
132 |
| -ICULanguageBreakFactory::getEngineFor(UChar32 c) { |
| 138 | +ICULanguageBreakFactory::getEngineFor(UChar32 c,constchar* locale) { |
133 | 139 | const LanguageBreakEngine *lbe =nullptr;
|
134 | 140 | UErrorCode status = U_ZERO_ERROR;
|
| 141 | +ensureEngines(status); |
| 142 | +if (U_FAILURE(status) ) { |
| 143 | +// Note: no way to return error code to caller. |
| 144 | +returnnullptr; |
| 145 | + } |
135 | 146 |
|
136 | 147 | static UMutexgBreakEngineMutex;
|
137 | 148 | Mutexm(&gBreakEngineMutex);
|
138 |
| - |
139 |
| -if (fEngines ==nullptr) { |
140 |
| - LocalPointer<UStack>engines(newUStack(_deleteEngine,nullptr, status), status); |
141 |
| -if (U_FAILURE(status) ) { |
142 |
| -// Note: no way to return error code to caller. |
143 |
| -returnnullptr; |
144 |
| - } |
145 |
| -fEngines = engines.orphan(); |
146 |
| - }else { |
147 |
| -int32_t i =fEngines->size(); |
148 |
| -while (--i >=0) { |
149 |
| - lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); |
150 |
| -if (lbe !=nullptr && lbe->handles(c)) { |
151 |
| -return lbe; |
152 |
| - } |
| 149 | +int32_t i =fEngines->size(); |
| 150 | +while (--i >=0) { |
| 151 | + lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); |
| 152 | +if (lbe !=nullptr && lbe->handles(c, locale)) { |
| 153 | +return lbe; |
153 | 154 | }
|
154 | 155 | }
|
155 |
| -
|
| 156 | + |
156 | 157 | // We didn't find an engine. Create one.
|
157 |
| - lbe =loadEngineFor(c); |
| 158 | + lbe =loadEngineFor(c, locale); |
158 | 159 | if (lbe !=nullptr) {
|
159 | 160 | fEngines->push((void *)lbe, status);
|
160 | 161 | }
|
161 | 162 | returnU_SUCCESS(status) ? lbe :nullptr;
|
162 | 163 | }
|
163 | 164 |
|
164 | 165 | const LanguageBreakEngine *
|
165 |
| -ICULanguageBreakFactory::loadEngineFor(UChar32 c) { |
| 166 | +ICULanguageBreakFactory::loadEngineFor(UChar32 c,constchar*) { |
166 | 167 | UErrorCode status = U_ZERO_ERROR;
|
167 | 168 | UScriptCode code =uscript_getScript(c, &status);
|
168 | 169 | if (U_SUCCESS(status)) {
|
@@ -299,6 +300,70 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
|
299 | 300 | returnnullptr;
|
300 | 301 | }
|
301 | 302 |
|
| 303 | + |
| 304 | +voidICULanguageBreakFactory::addExternalEngine( |
| 305 | + ExternalBreakEngine* external, UErrorCode& status) { |
| 306 | + LocalPointer<ExternalBreakEngine>engine(external, status); |
| 307 | +ensureEngines(status); |
| 308 | + LocalPointer<BreakEngineWrapper>wrapper( |
| 309 | +newBreakEngineWrapper(engine.orphan(), status), status); |
| 310 | +static UMutexgBreakEngineMutex; |
| 311 | + Mutexm(&gBreakEngineMutex); |
| 312 | +fEngines->push(wrapper.getAlias(), status); |
| 313 | + wrapper.orphan(); |
| 314 | +} |
| 315 | + |
| 316 | +BreakEngineWrapper::BreakEngineWrapper( |
| 317 | + ExternalBreakEngine* engine, UErrorCode &status) : delegate(engine, status) { |
| 318 | +} |
| 319 | + |
| 320 | +BreakEngineWrapper::~BreakEngineWrapper() { |
| 321 | +} |
| 322 | + |
| 323 | +UBoolBreakEngineWrapper::handles(UChar32 c,constchar* locale)const { |
| 324 | +return delegate->isFor(c, locale); |
| 325 | +} |
| 326 | + |
| 327 | +int32_tBreakEngineWrapper::findBreaks( |
| 328 | + UText *text, |
| 329 | +int32_t startPos, |
| 330 | +int32_t endPos, |
| 331 | + UVector32 &foundBreaks, |
| 332 | + UBool/* isPhraseBreaking*/, |
| 333 | + UErrorCode &status)const { |
| 334 | +if (U_FAILURE(status))return0; |
| 335 | +int32_t result =0; |
| 336 | + |
| 337 | +// Find the span of characters included in the set. |
| 338 | +// The span to break begins at the current position in the text, and |
| 339 | +// extends towards the start or end of the text, depending on 'reverse'. |
| 340 | + |
| 341 | +utext_setNativeIndex(text, startPos); |
| 342 | +int32_t start = (int32_t)utext_getNativeIndex(text); |
| 343 | +int32_t current; |
| 344 | +int32_t rangeStart; |
| 345 | +int32_t rangeEnd; |
| 346 | + UChar32 c =utext_current32(text); |
| 347 | +while((current = (int32_t)utext_getNativeIndex(text)) < endPos && delegate->handles(c)) { |
| 348 | +utext_next32(text);// TODO: recast loop for postincrement |
| 349 | + c =utext_current32(text); |
| 350 | + } |
| 351 | + rangeStart = start; |
| 352 | + rangeEnd = current; |
| 353 | +int32_t beforeSize = foundBreaks.size(); |
| 354 | +int32_t additionalCapacity = rangeEnd - rangeStart +1; |
| 355 | +// enlarge to contains (rangeEnd-rangeStart+1) more items |
| 356 | + foundBreaks.ensureCapacity(beforeSize+additionalCapacity, status); |
| 357 | +if (U_FAILURE(status))return0; |
| 358 | + foundBreaks.setSize(beforeSize + beforeSize+additionalCapacity); |
| 359 | + result = delegate->fillBreaks(text, rangeStart, rangeEnd, foundBreaks.getBuffer()+beforeSize, |
| 360 | + additionalCapacity, status); |
| 361 | +if (U_FAILURE(status))return0; |
| 362 | + foundBreaks.setSize(beforeSize + result); |
| 363 | +utext_setNativeIndex(text, current); |
| 364 | +return result; |
| 365 | +} |
| 366 | + |
302 | 367 | U_NAMESPACE_END
|
303 | 368 |
|
304 | 369 | #endif/* #if !UCONFIG_NO_BREAK_ITERATION*/
|