summaryrefslogtreecommitdiffstats
path: root/thirdparty/icu4c/common/brkeng.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/brkeng.cpp')
-rw-r--r--thirdparty/icu4c/common/brkeng.cpp121
1 files changed, 93 insertions, 28 deletions
diff --git a/thirdparty/icu4c/common/brkeng.cpp b/thirdparty/icu4c/common/brkeng.cpp
index ce3d09cf23..3f58287532 100644
--- a/thirdparty/icu4c/common/brkeng.cpp
+++ b/thirdparty/icu4c/common/brkeng.cpp
@@ -21,6 +21,7 @@
#include "unicode/uscript.h"
#include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h"
+#include "unicode/rbbi.h"
#include "brkeng.h"
#include "cmemory.h"
@@ -70,19 +71,21 @@ UnhandledEngine::~UnhandledEngine() {
}
UBool
-UnhandledEngine::handles(UChar32 c) const {
+UnhandledEngine::handles(UChar32 c, const char* locale) const {
+ (void)locale; // Unused
return fHandled && fHandled->contains(c);
}
int32_t
UnhandledEngine::findBreaks( UText *text,
- int32_t /* startPos */,
+ int32_t startPos,
int32_t endPos,
UVector32 &/*foundBreaks*/,
UBool /* isPhraseBreaking */,
UErrorCode &status) const {
if (U_FAILURE(status)) return 0;
- UChar32 c = utext_current32(text);
+ utext_setNativeIndex(text, startPos);
+ UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
@@ -120,41 +123,39 @@ ICULanguageBreakFactory::~ICULanguageBreakFactory() {
}
}
-U_NAMESPACE_END
-U_CDECL_BEGIN
-static void U_CALLCONV _deleteEngine(void *obj) {
- delete (const icu::LanguageBreakEngine *) obj;
+void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {
+ static UMutex gBreakEngineMutex;
+ Mutex m(&gBreakEngineMutex);
+ if (fEngines == nullptr) {
+ LocalPointer<UStack> engines(new UStack(uprv_deleteUObject, nullptr, status), status);
+ if (U_SUCCESS(status)) {
+ fEngines = engines.orphan();
+ }
+ }
}
-U_CDECL_END
-U_NAMESPACE_BEGIN
const LanguageBreakEngine *
-ICULanguageBreakFactory::getEngineFor(UChar32 c) {
+ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) {
const LanguageBreakEngine *lbe = nullptr;
UErrorCode status = U_ZERO_ERROR;
+ ensureEngines(status);
+ if (U_FAILURE(status) ) {
+ // Note: no way to return error code to caller.
+ return nullptr;
+ }
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
-
- if (fEngines == nullptr) {
- LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status);
- if (U_FAILURE(status) ) {
- // Note: no way to return error code to caller.
- return nullptr;
- }
- fEngines = engines.orphan();
- } else {
- int32_t i = fEngines->size();
- while (--i >= 0) {
- lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
- if (lbe != nullptr && lbe->handles(c)) {
- return lbe;
- }
+ int32_t i = fEngines->size();
+ while (--i >= 0) {
+ lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
+ if (lbe != nullptr && lbe->handles(c, locale)) {
+ return lbe;
}
}
-
+
// We didn't find an engine. Create one.
- lbe = loadEngineFor(c);
+ lbe = loadEngineFor(c, locale);
if (lbe != nullptr) {
fEngines->push((void *)lbe, status);
}
@@ -162,7 +163,7 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
}
const LanguageBreakEngine *
-ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
+ICULanguageBreakFactory::loadEngineFor(UChar32 c, const char*) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
@@ -299,6 +300,70 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
return nullptr;
}
+
+void ICULanguageBreakFactory::addExternalEngine(
+ ExternalBreakEngine* external, UErrorCode& status) {
+ LocalPointer<ExternalBreakEngine> engine(external, status);
+ ensureEngines(status);
+ LocalPointer<BreakEngineWrapper> wrapper(
+ new BreakEngineWrapper(engine.orphan(), status), status);
+ static UMutex gBreakEngineMutex;
+ Mutex m(&gBreakEngineMutex);
+ fEngines->push(wrapper.getAlias(), status);
+ wrapper.orphan();
+}
+
+BreakEngineWrapper::BreakEngineWrapper(
+ ExternalBreakEngine* engine, UErrorCode &status) : delegate(engine, status) {
+}
+
+BreakEngineWrapper::~BreakEngineWrapper() {
+}
+
+UBool BreakEngineWrapper::handles(UChar32 c, const char* locale) const {
+ return delegate->isFor(c, locale);
+}
+
+int32_t BreakEngineWrapper::findBreaks(
+ UText *text,
+ int32_t startPos,
+ int32_t endPos,
+ UVector32 &foundBreaks,
+ UBool /* isPhraseBreaking */,
+ UErrorCode &status) const {
+ if (U_FAILURE(status)) return 0;
+ int32_t result = 0;
+
+ // Find the span of characters included in the set.
+ // The span to break begins at the current position in the text, and
+ // extends towards the start or end of the text, depending on 'reverse'.
+
+ utext_setNativeIndex(text, startPos);
+ int32_t start = (int32_t)utext_getNativeIndex(text);
+ int32_t current;
+ int32_t rangeStart;
+ int32_t rangeEnd;
+ UChar32 c = utext_current32(text);
+ while((current = (int32_t)utext_getNativeIndex(text)) < endPos && delegate->handles(c)) {
+ utext_next32(text); // TODO: recast loop for postincrement
+ c = utext_current32(text);
+ }
+ rangeStart = start;
+ rangeEnd = current;
+ int32_t beforeSize = foundBreaks.size();
+ int32_t additionalCapacity = rangeEnd - rangeStart + 1;
+ // enlarge to contains (rangeEnd-rangeStart+1) more items
+ foundBreaks.ensureCapacity(beforeSize+additionalCapacity, status);
+ if (U_FAILURE(status)) return 0;
+ foundBreaks.setSize(beforeSize + beforeSize+additionalCapacity);
+ result = delegate->fillBreaks(text, rangeStart, rangeEnd, foundBreaks.getBuffer()+beforeSize,
+ additionalCapacity, status);
+ if (U_FAILURE(status)) return 0;
+ foundBreaks.setSize(beforeSize + result);
+ utext_setNativeIndex(text, current);
+ return result;
+}
+
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */