diff options
Diffstat (limited to 'thirdparty/icu4c/common/caniter.cpp')
-rw-r--r-- | thirdparty/icu4c/common/caniter.cpp | 50 |
1 files changed, 29 insertions, 21 deletions
diff --git a/thirdparty/icu4c/common/caniter.cpp b/thirdparty/icu4c/common/caniter.cpp index 64a3c65d29..2c987306ec 100644 --- a/thirdparty/icu4c/common/caniter.cpp +++ b/thirdparty/icu4c/common/caniter.cpp @@ -64,6 +64,7 @@ U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator) + /** *@param source string to get results for */ @@ -73,10 +74,10 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode pieces_lengths(nullptr), current(nullptr), current_length(0), - nfd(*Normalizer2::getNFDInstance(status)), - nfcImpl(*Normalizer2Factory::getNFCImpl(status)) + nfd(Normalizer2::getNFDInstance(status)), + nfcImpl(Normalizer2Factory::getNFCImpl(status)) { - if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) { + if(U_SUCCESS(status) && nfcImpl->ensureCanonIterData(status)) { setSource(sourceStr, status); } } @@ -172,7 +173,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st int32_t i = 0; UnicodeString *list = nullptr; - nfd.normalize(newSource, source, status); + nfd->normalize(newSource, source, status); if(U_FAILURE(status)) { return; } @@ -194,7 +195,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st current[0] = 0; pieces[0] = new UnicodeString[1]; pieces_lengths[0] = 1; - if (pieces[0] == 0) { + if (pieces[0] == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; goto CleanPartialInitialization; } @@ -203,7 +204,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st list = new UnicodeString[source.length()]; - if (list == 0) { + if (list == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; goto CleanPartialInitialization; } @@ -219,7 +220,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st // on the NFD form - see above). 
for (; i < source.length(); i += U16_LENGTH(cp)) { cp = source.char32At(i); - if (nfcImpl.isCanonSegmentStarter(cp)) { + if (nfcImpl->isCanonSegmentStarter(cp)) { source.extract(start, i-start, list[list_length++]); // add up to i start = i; } @@ -252,9 +253,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st return; // Common section to cleanup all local variables and reset object variables. CleanPartialInitialization: - if (list != nullptr) { - delete[] list; - } + delete[] list; cleanPieces(); } @@ -264,10 +263,19 @@ CleanPartialInitialization: * @param source the string to find permutations for * @return the results in a set. */ -void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) { +void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) { if(U_FAILURE(status)) { return; } + // To avoid an infinite loop caused by permute, we limit the depth of recursive + // calls to permute and return U_UNSUPPORTED_ERROR. + // We know that some unit tests need a depth of at least 4. Set to 8 to allow for + // unforeseen use cases. 
+ constexpr int32_t kPermuteDepthLimit = 8; + if (depth > kPermuteDepthLimit) { + status = U_UNSUPPORTED_ERROR; + return; + } //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source))); int32_t i = 0; @@ -277,7 +285,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros if (source.length() <= 2 && source.countChar32() <= 1) { UnicodeString *toPut = new UnicodeString(source); /* test for nullptr */ - if (toPut == 0) { + if (toPut == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } @@ -311,7 +319,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros // see what the permutations of the characters before and after this one are //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp))); - permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status); + permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1); /* Test for buffer overflows */ if(U_FAILURE(status)) { return; @@ -346,7 +354,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i Hashtable permutations(status); Hashtable basic(status); if (U_FAILURE(status)) { - return 0; + return nullptr; } result.setValueDeleter(uprv_deleteUObject); permutations.setValueDeleter(uprv_deleteUObject); @@ -381,7 +389,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer))); UnicodeString possible(*((UnicodeString *)(ne2->value.pointer))); UnicodeString attempt; - nfd.normalize(possible, attempt, status); + nfd->normalize(possible, attempt, status); // TODO: check if operator == is semanticaly the same as attempt.equals(segment) if (attempt==segment) { @@ -399,7 +407,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i /* Test for buffer overflows */ if(U_FAILURE(status)) { - 
return 0; + return nullptr; } // convert into a String[] to clean up storage //String[] finalResult = new String[result.size()]; @@ -407,7 +415,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i int32_t resultCount; if((resultCount = result.count()) != 0) { finalResult = new UnicodeString[resultCount]; - if (finalResult == 0) { + if (finalResult == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return nullptr; } @@ -448,7 +456,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) { // see if any character is at the start of some decomposition U16_GET(segment, 0, i, segLen, cp); - if (!nfcImpl.getCanonStartSet(cp, starts)) { + if (!nfcImpl->getCanonStartSet(cp, starts)) { continue; } // if so, see which decompositions match @@ -471,7 +479,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha UnicodeString item = *((UnicodeString *)(ne->value.pointer)); UnicodeString *toAdd = new UnicodeString(prefix); /* test for nullptr */ - if (toAdd == 0) { + if (toAdd == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return nullptr; } @@ -509,7 +517,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con UnicodeString temp(comp); int32_t inputLen=temp.length(); UnicodeString decompString; - nfd.normalize(temp, decompString, status); + nfd->normalize(temp, decompString, status); if (U_FAILURE(status)) { return nullptr; } @@ -573,7 +581,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con // brute force approach // check to make sure result is canonically equivalent UnicodeString trial; - nfd.normalize(temp, trial, status); + nfd->normalize(temp, trial, status); if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) { return nullptr; } |