diff options
Diffstat (limited to 'thirdparty/icu4c/common/uprops.cpp')
-rw-r--r-- | thirdparty/icu4c/common/uprops.cpp | 100 |
1 files changed, 73 insertions, 27 deletions
diff --git a/thirdparty/icu4c/common/uprops.cpp b/thirdparty/icu4c/common/uprops.cpp index c87eeff381..817a1fac0c 100644 --- a/thirdparty/icu4c/common/uprops.cpp +++ b/thirdparty/icu4c/common/uprops.cpp @@ -100,8 +100,8 @@ void U_CALLCONV ulayout_load(UErrorCode &errorCode) { ulayout_isAcceptable, nullptr, &errorCode); if (U_FAILURE(errorCode)) { return; } - const uint8_t *inBytes = (const uint8_t *)udata_getMemory(gLayoutMemory); - const int32_t *inIndexes = (const int32_t *)inBytes; + const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(gLayoutMemory)); + const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes); int32_t indexesLength = inIndexes[ULAYOUT_IX_INDEXES_LENGTH]; if (indexesLength < 12) { errorCode = U_INVALID_FORMAT_ERROR; // Not enough indexes. @@ -171,7 +171,7 @@ static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /* } static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { - return static_cast<UBool>(ucase_hasBinaryProperty(c, which)); + return ucase_hasBinaryProperty(c, which); } static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { @@ -208,7 +208,7 @@ static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) { static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { UErrorCode errorCode=U_ZERO_ERROR; const Normalizer2 *norm2=Normalizer2Factory::getInstance( - (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); + static_cast<UNormalizationMode>(which - UCHAR_NFD_INERT + UNORM_NFD), errorCode); return U_SUCCESS(errorCode) && norm2->isInert(c); } #endif @@ -242,7 +242,7 @@ static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UP if(c>=0) { /* single code point */ const char16_t *resultString; - return (UBool)(ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT)>=0); + return ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT) >= 0; } else { /* guess some large but stack-friendly capacity */ char16_t dest[2*UCASE_MAX_STRING_LENGTH]; @@ -250,9 +250,9 @@ static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UP destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest), nfd.getBuffer(), nfd.length(), U_FOLD_CASE_DEFAULT, &errorCode); - return (UBool)(U_SUCCESS(errorCode) && + return U_SUCCESS(errorCode) && 0!=u_strCompare(nfd.getBuffer(), nfd.length(), - dest, destLength, false)); + dest, destLength, false); } } #endif @@ -359,6 +359,19 @@ static constexpr UChar32 ID_COMPAT_MATH_START[] = { 0x1D7C3 }; +/** Ranges (start/limit pairs) of Modifier_Combining_mark (only), from UCD PropList.txt. */ +static constexpr UChar32 MODIFIER_COMBINING_MARK[] = { + 0x0654, 0x0655 + 1, + 0x0658, 0x0658 + 1, // U+0658 + 0x06DC, 0x06DC + 1, // U+06DC + 0x06E3, 0x06E3 + 1, // U+06E3 + 0x06E7, 0x06E8 + 1, + 0x08CA, 0x08CB + 1, + 0x08CD, 0x08CF + 1, + 0x08D3, 0x08D3 + 1, // U+08D3 + 0x08F3, 0x08F3 + 1 // U+08F3 +}; + static UBool isIDCompatMathStart(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { if (c < ID_COMPAT_MATH_START[0]) { return false; } // fastpath for common scripts for (UChar32 startChar : ID_COMPAT_MATH_START) { @@ -375,6 +388,14 @@ static UBool isIDCompatMathContinue(const BinaryProperty &prop, UChar32 c, UProp return isIDCompatMathStart(prop, c, UCHAR_ID_COMPAT_MATH_START); } +static UBool isModifierCombiningMark(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + for (int32_t i = 0; i < UPRV_LENGTHOF(MODIFIER_COMBINING_MARK); i += 2) { + if (c < MODIFIER_COMBINING_MARK[i]) { return false; } // below range start + if (c < MODIFIER_COMBINING_MARK[i + 1]) { return true; } // below range limit + } + return false; +} + static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ /* * column and mask values for binary properties from u_getUnicodeProperties(). @@ -459,6 +480,7 @@ static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ { UPROPS_SRC_IDSU, 0, isIDSUnaryOperator }, // UCHAR_IDS_UNARY_OPERATOR { UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathStart }, // UCHAR_ID_COMPAT_MATH_START { UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathContinue }, // UCHAR_ID_COMPAT_MATH_CONTINUE + { UPROPS_SRC_MCM, 0 , isModifierCombiningMark }, // UCHAR_MODIFIER_COMBINING_MARK }; U_CAPI UBool U_EXPORT2 @@ -521,7 +543,7 @@ struct IntProperty { static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) { /* systematic, directly stored properties */ - return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift; + return static_cast<int32_t>(u_getUnicodeProperties(c, prop.column) & prop.mask) >> prop.shift; } static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) { @@ -533,17 +555,25 @@ static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/ } static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return (int32_t)u_charDirection(c); + return static_cast<int32_t>(u_charDirection(c)); } static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return (int32_t)ubidi_getPairedBracketType(c); + return static_cast<int32_t>(ubidi_getPairedBracketType(c)); } static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { return ubidi_getMaxValue(which); } +static int32_t getBlock(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return static_cast<int32_t>(ublock_getCode(c)); +} + +static int32_t blockGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*/) { + return uprv_getMaxValues(UPROPS_MAX_VALUES_OTHER_INDEX) & UPROPS_MAX_BLOCK; +} + #if UCONFIG_NO_NORMALIZATION static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) { return 0; @@ -555,7 +585,7 @@ static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProper #endif static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return (int32_t)u_charType(c); + return static_cast<int32_t>(u_charType(c)); } static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { @@ -567,23 +597,26 @@ static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty } static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c)); + int32_t ntv = static_cast<int32_t>(GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c))); return UPROPS_NTV_GET_TYPE(ntv); } static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { UErrorCode errorCode=U_ZERO_ERROR; - return (int32_t)uscript_getScript(c, &errorCode); + return static_cast<int32_t>(uscript_getScript(c, &errorCode)); } static int32_t scriptGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*/) { - uint32_t scriptX=uprv_getMaxValues(0)&UPROPS_SCRIPT_X_MASK; - return uprops_mergeScriptCodeOrIndex(scriptX); + return uprv_getMaxValues(0)&UPROPS_MAX_SCRIPT; } /* * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. - * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. + * Hangul_Syllable_Type used to be fully redundant with a subset of Grapheme_Cluster_Break. + * + * Starting with Unicode 16, this is no longer true for HST=V vs. GCB=V in some cases: + * Some Kirat Rai vowels are given GCB=V for proper grapheme clustering, but + * they are of course not related to Hangul syllables. */ static const UHangulSyllableType gcbToHst[]={ U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */ @@ -603,8 +636,13 @@ static const UHangulSyllableType gcbToHst[]={ }; static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + // Ignore supplementary code points: They all have HST=NA. + // This is a simple way to handle the GCB!=hst cases since Unicode 16 (Kirat Rai vowels). + if(c>0xffff) { + return U_HST_NOT_APPLICABLE; + } /* see comments on gcbToHst[] above */ - int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; + int32_t gcb = static_cast<int32_t>(u_getUnicodeProperties(c, 2) & UPROPS_GCB_MASK) >> UPROPS_GCB_SHIFT; if(gcb<UPRV_LENGTHOF(gcbToHst)) { return gcbToHst[gcb]; } else { @@ -618,7 +656,7 @@ static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { } #else static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) { - return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); + return static_cast<int32_t>(unorm_getQuickCheck(c, static_cast<UNormalizationMode>(which - UCHAR_NFD_QUICK_CHECK + UNORM_NFD))); } #endif @@ -683,25 +721,25 @@ static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ * For them, column is the UPropertySource value. */ { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue }, - { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue }, + { UPROPS_SRC_BLOCK, 0, 0, getBlock, blockGetMaxValue }, { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift }, { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue }, { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue }, - { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift }, + { UPROPS_SRC_CHAR, 0, static_cast<int32_t>(U_CHAR_CATEGORY_COUNT) - 1, getGeneralCategory, getMaxValueFromShift }, { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue }, { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue }, { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue }, - { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift }, + { UPROPS_SRC_CHAR, 0, static_cast<int32_t>(U_NT_COUNT) - 1, getNumericType, getMaxValueFromShift }, { UPROPS_SRC_PROPSVEC, 0, 0, getScript, scriptGetMaxValue }, - { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift }, + { UPROPS_SRC_PROPSVEC, 0, static_cast<int32_t>(U_HST_COUNT) - 1, getHangulSyllableType, getMaxValueFromShift }, // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" - { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, + { UPROPS_SRC_NFC, 0, static_cast<int32_t>(UNORM_YES), getNormQuickCheck, getMaxValueFromShift }, // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" - { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, + { UPROPS_SRC_NFKC, 0, static_cast<int32_t>(UNORM_YES), getNormQuickCheck, getMaxValueFromShift }, // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE - { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, + { UPROPS_SRC_NFC, 0, static_cast<int32_t>(UNORM_MAYBE), getNormQuickCheck, getMaxValueFromShift }, // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE - { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, + { UPROPS_SRC_NFKC, 0, static_cast<int32_t>(UNORM_MAYBE), getNormQuickCheck, getMaxValueFromShift }, { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift }, { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift }, { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue }, @@ -711,7 +749,8 @@ static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ { UPROPS_SRC_INPC, 0, 0, getInPC, layoutGetMaxValue }, { UPROPS_SRC_INSC, 0, 0, getInSC, layoutGetMaxValue }, { UPROPS_SRC_VO, 0, 0, getVo, layoutGetMaxValue }, - { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_ID_STATUS_ALLOWED, getIDStatusValue, getMaxValueFromShift }, + { UPROPS_SRC_PROPSVEC, 0, static_cast<int32_t>(U_ID_STATUS_ALLOWED), getIDStatusValue, getMaxValueFromShift }, + { 0, UPROPS_INCB_MASK, UPROPS_INCB_SHIFT,defaultGetValue, defaultGetMaxValue }, }; U_CAPI int32_t U_EXPORT2 @@ -829,6 +868,13 @@ uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *p } return; } + if (src == UPROPS_SRC_MCM) { + // range limits + for (UChar32 c : MODIFIER_COMBINING_MARK) { + sa->add(sa->set, c); + } + return; + } if (!ulayout_ensureData(*pErrorCode)) { return; } const UCPTrie *trie; switch (src) { |