summaryrefslogtreecommitdiffstats
path: root/thirdparty/icu4c/common/uprops.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/uprops.cpp')
-rw-r--r--thirdparty/icu4c/common/uprops.cpp100
1 files changed, 73 insertions, 27 deletions
diff --git a/thirdparty/icu4c/common/uprops.cpp b/thirdparty/icu4c/common/uprops.cpp
index c87eeff381..817a1fac0c 100644
--- a/thirdparty/icu4c/common/uprops.cpp
+++ b/thirdparty/icu4c/common/uprops.cpp
@@ -100,8 +100,8 @@ void U_CALLCONV ulayout_load(UErrorCode &errorCode) {
ulayout_isAcceptable, nullptr, &errorCode);
if (U_FAILURE(errorCode)) { return; }
- const uint8_t *inBytes = (const uint8_t *)udata_getMemory(gLayoutMemory);
- const int32_t *inIndexes = (const int32_t *)inBytes;
+ const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(gLayoutMemory));
+ const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes);
int32_t indexesLength = inIndexes[ULAYOUT_IX_INDEXES_LENGTH];
if (indexesLength < 12) {
errorCode = U_INVALID_FORMAT_ERROR; // Not enough indexes.
@@ -171,7 +171,7 @@ static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*
}
static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
- return static_cast<UBool>(ucase_hasBinaryProperty(c, which));
+ return ucase_hasBinaryProperty(c, which);
}
static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
@@ -208,7 +208,7 @@ static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) {
static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
UErrorCode errorCode=U_ZERO_ERROR;
const Normalizer2 *norm2=Normalizer2Factory::getInstance(
- (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
+ static_cast<UNormalizationMode>(which - UCHAR_NFD_INERT + UNORM_NFD), errorCode);
return U_SUCCESS(errorCode) && norm2->isInert(c);
}
#endif
@@ -242,7 +242,7 @@ static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UP
if(c>=0) {
/* single code point */
const char16_t *resultString;
- return (UBool)(ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
+ return ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT) >= 0;
} else {
/* guess some large but stack-friendly capacity */
char16_t dest[2*UCASE_MAX_STRING_LENGTH];
@@ -250,9 +250,9 @@ static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UP
destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest),
nfd.getBuffer(), nfd.length(),
U_FOLD_CASE_DEFAULT, &errorCode);
- return (UBool)(U_SUCCESS(errorCode) &&
+ return U_SUCCESS(errorCode) &&
0!=u_strCompare(nfd.getBuffer(), nfd.length(),
- dest, destLength, false));
+ dest, destLength, false);
}
}
#endif
@@ -359,6 +359,19 @@ static constexpr UChar32 ID_COMPAT_MATH_START[] = {
0x1D7C3
};
+/** Ranges (start/limit pairs) of Modifier_Combining_mark (only), from UCD PropList.txt. */
+static constexpr UChar32 MODIFIER_COMBINING_MARK[] = {
+ 0x0654, 0x0655 + 1,
+ 0x0658, 0x0658 + 1, // U+0658
+ 0x06DC, 0x06DC + 1, // U+06DC
+ 0x06E3, 0x06E3 + 1, // U+06E3
+ 0x06E7, 0x06E8 + 1,
+ 0x08CA, 0x08CB + 1,
+ 0x08CD, 0x08CF + 1,
+ 0x08D3, 0x08D3 + 1, // U+08D3
+ 0x08F3, 0x08F3 + 1 // U+08F3
+};
+
static UBool isIDCompatMathStart(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
if (c < ID_COMPAT_MATH_START[0]) { return false; } // fastpath for common scripts
for (UChar32 startChar : ID_COMPAT_MATH_START) {
@@ -375,6 +388,14 @@ static UBool isIDCompatMathContinue(const BinaryProperty &prop, UChar32 c, UProp
return isIDCompatMathStart(prop, c, UCHAR_ID_COMPAT_MATH_START);
}
+static UBool isModifierCombiningMark(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ for (int32_t i = 0; i < UPRV_LENGTHOF(MODIFIER_COMBINING_MARK); i += 2) {
+ if (c < MODIFIER_COMBINING_MARK[i]) { return false; } // below range start
+ if (c < MODIFIER_COMBINING_MARK[i + 1]) { return true; } // below range limit
+ }
+ return false;
+}
+
static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
/*
* column and mask values for binary properties from u_getUnicodeProperties().
@@ -459,6 +480,7 @@ static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
{ UPROPS_SRC_IDSU, 0, isIDSUnaryOperator }, // UCHAR_IDS_UNARY_OPERATOR
{ UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathStart }, // UCHAR_ID_COMPAT_MATH_START
{ UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathContinue }, // UCHAR_ID_COMPAT_MATH_CONTINUE
+ { UPROPS_SRC_MCM, 0 , isModifierCombiningMark }, // UCHAR_MODIFIER_COMBINING_MARK
};
U_CAPI UBool U_EXPORT2
@@ -521,7 +543,7 @@ struct IntProperty {
static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) {
/* systematic, directly stored properties */
- return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift;
+ return static_cast<int32_t>(u_getUnicodeProperties(c, prop.column) & prop.mask) >> prop.shift;
}
static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) {
@@ -533,17 +555,25 @@ static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/
}
static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return (int32_t)u_charDirection(c);
+ return static_cast<int32_t>(u_charDirection(c));
}
static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return (int32_t)ubidi_getPairedBracketType(c);
+ return static_cast<int32_t>(ubidi_getPairedBracketType(c));
}
static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
return ubidi_getMaxValue(which);
}
+static int32_t getBlock(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ return static_cast<int32_t>(ublock_getCode(c));
+}
+
+static int32_t blockGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*/) {
+ return uprv_getMaxValues(UPROPS_MAX_VALUES_OTHER_INDEX) & UPROPS_MAX_BLOCK;
+}
+
#if UCONFIG_NO_NORMALIZATION
static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) {
return 0;
@@ -555,7 +585,7 @@ static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProper
#endif
static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return (int32_t)u_charType(c);
+ return static_cast<int32_t>(u_charType(c));
}
static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
@@ -567,23 +597,26 @@ static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty
}
static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c));
+ int32_t ntv = static_cast<int32_t>(GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c)));
return UPROPS_NTV_GET_TYPE(ntv);
}
static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
UErrorCode errorCode=U_ZERO_ERROR;
- return (int32_t)uscript_getScript(c, &errorCode);
+ return static_cast<int32_t>(uscript_getScript(c, &errorCode));
}
static int32_t scriptGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*/) {
- uint32_t scriptX=uprv_getMaxValues(0)&UPROPS_SCRIPT_X_MASK;
- return uprops_mergeScriptCodeOrIndex(scriptX);
+ return uprv_getMaxValues(0)&UPROPS_MAX_SCRIPT;
}
/*
* Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
- * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
+ * Hangul_Syllable_Type used to be fully redundant with a subset of Grapheme_Cluster_Break.
+ *
+ * Starting with Unicode 16, this is no longer true for HST=V vs. GCB=V in some cases:
+ * Some Kirat Rai vowels are given GCB=V for proper grapheme clustering, but
+ * they are of course not related to Hangul syllables.
*/
static const UHangulSyllableType gcbToHst[]={
U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */
@@ -603,8 +636,13 @@ static const UHangulSyllableType gcbToHst[]={
};
static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
+ // Ignore supplementary code points: They all have HST=NA.
+ // This is a simple way to handle the GCB!=hst cases since Unicode 16 (Kirat Rai vowels).
+ if(c>0xffff) {
+ return U_HST_NOT_APPLICABLE;
+ }
/* see comments on gcbToHst[] above */
- int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
+ int32_t gcb = static_cast<int32_t>(u_getUnicodeProperties(c, 2) & UPROPS_GCB_MASK) >> UPROPS_GCB_SHIFT;
if(gcb<UPRV_LENGTHOF(gcbToHst)) {
return gcbToHst[gcb];
} else {
@@ -618,7 +656,7 @@ static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) {
}
#else
static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) {
- return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
+ return static_cast<int32_t>(unorm_getQuickCheck(c, static_cast<UNormalizationMode>(which - UCHAR_NFD_QUICK_CHECK + UNORM_NFD)));
}
#endif
@@ -683,25 +721,25 @@ static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
* For them, column is the UPropertySource value.
*/
{ UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue },
- { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue },
+ { UPROPS_SRC_BLOCK, 0, 0, getBlock, blockGetMaxValue },
{ UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift },
{ 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue },
{ 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue },
- { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift },
+ { UPROPS_SRC_CHAR, 0, static_cast<int32_t>(U_CHAR_CATEGORY_COUNT) - 1, getGeneralCategory, getMaxValueFromShift },
{ UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue },
{ UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue },
{ 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue },
- { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift },
+ { UPROPS_SRC_CHAR, 0, static_cast<int32_t>(U_NT_COUNT) - 1, getNumericType, getMaxValueFromShift },
{ UPROPS_SRC_PROPSVEC, 0, 0, getScript, scriptGetMaxValue },
- { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift },
+ { UPROPS_SRC_PROPSVEC, 0, static_cast<int32_t>(U_HST_COUNT) - 1, getHangulSyllableType, getMaxValueFromShift },
// UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
- { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift },
+ { UPROPS_SRC_NFC, 0, static_cast<int32_t>(UNORM_YES), getNormQuickCheck, getMaxValueFromShift },
// UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
- { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift },
+ { UPROPS_SRC_NFKC, 0, static_cast<int32_t>(UNORM_YES), getNormQuickCheck, getMaxValueFromShift },
// UCHAR_NFC_QUICK_CHECK: max=2=MAYBE
- { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift },
+ { UPROPS_SRC_NFC, 0, static_cast<int32_t>(UNORM_MAYBE), getNormQuickCheck, getMaxValueFromShift },
// UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE
- { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift },
+ { UPROPS_SRC_NFKC, 0, static_cast<int32_t>(UNORM_MAYBE), getNormQuickCheck, getMaxValueFromShift },
{ UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift },
{ UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift },
{ 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue },
@@ -711,7 +749,8 @@ static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
{ UPROPS_SRC_INPC, 0, 0, getInPC, layoutGetMaxValue },
{ UPROPS_SRC_INSC, 0, 0, getInSC, layoutGetMaxValue },
{ UPROPS_SRC_VO, 0, 0, getVo, layoutGetMaxValue },
- { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_ID_STATUS_ALLOWED, getIDStatusValue, getMaxValueFromShift },
+ { UPROPS_SRC_PROPSVEC, 0, static_cast<int32_t>(U_ID_STATUS_ALLOWED), getIDStatusValue, getMaxValueFromShift },
+ { 0, UPROPS_INCB_MASK, UPROPS_INCB_SHIFT,defaultGetValue, defaultGetMaxValue },
};
U_CAPI int32_t U_EXPORT2
@@ -829,6 +868,13 @@ uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *p
}
return;
}
+ if (src == UPROPS_SRC_MCM) {
+ // range limits
+ for (UChar32 c : MODIFIER_COMBINING_MARK) {
+ sa->add(sa->set, c);
+ }
+ return;
+ }
if (!ulayout_ensureData(*pErrorCode)) { return; }
const UCPTrie *trie;
switch (src) {