summary refs log tree commit diff stats
path: root/thirdparty/icu4c/common/unicode/uset.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/unicode/uset.h')
-rw-r--r-- thirdparty/icu4c/common/unicode/uset.h 58
1 files changed, 45 insertions, 13 deletions
diff --git a/thirdparty/icu4c/common/unicode/uset.h b/thirdparty/icu4c/common/unicode/uset.h
index 5dd890e148..ee4e0036d2 100644
--- a/thirdparty/icu4c/common/unicode/uset.h
+++ b/thirdparty/icu4c/common/unicode/uset.h
@@ -53,6 +53,12 @@ typedef struct USet USet;
/**
* Bitmask values to be passed to uset_openPatternOptions() or
* uset_applyPattern() taking an option parameter.
+ *
+ * Use at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ *
+ * Undefined option bits are ignored and are reserved for future use.
+ *
* @stable ICU 2.4
*/
enum {
@@ -60,13 +66,13 @@ enum {
* Ignore white space within patterns unless quoted or escaped.
* @stable ICU 2.4
*/
- USET_IGNORE_SPACE = 1,
+ USET_IGNORE_SPACE = 1,
/**
* Enable case insensitive matching. E.g., "[ab]" with this flag
* will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
* match all except 'a', 'A', 'b', and 'B'. This performs a full
- * closure over case mappings, e.g. U+017F for s.
+ * closure over case mappings, e.g. 'ſ' (U+017F long s) for 's'.
*
* The resulting set is a superset of the input for the code points but
* not for the strings.
@@ -88,17 +94,36 @@ enum {
*
* @stable ICU 2.4
*/
- USET_CASE_INSENSITIVE = 2,
+ USET_CASE_INSENSITIVE = 2,
/**
- * Enable case insensitive matching. E.g., "[ab]" with this flag
- * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
- * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
- * title-, and uppercase mappings as well as the case folding
+ * Adds all case mappings for each element in the set.
+ * This adds the full lower-, title-, and uppercase mappings as well as the full case folding
* of each existing element in the set.
+ *
+ * Unlike the “case insensitive” options, this does not perform a closure.
+ * For example, it does not add 'ſ' (U+017F long s) for 's',
+ * 'K' (U+212A Kelvin sign) for 'k', or replace set strings by their case-folded versions.
+ *
* @stable ICU 3.2
*/
- USET_ADD_CASE_MAPPINGS = 4
+ USET_ADD_CASE_MAPPINGS = 4,
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Enable case insensitive matching.
+ * Same as USET_CASE_INSENSITIVE but using only Simple_Case_Folding (scf) mappings,
+ * which map each code point to one code point,
+ * not full Case_Folding (cf) mappings, which map some code points to multiple code points.
+ *
+ * This is designed for case-insensitive matches, for example in certain
+ * regular expression implementations where only Simple_Case_Folding mappings are used,
+ * such as in ECMAScript (JavaScript) regular expressions.
+ *
+ * @draft ICU 73
+ */
+ USET_SIMPLE_CASE_INSENSITIVE = 6
+#endif // U_HIDE_DRAFT_API
};
/**
@@ -299,7 +324,9 @@ uset_openPattern(const UChar* pattern, int32_t patternLength,
* @param patternLength the length of the pattern, or -1 if null
* terminated
* @param options bitmask for options to apply to the pattern.
- * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * Valid options are USET_IGNORE_SPACE and
+ * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
* @param ec the error code
* @stable ICU 2.4
*/
@@ -414,7 +441,10 @@ uset_set(USet* set,
* The character at pattern[0] must be a '['.
* @param patternLength The length of the UChar string. -1 if NUL terminated.
* @param options A bitmask for options to apply to the pattern.
- * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * Valid options are USET_IGNORE_SPACE and
+ * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS,
+ * USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
* @param status Returns an error if the pattern cannot be parsed.
* @return Upon successful parse, the value is either
* the index of the character after the closing ']'
@@ -804,7 +834,7 @@ uset_clear(USet* set);
/**
* Close this set over the given attribute. For the attribute
- * USET_CASE, the result is to modify this set so that:
+ * USET_CASE_INSENSITIVE, the result is to modify this set so that:
*
* 1. For each character or string 'a' in this set, all strings or
* characters 'b' such that foldCase(a) == foldCase(b) are added
@@ -824,8 +854,10 @@ uset_clear(USet* set);
* @param set the set
*
* @param attributes bitmask for attributes to close over.
- * Currently only the USET_CASE bit is supported. Any undefined bits
- * are ignored.
+ * Valid options:
+ * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ * Unrelated option bits are ignored.
* @stable ICU 4.2
*/
U_CAPI void U_EXPORT2