diff options
Diffstat (limited to 'thirdparty/icu4c/common/unicode')
30 files changed, 1447 insertions, 156 deletions
diff --git a/thirdparty/icu4c/common/unicode/brkiter.h b/thirdparty/icu4c/common/unicode/brkiter.h index bd96629285..30c59c4a94 100644 --- a/thirdparty/icu4c/common/unicode/brkiter.h +++ b/thirdparty/icu4c/common/unicode/brkiter.h @@ -219,7 +219,7 @@ public: * boundaries have been returned. * @stable ICU 2.0 */ - DONE = (int32_t)-1 + DONE = static_cast<int32_t>(-1) }; /** diff --git a/thirdparty/icu4c/common/unicode/bytestream.h b/thirdparty/icu4c/common/unicode/bytestream.h index 997746e428..bea41461bc 100644 --- a/thirdparty/icu4c/common/unicode/bytestream.h +++ b/thirdparty/icu4c/common/unicode/bytestream.h @@ -281,7 +281,7 @@ class StringByteSink : public ByteSink { */ StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) { if (initialAppendCapacity > 0 && - (uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) { + static_cast<uint32_t>(initialAppendCapacity) > dest->capacity() - dest->length()) { dest->reserve(dest->length() + initialAppendCapacity); } } diff --git a/thirdparty/icu4c/common/unicode/bytestrie.h b/thirdparty/icu4c/common/unicode/bytestrie.h index 1719a6bb83..c07dfada94 100644 --- a/thirdparty/icu4c/common/unicode/bytestrie.h +++ b/thirdparty/icu4c/common/unicode/bytestrie.h @@ -109,7 +109,7 @@ public: */ uint64_t getState64() const { return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) | - (uint64_t)(pos_ - bytes_); + static_cast<uint64_t>(pos_ - bytes_); } /** @@ -439,7 +439,7 @@ private: } static inline UStringTrieResult valueResult(int32_t node) { - return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node&kValueIsFinal)); + return static_cast<UStringTrieResult>(USTRINGTRIE_INTERMEDIATE_VALUE - (node & kValueIsFinal)); } // Handles a branch node for both next(byte) and next(string). diff --git a/thirdparty/icu4c/common/unicode/char16ptr.h b/thirdparty/icu4c/common/unicode/char16ptr.h index de8182c7ad..daf35cd43b 100644 --- a/thirdparty/icu4c/common/unicode/char16ptr.h +++ b/thirdparty/icu4c/common/unicode/char16ptr.h @@ -12,6 +12,7 @@ #if U_SHOW_CPLUSPLUS_API #include <cstddef> +#include <string_view> /** * \file @@ -306,6 +307,76 @@ inline OldUChar *toOldUCharPtr(char16_t *p) { return reinterpret_cast<OldUChar *>(p); } +#ifndef U_FORCE_HIDE_INTERNAL_API +/** + * Is T convertible to a std::u16string_view or some other 16-bit string view? + * @internal + */ +template<typename T> +constexpr bool ConvertibleToU16StringView = + std::is_convertible_v<T, std::u16string_view> +#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000) + || std::is_convertible_v<T, std::basic_string_view<uint16_t>> +#endif +#if U_SIZEOF_WCHAR_T==2 + || std::is_convertible_v<T, std::wstring_view> +#endif + ; + +namespace internal { +/** + * Pass-through overload. + * @internal + */ +inline std::u16string_view toU16StringView(std::u16string_view sv) { return sv; } + +#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000) +/** + * Basically undefined behavior but sometimes necessary conversion + * from std::basic_string_view<uint16_t> to std::u16string_view. + * @internal + */ +inline std::u16string_view toU16StringView(std::basic_string_view<uint16_t> sv) { + return { ConstChar16Ptr(sv.data()), sv.length() }; +} +#endif + +#if U_SIZEOF_WCHAR_T==2 +/** + * Basically undefined behavior but sometimes necessary conversion + * from std::wstring_view to std::u16string_view. + * @internal + */ +inline std::u16string_view toU16StringView(std::wstring_view sv) { + return { ConstChar16Ptr(sv.data()), sv.length() }; +} +#endif + +/** + * Pass-through overload. + * @internal + */ +template <typename T, + typename = typename std::enable_if_t<!std::is_pointer_v<std::remove_reference_t<T>>>> +inline std::u16string_view toU16StringViewNullable(const T& text) { + return toU16StringView(text); +} + +/** + * In case of nullptr, return an empty view. + * @internal + */ +template <typename T, + typename = typename std::enable_if_t<std::is_pointer_v<std::remove_reference_t<T>>>, + typename = void> +inline std::u16string_view toU16StringViewNullable(const T& text) { + if (text == nullptr) return {}; // For backward compatibility. + return toU16StringView(text); +} + +} // internal +#endif // U_FORCE_HIDE_INTERNAL_API + U_NAMESPACE_END #endif /* U_SHOW_CPLUSPLUS_API */ diff --git a/thirdparty/icu4c/common/unicode/docmain.h b/thirdparty/icu4c/common/unicode/docmain.h index fa4dbbc052..676e868e7c 100644 --- a/thirdparty/icu4c/common/unicode/docmain.h +++ b/thirdparty/icu4c/common/unicode/docmain.h @@ -138,6 +138,11 @@ * <td>icu::DateFormat</td> * </tr> * <tr> + * <td>Relative Date and Time Formatting</td> + * <td>ureldatefmt.h</td> + * <td>icu::RelativeDateTimeFormatter</td> + * </tr> + * <tr> * <td>Message Formatting</td> * <td>umsg.h</td> * <td>icu::MessageFormat</td> diff --git a/thirdparty/icu4c/common/unicode/edits.h b/thirdparty/icu4c/common/unicode/edits.h index dda9d3ca75..bc3c943822 100644 --- a/thirdparty/icu4c/common/unicode/edits.h +++ b/thirdparty/icu4c/common/unicode/edits.h @@ -508,7 +508,7 @@ private: Edits ©Array(const Edits &other); Edits &moveArray(Edits &src) noexcept; - void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; } + void setLastUnit(int32_t last) { array[length - 1] = static_cast<uint16_t>(last); } int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; } void append(int32_t r); diff --git a/thirdparty/icu4c/common/unicode/idna.h b/thirdparty/icu4c/common/unicode/idna.h index 1c57205bae..1e36fa771f 100644 --- a/thirdparty/icu4c/common/unicode/idna.h +++ b/thirdparty/icu4c/common/unicode/idna.h @@ -70,6 +70,7 @@ public: * The worker functions use transitional processing, including deviation mappings, * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE * is used in which case the deviation characters are passed through without change. + * <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b> * * Disallowed characters are mapped to U+FFFD. * @@ -82,6 +83,8 @@ public: * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. * * @param options Bit set to modify the processing and error checking. + * These should include UIDNA_DEFAULT, or + * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE. * See option bit set values in uidna.h. * @param errorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns diff --git a/thirdparty/icu4c/common/unicode/locid.h b/thirdparty/icu4c/common/unicode/locid.h index 60282d623d..e1afd598cf 100644 --- a/thirdparty/icu4c/common/unicode/locid.h +++ b/thirdparty/icu4c/common/unicode/locid.h @@ -727,7 +727,9 @@ public: * * @stable ICU 49 */ - void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status); + void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status) { + setKeywordValue(StringPiece{keywordName}, StringPiece{keywordValue}, status); + } /** * Sets or removes the value for a keyword. diff --git a/thirdparty/icu4c/common/unicode/messagepattern.h b/thirdparty/icu4c/common/unicode/messagepattern.h index 55b09bfbd4..db36ede727 100644 --- a/thirdparty/icu4c/common/unicode/messagepattern.h +++ b/thirdparty/icu4c/common/unicode/messagepattern.h @@ -775,7 +775,7 @@ public: UMessagePatternArgType getArgType() const { UMessagePatternPartType msgType=getType(); if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) { - return (UMessagePatternArgType)value; + return static_cast<UMessagePatternArgType>(value); } else { return UMSGPAT_ARG_TYPE_NONE; } @@ -821,6 +821,7 @@ public: static const int32_t MAX_LENGTH=0xffff; static const int32_t MAX_VALUE=0x7fff; + static const int32_t MAX_NESTED_LEVELS=0x03ff; // Some fields are not final because they are modified during pattern parsing. // After pattern parsing, the parts are effectively immutable. diff --git a/thirdparty/icu4c/common/unicode/normalizer2.h b/thirdparty/icu4c/common/unicode/normalizer2.h index 6856ff8720..01271623f3 100644 --- a/thirdparty/icu4c/common/unicode/normalizer2.h +++ b/thirdparty/icu4c/common/unicode/normalizer2.h @@ -163,7 +163,6 @@ public: static const Normalizer2 * getNFKCCasefoldInstance(UErrorCode &errorCode); -#ifndef U_HIDE_DRAFT_API /** * Returns a Normalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization * which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC. @@ -176,11 +175,10 @@ public: * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) * @return the requested Normalizer2, if successful - * @draft ICU 74 + * @stable ICU 74 */ static const Normalizer2 * getNFKCSimpleCasefoldInstance(UErrorCode &errorCode); -#endif // U_HIDE_DRAFT_API /** * Returns a Normalizer2 instance which uses the specified data file diff --git a/thirdparty/icu4c/common/unicode/platform.h b/thirdparty/icu4c/common/unicode/platform.h index 7aca76c67d..b2fcb21ef1 100644 --- a/thirdparty/icu4c/common/unicode/platform.h +++ b/thirdparty/icu4c/common/unicode/platform.h @@ -132,6 +132,8 @@ #define U_PF_BROWSER_NATIVE_CLIENT 4020 /** Android is based on Linux. @internal */ #define U_PF_ANDROID 4050 +/** Haiku is a POSIX-ish platform. @internal */ +#define U_PF_HAIKU 4080 /** Fuchsia is a POSIX-ish platform. @internal */ #define U_PF_FUCHSIA 4100 /* Maximum value for Linux-based platform is 4499 */ @@ -154,6 +156,8 @@ # define U_PLATFORM U_PF_MINGW #elif defined(__CYGWIN__) # define U_PLATFORM U_PF_CYGWIN + /* Cygwin uchar.h doesn't exist until Cygwin 3.5. */ +# include <cygwin/version.h> #elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) # define U_PLATFORM U_PF_WINDOWS #elif defined(__ANDROID__) @@ -200,6 +204,8 @@ # define U_PLATFORM U_PF_OS390 #elif defined(__OS400__) || defined(__TOS_OS400__) # define U_PLATFORM U_PF_OS400 +#elif defined(__HAIKU__) +# define U_PLATFORM U_PF_HAIKU #elif defined(__EMSCRIPTEN__) # define U_PLATFORM U_PF_EMSCRIPTEN #else @@ -235,7 +241,7 @@ /** * \def U_PLATFORM_USES_ONLY_WIN32_API * Defines whether the platform uses only the Win32 API. - * Set to 1 for Windows/MSVC and MinGW but not Cygwin. + * Set to 1 for Windows/MSVC, ClangCL and MinGW but not Cygwin. * @internal */ #ifdef U_PLATFORM_USES_ONLY_WIN32_API @@ -250,7 +256,7 @@ /** * \def U_PLATFORM_HAS_WIN32_API * Defines whether the Win32 API is available on the platform. - * Set to 1 for Windows/MSVC, MinGW and Cygwin. + * Set to 1 for Windows/MSVC, ClangCL, MinGW and Cygwin. * @internal */ #ifdef U_PLATFORM_HAS_WIN32_API @@ -722,12 +728,16 @@ /* * Notes: * C++11 and C11 require support for UTF-16 literals - * Doesn't work on Mac C11 (see workaround in ptypes.h). + * Doesn't work on Mac C11 (see workaround in ptypes.h) + * or Cygwin less than 3.5. */ -# if defined(__cplusplus) || !U_PLATFORM_IS_DARWIN_BASED +# if defined(__cplusplus) # define U_HAVE_CHAR16_T 1 -# else +# elif U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && CYGWIN_VERSION_DLL_MAJOR < 3005) # define U_HAVE_CHAR16_T 0 +# else + // conformant C11 +# define U_HAVE_CHAR16_T 1 # endif #endif @@ -735,7 +745,9 @@ * @{ * \def U_DECLARE_UTF16 * Do not use this macro because it is not defined on all platforms. - * Use the UNICODE_STRING or U_STRING_DECL macros instead. + * In C++, use std::u16string_view literals, see the UNICODE_STRING docs. + * In C, use u"UTF-16 literals". + * See also the public U_STRING_DECL macro. * @internal */ #ifdef U_DECLARE_UTF16 @@ -766,7 +778,7 @@ #elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \ UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__)) # define U_EXPORT __declspec(dllexport) -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__open_xl__) # define U_EXPORT __attribute__((visibility("default"))) #elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \ || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550) @@ -805,7 +817,7 @@ */ #ifdef U_HIDDEN /* Use the predefined value. */ -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__open_xl__) # define U_HIDDEN __attribute__((visibility("hidden"))) #else # define U_HIDDEN diff --git a/thirdparty/icu4c/common/unicode/simpleformatter.h b/thirdparty/icu4c/common/unicode/simpleformatter.h index 33bdd6d48f..62c75d08bb 100644 --- a/thirdparty/icu4c/common/unicode/simpleformatter.h +++ b/thirdparty/icu4c/common/unicode/simpleformatter.h @@ -63,7 +63,7 @@ public: * Default constructor. * @stable ICU 57 */ - SimpleFormatter() : compiledPattern((char16_t)0) {} + SimpleFormatter() : compiledPattern(static_cast<char16_t>(0)) {} /** * Constructs a formatter from the pattern string. diff --git a/thirdparty/icu4c/common/unicode/stringpiece.h b/thirdparty/icu4c/common/unicode/stringpiece.h index 6f2a50eafc..74919d7222 100644 --- a/thirdparty/icu4c/common/unicode/stringpiece.h +++ b/thirdparty/icu4c/common/unicode/stringpiece.h @@ -32,6 +32,7 @@ #if U_SHOW_CPLUSPLUS_API #include <cstddef> +#include <string_view> #include <type_traits> #include "unicode/uobject.h" @@ -176,6 +177,16 @@ class U_COMMON_API StringPiece : public UMemory { */ StringPiece(const StringPiece& x, int32_t pos, int32_t len); +#ifndef U_HIDE_INTERNAL_API + /** + * Converts to a std::string_view(). + * @internal + */ + inline operator std::string_view() const { + return {data(), static_cast<std::string_view::size_type>(size())}; + } +#endif // U_HIDE_INTERNAL_API + /** * Returns the string pointer. May be nullptr if it is empty. * diff --git a/thirdparty/icu4c/common/unicode/stringtriebuilder.h b/thirdparty/icu4c/common/unicode/stringtriebuilder.h index 429d7883f1..62108dfe3e 100644 --- a/thirdparty/icu4c/common/unicode/stringtriebuilder.h +++ b/thirdparty/icu4c/common/unicode/stringtriebuilder.h @@ -346,7 +346,7 @@ protected: virtual void write(StringTrieBuilder &builder) override; // Adds a unit with a final value. void add(int32_t c, int32_t value) { - units[length]=(char16_t)c; + units[length] = static_cast<char16_t>(c); equal[length]=nullptr; values[length]=value; ++length; @@ -354,7 +354,7 @@ protected: } // Adds a unit which leads to another match node. void add(int32_t c, Node *node) { - units[length]=(char16_t)c; + units[length] = static_cast<char16_t>(c); equal[length]=node; values[length]=0; ++length; diff --git a/thirdparty/icu4c/common/unicode/ucasemap.h b/thirdparty/icu4c/common/unicode/ucasemap.h index d1c1b483ab..4b623e6910 100644 --- a/thirdparty/icu4c/common/unicode/ucasemap.h +++ b/thirdparty/icu4c/common/unicode/ucasemap.h @@ -202,8 +202,8 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * * The titlecase break iterator can be provided to customize for arbitrary * styles, using rules and dictionaries beyond the standard iterators. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. * * This function uses only the setText(), first() and next() methods of the * provided break iterator. @@ -312,8 +312,8 @@ ucasemap_utf8ToUpper(const UCaseMap *csm, * * The titlecase break iterator can be provided to customize for arbitrary * styles, using rules and dictionaries beyond the standard iterators. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. * * This function uses only the setUText(), first(), next() and close() methods of the * provided break iterator. diff --git a/thirdparty/icu4c/common/unicode/uchar.h b/thirdparty/icu4c/common/unicode/uchar.h index 4572230d9f..0daa7dd214 100644 --- a/thirdparty/icu4c/common/unicode/uchar.h +++ b/thirdparty/icu4c/common/unicode/uchar.h @@ -54,14 +54,14 @@ U_CDECL_BEGIN /*==========================================================================*/ /** * Unicode version number, default for the current ICU version. - * The actual Unicode Character Database (UCD) data is stored in uprops.dat + * The actual Unicode Character Database (UCD) data is stored in uprops.icu * and may be generated from UCD files from a different Unicode version. * Call u_getUnicodeVersion to get the actual Unicode version of the data. * * @see u_getUnicodeVersion * @stable ICU 2.0 */ -#define U_UNICODE_VERSION "15.1" +#define U_UNICODE_VERSION "16.0" /** * \file @@ -533,33 +533,39 @@ typedef enum UProperty { * @stable ICU 70 */ UCHAR_RGI_EMOJI=71, -#ifndef U_HIDE_DRAFT_API /** * Binary property IDS_Unary_Operator. * For programmatic determination of Ideographic Description Sequences. * - * @draft ICU 74 + * @stable ICU 74 */ UCHAR_IDS_UNARY_OPERATOR=72, /** * Binary property ID_Compat_Math_Start. * Used in mathematical identifier profile in UAX #31. - * @draft ICU 74 + * @stable ICU 74 */ UCHAR_ID_COMPAT_MATH_START=73, /** * Binary property ID_Compat_Math_Continue. * Used in mathematical identifier profile in UAX #31. - * @draft ICU 74 + * @stable ICU 74 */ UCHAR_ID_COMPAT_MATH_CONTINUE=74, +#ifndef U_HIDE_DRAFT_API + /** + * Binary property Modifier_Combining_Mark. + * Used by the AMTRA algorithm in UAX #53. + * @draft ICU 76 + */ + UCHAR_MODIFIER_COMBINING_MARK=75, #endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DEPRECATED_API /** * One more than the last constant for binary Unicode properties. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UCHAR_BINARY_LIMIT=75, + UCHAR_BINARY_LIMIT=76, #endif // U_HIDE_DEPRECATED_API /** Enumerated property Bidi_Class. @@ -677,13 +683,19 @@ typedef enum UProperty { * @draft ICU 75 */ UCHAR_IDENTIFIER_STATUS=0x1019, + /** + * Enumerated property Indic_Conjunct_Break. + * Used in the grapheme cluster break algorithm in UAX #29. + * @draft ICU 76 + */ + UCHAR_INDIC_CONJUNCT_BREAK=0x101A, #endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DEPRECATED_API /** * One more than the last constant for enumerated/integer Unicode properties. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UCHAR_INT_LIMIT=0x101A, + UCHAR_INT_LIMIT=0x101B, #endif // U_HIDE_DEPRECATED_API /** Bitmask property General_Category_Mask. @@ -1952,6 +1964,29 @@ enum UBlockCode { /** @stable ICU 74 */ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 328, /*[2EBF0]*/ + // New blocks in Unicode 16.0 + + /** @stable ICU 76 */ + UBLOCK_EGYPTIAN_HIEROGLYPHS_EXTENDED_A = 329, /*[13460]*/ + /** @stable ICU 76 */ + UBLOCK_GARAY = 330, /*[10D40]*/ + /** @stable ICU 76 */ + UBLOCK_GURUNG_KHEMA = 331, /*[16100]*/ + /** @stable ICU 76 */ + UBLOCK_KIRAT_RAI = 332, /*[16D40]*/ + /** @stable ICU 76 */ + UBLOCK_MYANMAR_EXTENDED_C = 333, /*[116D0]*/ + /** @stable ICU 76 */ + UBLOCK_OL_ONAL = 334, /*[1E5D0]*/ + /** @stable ICU 76 */ + UBLOCK_SUNUWAR = 335, /*[11BC0]*/ + /** @stable ICU 76 */ + UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT = 336, /*[1CC00]*/ + /** @stable ICU 76 */ + UBLOCK_TODHRI = 337, /*[105C0]*/ + /** @stable ICU 76 */ + UBLOCK_TULU_TIGALARI = 338, /*[11380]*/ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UBlockCode value. @@ -1959,7 +1994,7 @@ enum UBlockCode { * * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UBLOCK_COUNT = 329, + UBLOCK_COUNT = 339, #endif // U_HIDE_DEPRECATED_API /** @stable ICU 2.0 */ @@ -2254,6 +2289,8 @@ typedef enum UJoiningGroup { U_JG_THIN_YEH, /**< @stable ICU 70 */ U_JG_VERTICAL_TAIL, /**< @stable ICU 70 */ + U_JG_KASHMIRI_YEH, /**< @stable ICU 76 */ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UJoiningGroup value. @@ -2702,8 +2739,35 @@ typedef enum UIndicSyllabicCategory { U_INSC_VOWEL_DEPENDENT, /** @stable ICU 63 */ U_INSC_VOWEL_INDEPENDENT, + /** @stable ICU 76 */ + U_INSC_REORDERING_KILLER, } UIndicSyllabicCategory; +#ifndef U_HIDE_DRAFT_API +/** + * Indic Conjunct Break constants. + * + * @see UCHAR_INDIC_CONJUNCT_BREAK + * @draft ICU 76 + */ +typedef enum UIndicConjunctBreak { + /* + * Note: UIndicConjunctBreak constants are parsed by preparseucd.py. + * It matches lines like + * U_INCB_<Unicode Indic_Conjunct_Break value name> + */ + + /** @draft ICU 76 */ + U_INCB_NONE, + /** @draft ICU 76 */ + U_INCB_CONSONANT, + /** @draft ICU 76 */ + U_INCB_EXTEND, + /** @draft ICU 76 */ + U_INCB_LINKER, +} UIndicConjunctBreak; +#endif // U_HIDE_DRAFT_API + /** * Vertical Orientation constants. * diff --git a/thirdparty/icu4c/common/unicode/ucharstrie.h b/thirdparty/icu4c/common/unicode/ucharstrie.h index fa1b55616c..ca4b469470 100644 --- a/thirdparty/icu4c/common/unicode/ucharstrie.h +++ b/thirdparty/icu4c/common/unicode/ucharstrie.h @@ -107,7 +107,7 @@ public: */ uint64_t getState64() const { return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) | - (uint64_t)(pos_ - uchars_); + static_cast<uint64_t>(pos_ - uchars_); } /** @@ -493,7 +493,7 @@ private: } static inline UStringTrieResult valueResult(int32_t node) { - return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15)); + return static_cast<UStringTrieResult>(USTRINGTRIE_INTERMEDIATE_VALUE - (node >> 15)); } // Handles a branch node for both next(uchar) and next(string). diff --git a/thirdparty/icu4c/common/unicode/uidna.h b/thirdparty/icu4c/common/unicode/uidna.h index 24a81ceadd..362a2dcbe6 100644 --- a/thirdparty/icu4c/common/unicode/uidna.h +++ b/thirdparty/icu4c/common/unicode/uidna.h @@ -49,11 +49,19 @@ */ enum { /** - * Default options value: None of the other options are set. + * Default options value: UTS #46 nontransitional processing. * For use in static worker and factory methods. + * + * Since ICU 76, this is the same as + * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE, + * corresponding to Unicode 15.1 UTS #46 deprecating transitional processing. + * (These options are ignored by the IDNA2003 implementation.) + * + * Before ICU 76, this constant did not set any of the options. + * * @stable ICU 2.6 */ - UIDNA_DEFAULT=0, + UIDNA_DEFAULT=0x30, #ifndef U_HIDE_DEPRECATED_API /** * Option to allow unassigned code points in domain names and labels. @@ -91,19 +99,27 @@ enum { /** * IDNA option for nontransitional processing in ToASCII(). * For use in static worker and factory methods. + * * <p>By default, ToASCII() uses transitional processing. + * Unicode 15.1 UTS #46 deprecated transitional processing. + * * <p>This option is ignored by the IDNA2003 implementation. * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) * @stable ICU 4.6 + * @see UIDNA_DEFAULT */ UIDNA_NONTRANSITIONAL_TO_ASCII=0x10, /** * IDNA option for nontransitional processing in ToUnicode(). * For use in static worker and factory methods. + * * <p>By default, ToUnicode() uses transitional processing. + * Unicode 15.1 UTS #46 deprecated transitional processing. + * * <p>This option is ignored by the IDNA2003 implementation. * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) * @stable ICU 4.6 + * @see UIDNA_DEFAULT */ UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20, /** @@ -134,6 +150,8 @@ typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */ * For details about the UTS #46 implementation see the IDNA C++ class in idna.h. * * @param options Bit set to modify the processing and error checking. + * These should include UIDNA_DEFAULT, or + * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE. * See option bit set values in uidna.h. * @param pErrorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns diff --git a/thirdparty/icu4c/common/unicode/ulocale.h b/thirdparty/icu4c/common/unicode/ulocale.h index 33e92844bc..1b3af3a5f2 100644 --- a/thirdparty/icu4c/common/unicode/ulocale.h +++ b/thirdparty/icu4c/common/unicode/ulocale.h @@ -13,16 +13,15 @@ * \brief C API: Locale ID functionality similar to C++ class Locale */ -#ifndef U_HIDE_DRAFT_API /** * Opaque C service object type for the locale API - * @draft ICU 74 + * @stable ICU 74 */ struct ULocale; /** * C typedef for struct ULocale. - * @draft ICU 74 + * @stable ICU 74 */ typedef struct ULocale ULocale; @@ -37,7 +36,7 @@ typedef struct ULocale ULocale; * @param err the error code * @return the locale. * - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI ULocale* U_EXPORT2 ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err); @@ -53,7 +52,7 @@ ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err); * @param err the error code * @return the locale. * - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI ULocale* U_EXPORT2 ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err); @@ -62,7 +61,7 @@ ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err); * Close the locale and destroy it's internal states. * * @param locale the locale - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocale_close(ULocale* locale); @@ -72,7 +71,7 @@ ulocale_close(ULocale* locale); * * @param locale the locale * @return the language code of the locale. - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI const char* U_EXPORT2 ulocale_getLanguage(const ULocale* locale); @@ -82,7 +81,7 @@ ulocale_getLanguage(const ULocale* locale); * * @param locale the locale * @return A pointer to the script. - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI const char* U_EXPORT2 ulocale_getScript(const ULocale* locale); @@ -92,7 +91,7 @@ ulocale_getScript(const ULocale* locale); * * @param locale the locale * @return A pointer to the region. - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI const char* U_EXPORT2 ulocale_getRegion(const ULocale* locale); @@ -102,7 +101,7 @@ ulocale_getRegion(const ULocale* locale); * * @param locale the locale * @return A pointer to the variant. - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI const char* U_EXPORT2 ulocale_getVariant(const ULocale* locale); @@ -115,7 +114,7 @@ ulocale_getVariant(const ULocale* locale); * * @param locale the locale * @return A pointer to "name". - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI const char* U_EXPORT2 ulocale_getLocaleID(const ULocale* locale); @@ -126,7 +125,7 @@ ulocale_getLocaleID(const ULocale* locale); * * @param locale the locale * @return A pointer to "base name". - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI const char* U_EXPORT2 ulocale_getBaseName(const ULocale* locale); @@ -136,7 +135,7 @@ ulocale_getBaseName(const ULocale* locale); * * @param locale the locale * @return false if it is a real locale, true if it is a bogus locale - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI bool U_EXPORT2 ulocale_isBogus(const ULocale* locale); @@ -148,7 +147,7 @@ ulocale_isBogus(const ULocale* locale); * @param err the error code * @return pointer to UEnumeration, or nullptr if there are no keywords. * Client must call uenum_close() to dispose the returned value. - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI UEnumeration* U_EXPORT2 ulocale_getKeywords(const ULocale* locale, UErrorCode *err); @@ -160,7 +159,7 @@ ulocale_getKeywords(const ULocale* locale, UErrorCode *err); * @param err the error code * @return pointer to UEnumeration, or nullptr if there are no keywords. * Client must call uenum_close() to dispose the returned value. - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI UEnumeration* U_EXPORT2 ulocale_getUnicodeKeywords(const ULocale* locale, UErrorCode *err); @@ -178,7 +177,7 @@ ulocale_getUnicodeKeywords(const ULocale* locale, UErrorCode *err); * @param valueBuffer The buffer to receive the value. * @param valueBufferCapacity The capacity of receiving valueBuffer. * @param err the error code - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI int32_t U_EXPORT2 ulocale_getKeywordValue( @@ -198,7 +197,7 @@ ulocale_getKeywordValue( * @param valueBuffer The buffer to receive the Unicode value. * @param valueBufferCapacity The capacity of receiving valueBuffer. * @param err the error code - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI int32_t U_EXPORT2 ulocale_getUnicodeKeywordValue( @@ -216,7 +215,7 @@ U_NAMESPACE_BEGIN * * @see LocalPointerBase * @see LocalPointer - * @draft ICU 74 + * @stable ICU 74 */ U_DEFINE_LOCAL_OPEN_POINTER(LocalULocalePointer, ULocale, ulocale_close); @@ -224,6 +223,4 @@ U_NAMESPACE_END #endif /* U_SHOW_CPLUSPLUS_API */ -#endif /* U_HIDE_DRAFT_API */ - #endif /*_ULOCALE */ diff --git a/thirdparty/icu4c/common/unicode/ulocbuilder.h b/thirdparty/icu4c/common/unicode/ulocbuilder.h index 9b147292f4..ce61995bde 100644 --- a/thirdparty/icu4c/common/unicode/ulocbuilder.h +++ b/thirdparty/icu4c/common/unicode/ulocbuilder.h @@ -12,17 +12,15 @@ * \brief C API: Builder API for Locale */ -#ifndef U_HIDE_DRAFT_API - /** * Opaque C service object type for the locale builder API - * @draft ICU 74 + * @stable ICU 74 */ struct ULocaleBuilder; /** * C typedef for struct ULocaleBuilder. - * @draft ICU 74 + * @stable ICU 74 */ typedef struct ULocaleBuilder ULocaleBuilder; @@ -62,7 +60,7 @@ typedef struct ULocaleBuilder ULocaleBuilder; * UErrorCode, then track the error of the validation of the input parameter * into the internal UErrorCode. * - * @draft ICU 74 + * @stable ICU 74 */ /** @@ -71,7 +69,7 @@ typedef struct ULocaleBuilder ULocaleBuilder; * empty string. The created builder should be destroyed by calling * ulocbld_close(); * - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI ULocaleBuilder* U_EXPORT2 ulocbld_open(void); @@ -79,7 +77,7 @@ ulocbld_open(void); /** * Close the builder and destroy it's internal states. * @param builder the builder - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_close(ULocaleBuilder* builder); @@ -97,7 +95,7 @@ ulocbld_close(ULocaleBuilder* builder); * @param length the length of the locale; if negative, then the locale need to be * null terminated, * - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_setLocale(ULocaleBuilder* builder, const char* locale, int32_t length); @@ -113,7 +111,7 @@ ulocbld_setLocale(ULocaleBuilder* builder, const char* locale, int32_t length); * @param locale the locale, a ULocale* pointer. The builder adopts the locale * after the call and the client must not delete it. * - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_adoptULocale(ULocaleBuilder* builder, ULocale* locale); @@ -136,7 +134,7 @@ ulocbld_adoptULocale(ULocaleBuilder* builder, ULocale* locale); * the length is non-negative) * @param length the length of the tag; if negative, then the tag need to be * null terminated, - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_setLanguageTag(ULocaleBuilder* builder, const char* tag, int32_t length); @@ -156,7 +154,7 @@ ulocbld_setLanguageTag(ULocaleBuilder* builder, const char* tag, int32_t length) * the length is non-negative) * @param length the length of the language; if negative, then the language need to be * null terminated, - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_setLanguage(ULocaleBuilder* builder, const char* language, int32_t length); @@ -177,7 +175,7 @@ ulocbld_setLanguage(ULocaleBuilder* builder, const char* language, int32_t lengt * the length is non-negative) * @param length the length of the script; if negative, then the script need to be * null terminated, - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_setScript(ULocaleBuilder* builder, const char* script, int32_t length); @@ -201,7 +199,7 @@ ulocbld_setScript(ULocaleBuilder* builder, const char* script, int32_t length); * the length is non-negative) * @param length the length of the region; if negative, then the region need to be * null terminated, - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_setRegion(ULocaleBuilder* builder, const char* region, int32_t length); @@ -227,7 +225,7 @@ ulocbld_setRegion(ULocaleBuilder* builder, const char* region, int32_t length); * the length is non-negative) * @param length the length of the variant; if negative, then the variant need to be * null terminated, - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_setVariant(ULocaleBuilder* builder, const char* variant, int32_t length); @@ -253,7 +251,7 @@ ulocbld_setVariant(ULocaleBuilder* builder, const char* variant, int32_t length) * the length is non-negative) * @param length the length of the value; if negative, then the value need to be * null terminated, - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_setExtension(ULocaleBuilder* builder, char key, const char* value, int32_t length); @@ -282,7 +280,7 @@ ulocbld_setExtension(ULocaleBuilder* builder, char key, const char* value, int32 * @param typeLength the length of the type; if negative, then the type need to * be null terminated, * @return This builder. - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_setUnicodeLocaleKeyword(ULocaleBuilder* builder, @@ -299,7 +297,7 @@ ulocbld_setUnicodeLocaleKeyword(ULocaleBuilder* builder, * terminated when the length is non-negative) * @param length the length of the attribute; if negative, then the attribute * need to be null terminated, - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_addUnicodeLocaleAttribute( @@ -318,7 +316,7 @@ ulocbld_addUnicodeLocaleAttribute( * terminated when the length is non-negative) * @param length the length of the attribute; if negative, then the attribute * need to be null terminated, - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_removeUnicodeLocaleAttribute( @@ -329,7 +327,7 @@ ulocbld_removeUnicodeLocaleAttribute( * <p>This method clears the internal UErrorCode. * * @param builder the builder - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_clear(ULocaleBuilder* builder); @@ -339,7 +337,7 @@ ulocbld_clear(ULocaleBuilder* builder); * Language, script, region and variant are unchanged. * * @param builder the builder - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI void U_EXPORT2 ulocbld_clearExtensions(ULocaleBuilder* builder); @@ -358,7 +356,7 @@ ulocbld_clearExtensions(ULocaleBuilder* builder); * @param localeCapacity the size of the locale buffer to store the locale id * @param err the error code * @return the length of the locale id in buffer - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI int32_t U_EXPORT2 ulocbld_buildLocaleID(ULocaleBuilder* builder, char* locale, @@ -377,7 +375,7 @@ ulocbld_buildLocaleID(ULocaleBuilder* builder, char* locale, * @param err the error code. * @return the locale, a ULocale* pointer. The created ULocale must be * destroyed by calling {@link ulocale_close}. - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI ULocale* U_EXPORT2 ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err); @@ -397,7 +395,7 @@ ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err); * tag * @param err the error code * @return the length of the language tag in buffer - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI int32_t U_EXPORT2 ulocbld_buildLanguageTag(ULocaleBuilder* builder, char* language, @@ -412,7 +410,7 @@ ulocbld_buildLanguageTag(ULocaleBuilder* builder, char* language, * Unchanged if there is no such error or if outErrorCode * already contained an error. * @return true if U_FAILURE(*outErrorCode) - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI UBool U_EXPORT2 ulocbld_copyErrorTo(const ULocaleBuilder* builder, UErrorCode *outErrorCode); @@ -428,7 +426,7 @@ U_NAMESPACE_BEGIN * * @see LocalPointerBase * @see LocalPointer - * @draft ICU 74 + * @stable ICU 74 */ U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleBuilderPointer, ULocaleBuilder, ulocbld_close); @@ -436,6 +434,4 @@ U_NAMESPACE_END #endif /* U_SHOW_CPLUSPLUS_API */ -#endif /* U_HIDE_DRAFT_API */ - #endif // __ULOCBUILDER_H__ diff --git a/thirdparty/icu4c/common/unicode/uniset.h b/thirdparty/icu4c/common/unicode/uniset.h index 127e4ce063..d070fd631a 100644 --- a/thirdparty/icu4c/common/unicode/uniset.h +++ b/thirdparty/icu4c/common/unicode/uniset.h @@ -313,7 +313,7 @@ private: char16_t *pat = nullptr; int32_t patLen = 0; - UVector* strings = nullptr; // maintained in sorted order + UVector* strings_ = nullptr; // maintained in sorted order UnicodeSetStringSpan *stringSpan = nullptr; /** @@ -1102,6 +1102,118 @@ public: */ UChar32 charAt(int32_t index) const; +#ifndef U_HIDE_DRAFT_API + /** + * Returns a C++ "range" for iterating over the code points of this set. + * + * \code + * UnicodeSet set(u"[abcçカ🚴]", errorCode); + * for (UChar32 c : set.codePoints()) { + * printf("set.codePoint U+%04lx\n", (long)c); + * } + * \endcode + * + * @return a "range" object for iterating over the code points of this set. + * @draft ICU 76 + * @see ranges + * @see strings + * @see begin + * @see end + */ + inline U_HEADER_NESTED_NAMESPACE::USetCodePoints codePoints() const { + return U_HEADER_NESTED_NAMESPACE::USetCodePoints(toUSet()); + } + + /** + * Returns a C++ "range" for iterating over the code point ranges of this set. + * + * \code + * UnicodeSet set(u"[abcçカ🚴]", errorCode); + * for (auto [start, end] : set.ranges()) { + * printf("set.range U+%04lx..U+%04lx\n", (long)start, (long)end); + * } + * for (auto range : set.ranges()) { + * for (UChar32 c : range) { + * printf("set.range.c U+%04lx\n", (long)c); + * } + * } + * \endcode + * + * @return a "range" object for iterating over the code point ranges of this set. + * @draft ICU 76 + * @see codePoints + * @see strings + * @see begin + * @see end + */ + inline U_HEADER_NESTED_NAMESPACE::USetRanges ranges() const { + return U_HEADER_NESTED_NAMESPACE::USetRanges(toUSet()); + } + + /** + * Returns a C++ "range" for iterating over the empty and multi-character strings of this set. + * Returns each string as a std::u16string_view without copying its contents. + * + * \code + * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode); + * for (auto s : set.strings()) { + * UnicodeString us(s); + * std::string u8; + * printf("set.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str()); + * } + * \endcode + * + * @return a "range" object for iterating over the strings of this set. + * @draft ICU 76 + * @see codePoints + * @see ranges + * @see begin + * @see end + */ + inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const { + return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet()); + } + + /** + * Returns a C++ iterator for iterating over all of the elements of this set. + * Convenient all-in one iteration, but creates a UnicodeString for each + * code point or string. + * (Similar to how Java UnicodeSet *is an* Iterable<String>.) + * + * Code points are returned first, then empty and multi-character strings. + * + * \code + * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode); + * for (auto el : set) { + * std::string u8; + * printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str()); + * } + * \endcode + * + * @return an all-elements iterator. + * @draft ICU 76 + * @see end + * @see codePoints + * @see ranges + * @see strings + */ + inline U_HEADER_NESTED_NAMESPACE::USetElementIterator begin() const { + return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).begin(); + } + + /** + * @return an exclusive-end sentinel for iterating over all of the elements of this set. + * @draft ICU 76 + * @see begin + * @see codePoints + * @see ranges + * @see strings + */ + inline U_HEADER_NESTED_NAMESPACE::USetElementIterator end() const { + return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).end(); + } +#endif // U_HIDE_DRAFT_API + /** * Adds the specified range to this set if it is not already * present. If this set already contains the specified range, @@ -1731,7 +1843,7 @@ inline bool UnicodeSet::operator!=(const UnicodeSet& o) const { } inline UBool UnicodeSet::isFrozen() const { - return (UBool)(bmpSet!=nullptr || stringSpan!=nullptr); + return bmpSet != nullptr || stringSpan != nullptr; } inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const { @@ -1747,7 +1859,7 @@ inline UBool UnicodeSet::containsSome(const UnicodeString& s) const { } inline UBool UnicodeSet::isBogus() const { - return (UBool)(fFlags & kIsBogus); + return fFlags & kIsBogus; } inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) { diff --git a/thirdparty/icu4c/common/unicode/unistr.h b/thirdparty/icu4c/common/unicode/unistr.h index a41e6358fa..39d789fd6e 100644 --- a/thirdparty/icu4c/common/unicode/unistr.h +++ b/thirdparty/icu4c/common/unicode/unistr.h @@ -33,6 +33,7 @@ #if U_SHOW_CPLUSPLUS_API #include <cstddef> +#include <string_view> #include "unicode/char16ptr.h" #include "unicode/rep.h" #include "unicode/std_string.h" @@ -97,16 +98,19 @@ class UnicodeStringAppendable; // unicode/appendable.h #define US_INV icu::UnicodeString::kInvariant /** - * Unicode String literals in C++. + * \def UNICODE_STRING + * Obsolete macro approximating UnicodeString literals. * - * Note: these macros are not recommended for new code. - * Prior to the availability of C++11 and u"unicode string literals", - * these macros were provided for portability and efficiency when + * Prior to the availability of C++11 and u"UTF-16 string literals", + * this macro was provided for portability and efficiency when * initializing UnicodeStrings from literals. * - * They work only for strings that contain "invariant characters", i.e., - * only latin letters, digits, and some punctuation. - * See utypes.h for details. + * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString str(u"literal"); + * if (str == u"other literal") { ... } + * \endcode * * The string parameter must be a C string literal. * The length of the string, not including the terminating @@ -121,16 +125,12 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * Unicode String literals in C++. - * Dependent on the platform properties, different UnicodeString - * constructors should be used to create a UnicodeString object from - * a string literal. - * The macros are defined for improved performance. - * They work only for strings that contain "invariant characters", i.e., - * only latin letters, digits, and some punctuation. - * See utypes.h for details. + * Obsolete macro approximating UnicodeString literals. + * See UNICODE_STRING. * * The string parameter must be a C string literal. * @stable ICU 2.0 + * @see UNICODE_STRING */ #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) @@ -327,6 +327,30 @@ public: */ inline bool operator== (const UnicodeString& text) const; +#ifndef U_HIDE_DRAFT_API + /** + * Equality operator. Performs only bitwise comparison with `text` + * which is, or which is implicitly convertible to, + * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. + * + * For performance, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString str = ...; + * if (str == u"literal") { ... } + * \endcode + * @param text The string view to compare to this string. + * @return true if `text` contains the same characters as this one, false otherwise. + * @draft ICU 76 + */ + template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> + inline bool operator==(const S &text) const { + std::u16string_view sv(internal::toU16StringView(text)); + uint32_t len; // unsigned to avoid a compiler warning + return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len); + } +#endif // U_HIDE_DRAFT_API + /** * Inequality operator. Performs only bitwise comparison. * @param text The UnicodeString to compare to this one. @@ -336,6 +360,30 @@ public: */ inline bool operator!= (const UnicodeString& text) const; +#ifndef U_HIDE_DRAFT_API + /** + * Inequality operator. Performs only bitwise comparison with `text` + * which is, or which is implicitly convertible to, + * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. + * + * For performance, you can use std::u16string_view literals with compile-time + * length determination: + * \code + * #include <string_view> + * using namespace std::string_view_literals; + * UnicodeString str = ...; + * if (str != u"literal"sv) { ... } + * \endcode + * @param text The string view to compare to this string. + * @return false if `text` contains the same characters as this one, true otherwise. + * @draft ICU 76 + */ + template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> + inline bool operator!=(const S &text) const { + return !operator==(text); + } +#endif // U_HIDE_DRAFT_API + /** * Greater than operator. Performs only bitwise comparison. * @param text The UnicodeString to compare to this one. @@ -1897,6 +1945,24 @@ public: */ UnicodeString &fastCopyFrom(const UnicodeString &src); +#ifndef U_HIDE_DRAFT_API + /** + * Assignment operator. Replaces the characters in this UnicodeString + * with a copy of the characters from the `src` + * which is, or which is implicitly convertible to, + * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. + * + * @param src The string view containing the characters to copy. + * @return a reference to this + * @draft ICU 76 + */ + template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> + inline UnicodeString &operator=(const S &src) { + unBogus(); + return doReplace(0, length(), internal::toU16StringView(src)); + } +#endif // U_HIDE_DRAFT_API + /** * Move assignment operator; might leave src in bogus state. * This string will have the same contents and state that the source string had. @@ -2146,6 +2212,23 @@ public: */ inline UnicodeString& operator+= (const UnicodeString& srcText); +#ifndef U_HIDE_DRAFT_API + /** + * Append operator. Appends the characters in `src` + * which is, or which is implicitly convertible to, + * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view, + * to the UnicodeString object. + * + * @param src the source for the new characters + * @return a reference to this + * @draft ICU 76 + */ + template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> + inline UnicodeString& operator+=(const S &src) { + return doAppend(internal::toU16StringView(src)); + } +#endif // U_HIDE_DRAFT_API + /** * Append the characters * in `srcText` in the range @@ -2191,8 +2274,8 @@ public: int32_t srcLength); /** - * Append the characters in `srcChars` to the UnicodeString object - * at offset `start`. `srcChars` is not modified. + * Append the characters in `srcChars` to the UnicodeString object. + * `srcChars` is not modified. * @param srcChars the source for the new characters * @param srcLength the number of Unicode characters in `srcChars`; * can be -1 if `srcChars` is NUL-terminated @@ -2202,6 +2285,23 @@ public: inline UnicodeString& append(ConstChar16Ptr srcChars, int32_t srcLength); +#ifndef U_HIDE_DRAFT_API + /** + * Appends the characters in `src` + * which is, or which is implicitly convertible to, + * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view, + * to the UnicodeString object. + * + * @param src the source for the new characters + * @return a reference to this + * @draft ICU 76 + */ + template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> + inline UnicodeString& append(const S &src) { + return doAppend(internal::toU16StringView(src)); + } +#endif // U_HIDE_DRAFT_API + /** * Append the code unit `srcChar` to the UnicodeString object. * @param srcChar the code unit to append @@ -2556,7 +2656,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& remove(int32_t start, - int32_t length = (int32_t)INT32_MAX); + int32_t length = static_cast<int32_t>(INT32_MAX)); /** * Remove the characters in the range @@ -2567,7 +2667,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& removeBetween(int32_t start, - int32_t limit = (int32_t)INT32_MAX); + int32_t limit = static_cast<int32_t>(INT32_MAX)); /** * Retain only the characters in the range @@ -2693,8 +2793,8 @@ public: * styles, using rules and dictionaries beyond the standard iterators. * It may be more efficient to always provide an iterator to avoid * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. * * This function uses only the setText(), first() and next() methods of the * provided break iterator. @@ -2721,8 +2821,8 @@ public: * styles, using rules and dictionaries beyond the standard iterators. * It may be more efficient to always provide an iterator to avoid * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. * * This function uses only the setText(), first() and next() methods of the * provided break iterator. @@ -2750,8 +2850,8 @@ public: * styles, using rules and dictionaries beyond the standard iterators. * It may be more efficient to always provide an iterator to avoid * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. * * This function uses only the setText(), first() and next() methods of the * provided break iterator. @@ -2925,6 +3025,37 @@ public: */ const char16_t *getTerminatedBuffer(); +#ifndef U_HIDE_DRAFT_API + /** + * Converts to a std::u16string_view. + * + * @return a string view of the contents of this string + * @draft ICU 76 + */ + inline operator std::u16string_view() const { + return {getBuffer(), static_cast<std::u16string_view::size_type>(length())}; + } + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts to a std::wstring_view. + * + * Note: This should remain draft until C++ standard plans + * about char16_t vs. wchar_t become clearer. + * + * @return a string view of the contents of this string + * @draft ICU 76 + */ + inline operator std::wstring_view() const { + const char16_t *p = getBuffer(); +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return { reinterpret_cast<const wchar_t *>(p), (std::wstring_view::size_type)length() }; + } +#endif // U_SIZEOF_WCHAR_T +#endif // U_HIDE_DRAFT_API + //======================================== // Constructors //======================================== @@ -2969,19 +3100,32 @@ public: */ UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); +#ifdef U_HIDE_DRAFT_API /** * char16_t* constructor. * * It is recommended to mark this constructor "explicit" by * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` * on the compiler command line or similar. + * + * Note, for string literals: + * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString str(u"literal"); + * if (str == u"other literal") { ... } + * \endcode + * * @param text The characters to place in the UnicodeString. `text` * must be NUL (U+0000) terminated. * @stable ICU 2.0 */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) : + UnicodeString(text, -1) {} +#endif // U_HIDE_DRAFT_API -#if !U_CHAR16_IS_TYPEDEF +#if !U_CHAR16_IS_TYPEDEF && \ + (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000)) /** * uint16_t * constructor. * Delegates to UnicodeString(const char16_t *). @@ -2989,14 +3133,23 @@ public: * It is recommended to mark this constructor "explicit" by * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` * on the compiler command line or similar. + * + * Note, for string literals: + * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString str(u"literal"); + * if (str == u"other literal") { ... } + * \endcode + * * @param text NUL-terminated UTF-16 string * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : - UnicodeString(ConstChar16Ptr(text)) {} + UnicodeString(ConstChar16Ptr(text), -1) {} #endif -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) +#if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)) /** * wchar_t * constructor. * (Only defined if U_SIZEOF_WCHAR_T==2.) @@ -3005,11 +3158,20 @@ public: * It is recommended to mark this constructor "explicit" by * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` * on the compiler command line or similar. + * + * Note, for string literals: + * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString str(u"literal"); + * if (str == u"other literal") { ... } + * \endcode + * * @param text NUL-terminated UTF-16 string * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : - UnicodeString(ConstChar16Ptr(text)) {} + UnicodeString(ConstChar16Ptr(text), -1) {} #endif /** @@ -3026,6 +3188,15 @@ public: /** * char16_t* constructor. + * + * Note, for string literals: + * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString str(u"literal"); + * if (str == u"other literal") { ... } + * \endcode + * * @param text The characters to place in the UnicodeString. * @param textLength The number of Unicode characters in `text` * to copy. @@ -3038,6 +3209,15 @@ public: /** * uint16_t * constructor. * Delegates to UnicodeString(const char16_t *, int32_t). + * + * Note, for string literals: + * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString str(u"literal"); + * if (str == u"other literal") { ... } + * \endcode + * * @param text UTF-16 string * @param textLength string length * @stable ICU 59 @@ -3051,7 +3231,16 @@ public: * wchar_t * constructor. * (Only defined if U_SIZEOF_WCHAR_T==2.) * Delegates to UnicodeString(const char16_t *, int32_t). - * @param text NUL-terminated UTF-16 string + * + * Note, for string literals: + * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString str(u"literal"); + * if (str == u"other literal") { ... } + * \endcode + * + * @param text UTF-16 string * @param textLength string length * @stable ICU 59 */ @@ -3068,6 +3257,26 @@ public: */ inline UnicodeString(const std::nullptr_t text, int32_t textLength); +#ifndef U_HIDE_DRAFT_API + /** + * Constructor from `text` + * which is, or which is implicitly convertible to, + * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. + * The string is bogus if the string view is too long. + * + * If you need a UnicodeString but need not copy the string view contents, + * then you can call the UnicodeString::readOnlyAlias() function instead of this constructor. + * + * @param text UTF-16 string + * @draft ICU 76 + */ + template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) { + fUnion.fFields.fLengthAndFlags = kShortString; + doAppend(internal::toU16StringViewNullable(text)); + } +#endif // U_HIDE_DRAFT_API + /** * Readonly-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but @@ -3082,6 +3291,14 @@ public: * When using fastCopyFrom(), the text will be aliased again, * so that both strings then alias the same readonly-text. * + * Note, for string literals: + * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString alias = UnicodeString::readOnlyAlias(u"literal"); + * if (str == u"other literal") { ... } + * \endcode + * * @param isTerminated specifies if `text` is `NUL`-terminated. * This must be true if `textLength==-1`. * @param text The characters to alias for the UnicodeString. @@ -3160,8 +3377,14 @@ public: * * For ASCII (really "invariant character") strings it is more efficient to use * the constructor that takes a US_INV (for its enum EInvariant). - * For ASCII (invariant-character) string literals, see UNICODE_STRING and - * UNICODE_STRING_SIMPLE. + * + * Note, for string literals: + * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString str(u"literal"); + * if (str == u"other literal") { ... } + * \endcode * * It is recommended to mark this constructor "explicit" by * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` @@ -3169,8 +3392,6 @@ public: * @param codepageData an array of bytes, null-terminated, * in the platform's default codepage. * @stable ICU 2.0 - * @see UNICODE_STRING - * @see UNICODE_STRING_SIMPLE */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); @@ -3270,6 +3491,15 @@ public: * // use ustr ... * } * \endcode + * + * Note, for string literals: + * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time + * length determination: + * \code + * UnicodeString str(u"literal"); + * if (str == u"other literal") { ... } + * \endcode + * * @param src String using only invariant characters. * @param textLength Length of src, or -1 if NUL-terminated. * @param inv Signature-distinguishing parameter, use US_INV. @@ -3343,6 +3573,58 @@ public: */ virtual ~UnicodeString(); +#ifndef U_HIDE_DRAFT_API + /** + * Readonly-aliasing factory method. + * Aliases the same buffer as the input `text` + * which is, or which is implicitly convertible to, + * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. + * The string is bogus if the string view is too long. + * + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has copy-on-write semantics: + * When the string is modified, then the buffer is first copied into + * newly allocated memory. + * The aliased buffer is never modified. + * + * In an assignment to another UnicodeString, when using the copy constructor + * or the assignment operator, the text will be copied. + * When using fastCopyFrom(), the text will be aliased again, + * so that both strings then alias the same readonly-text. + * + * @param text The string view to alias for the UnicodeString. + * @draft ICU 76 + */ + template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> + static inline UnicodeString readOnlyAlias(const S &text) { + return readOnlyAliasFromU16StringView(internal::toU16StringView(text)); + } + + /** + * Readonly-aliasing factory method. + * Aliases the same buffer as the input `text`. + * + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has copy-on-write semantics: + * When the string is modified, then the buffer is first copied into + * newly allocated memory. + * The aliased buffer is never modified. + * + * In an assignment to another UnicodeString, when using the copy constructor + * or the assignment operator, the text will be copied. + * When using fastCopyFrom(), the text will be aliased again, + * so that both strings then alias the same readonly-text. + * + * @param text The UnicodeString to alias. + * @draft ICU 76 + */ + static inline UnicodeString readOnlyAlias(const UnicodeString &text) { + return readOnlyAliasFromUnicodeString(text); + } +#endif // U_HIDE_DRAFT_API + /** * Create a UnicodeString from a UTF-8 string. * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. @@ -3470,6 +3752,9 @@ protected: virtual UChar32 getChar32At(int32_t offset) const override; private: + static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text); + static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text); + // For char* constructors. Could be made public. UnicodeString &setToUTF8(StringPiece utf8); // For extract(char*). @@ -3485,7 +3770,10 @@ private: * Internal string contents comparison, called by operator==. * Requires: this & text not bogus and have same lengths. */ - UBool doEquals(const UnicodeString &text, int32_t len) const; + inline UBool doEquals(const UnicodeString &text, int32_t len) const { + return doEquals(text.getArrayStart(), len); + } + UBool doEquals(const char16_t *text, int32_t len) const; inline UBool doEqualsSubstring(int32_t start, @@ -3580,9 +3868,11 @@ private: const char16_t *srcChars, int32_t srcStart, int32_t srcLength); + UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src); UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); + UnicodeString& doAppend(std::u16string_view src); UnicodeString& doReverse(int32_t start, int32_t length); @@ -3711,7 +4001,7 @@ private: * Must be at least U16_MAX_LENGTH for the single-code point constructor to work. * @see UNISTR_OBJECT_SIZE */ - US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR, + US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR, kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index) kInvalidHashCode=0, // invalid hash code kEmptyHashCode=1, // hash code for empty string @@ -3802,7 +4092,7 @@ private: }; /** - * Create a new UnicodeString with the concatenation of two others. + * Creates a new UnicodeString from the concatenation of two others. * * @param s1 The first string to be copied to the new one. * @param s2 The second string to be copied to the new one, after s1. @@ -3812,6 +4102,29 @@ private: U_COMMON_API UnicodeString U_EXPORT2 operator+ (const UnicodeString &s1, const UnicodeString &s2); +#ifndef U_HIDE_DRAFT_API +/** + * Creates a new UnicodeString from the concatenation of a UnicodeString and `s2` + * which is, or which is implicitly convertible to, + * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. + * + * @param s1 The string to be copied to the new one. + * @param s2 The string view to be copied to the new string, after s1. + * @return UnicodeString(s1).append(s2) + * @draft ICU 76 + */ +template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> +inline UnicodeString operator+(const UnicodeString &s1, const S &s2) { + return unistr_internalConcat(s1, internal::toU16StringView(s2)); +} +#endif // U_HIDE_DRAFT_API + +#ifndef U_FORCE_HIDE_INTERNAL_API +/** @internal */ +U_COMMON_API UnicodeString U_EXPORT2 +unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2); +#endif + //======================================== // Inline members //======================================== @@ -3914,18 +4227,18 @@ UnicodeString::hashCode() const inline UBool UnicodeString::isBogus() const -{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); } +{ return fUnion.fFields.fLengthAndFlags & kIsBogus; } inline UBool UnicodeString::isWritable() const -{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); } +{ return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); } inline UBool UnicodeString::isBufferWritable() const { - return (UBool)( + return !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && - (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); + (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1); } inline const char16_t * @@ -3950,7 +4263,7 @@ UnicodeString::doCompare(int32_t start, int32_t srcLength) const { if(srcText.isBogus()) { - return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise } else { srcText.pinIndices(srcStart, srcLength); return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); @@ -4057,7 +4370,7 @@ UnicodeString::doCompareCodePointOrder(int32_t start, int32_t srcLength) const { if(srcText.isBogus()) { - return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise } else { srcText.pinIndices(srcStart, srcLength); return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); @@ -4119,7 +4432,7 @@ UnicodeString::doCaseCompare(int32_t start, uint32_t options) const { if(srcText.isBogus()) { - return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise } else { srcText.pinIndices(srcStart, srcLength); return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); @@ -4531,7 +4844,7 @@ UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { inline char16_t UnicodeString::doCharAt(int32_t offset) const { - if((uint32_t)offset < (uint32_t)length()) { + if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) { return getArrayStart()[offset]; } else { return kInvalidUChar; @@ -4564,7 +4877,7 @@ inline void UnicodeString::setShortLength(int32_t len) { // requires 0 <= len <= kMaxShortLength fUnion.fFields.fLengthAndFlags = - (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); + static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); } inline void @@ -4758,7 +5071,7 @@ UnicodeString::truncate(int32_t targetLength) // truncate(0) of a bogus string makes the string empty and non-bogus unBogus(); return false; - } else if((uint32_t)targetLength < (uint32_t)length()) { + } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) { setLength(targetLength); return true; } else { diff --git a/thirdparty/icu4c/common/unicode/unorm2.h b/thirdparty/icu4c/common/unicode/unorm2.h index 3844041f17..48f614d74f 100644 --- a/thirdparty/icu4c/common/unicode/unorm2.h +++ b/thirdparty/icu4c/common/unicode/unorm2.h @@ -197,7 +197,6 @@ unorm2_getNFKDInstance(UErrorCode *pErrorCode); U_CAPI const UNormalizer2 * U_EXPORT2 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); -#ifndef U_HIDE_DRAFT_API /** * Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization * which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC. @@ -210,11 +209,10 @@ unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) * @return the requested Normalizer2, if successful - * @draft ICU 74 + * @stable ICU 74 */ U_CAPI const UNormalizer2 * U_EXPORT2 unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode); -#endif // U_HIDE_DRAFT_API /** * Returns a UNormalizer2 instance which uses the specified data file diff --git a/thirdparty/icu4c/common/unicode/urename.h b/thirdparty/icu4c/common/unicode/urename.h index c94592ec03..e39c779e81 100644 --- a/thirdparty/icu4c/common/unicode/urename.h +++ b/thirdparty/icu4c/common/unicode/urename.h @@ -490,6 +490,7 @@ #define ubiditransform_close U_ICU_ENTRY_POINT_RENAME(ubiditransform_close) #define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open) #define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform) +#define ublock_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ublock_addPropertyStarts) #define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode) #define ubrk_clone U_ICU_ENTRY_POINT_RENAME(ubrk_clone) #define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close) @@ -991,6 +992,7 @@ #define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString) #define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars) #define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars) +#define uhash_compareIStringView U_ICU_ENTRY_POINT_RENAME(uhash_compareIStringView) #define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong) #define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet) #define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars) @@ -1008,6 +1010,7 @@ #define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString) #define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars) #define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars) +#define uhash_hashIStringView U_ICU_ENTRY_POINT_RENAME(uhash_hashIStringView) #define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong) #define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet) #define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars) @@ -1209,10 +1212,14 @@ #define ulocimp_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(ulocimp_minimizeSubtags) #define ulocimp_setKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocimp_setKeywordValue) #define ulocimp_toBcpKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKey) +#define ulocimp_toBcpKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKeyWithFallback) #define ulocimp_toBcpType U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpType) +#define ulocimp_toBcpTypeWithFallback U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpTypeWithFallback) #define ulocimp_toLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_toLanguageTag) #define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey) +#define ulocimp_toLegacyKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKeyWithFallback) #define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType) +#define ulocimp_toLegacyTypeWithFallback U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyTypeWithFallback) #define ultag_getTKeyStart U_ICU_ENTRY_POINT_RENAME(ultag_getTKeyStart) #define ultag_isExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isExtensionSubtags) #define ultag_isLanguageSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isLanguageSubtag) @@ -1773,6 +1780,8 @@ #define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange) #define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount) #define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet) +#define uset_getString U_ICU_ENTRY_POINT_RENAME(uset_getString) +#define uset_getStringCount U_ICU_ENTRY_POINT_RENAME(uset_getStringCount) #define uset_hasStrings U_ICU_ENTRY_POINT_RENAME(uset_hasStrings) #define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf) #define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty) @@ -1811,7 +1820,6 @@ #define usnum_setMinimumIntegerDigits U_ICU_ENTRY_POINT_RENAME(usnum_setMinimumIntegerDigits) #define usnum_setSign U_ICU_ENTRY_POINT_RENAME(usnum_setSign) #define usnum_setToInt64 U_ICU_ENTRY_POINT_RENAME(usnum_setToInt64) -#define usnum_truncateStart U_ICU_ENTRY_POINT_RENAME(usnum_truncateStart) #define usnumf_close U_ICU_ENTRY_POINT_RENAME(usnumf_close) #define usnumf_format U_ICU_ENTRY_POINT_RENAME(usnumf_format) #define usnumf_formatInt64 U_ICU_ENTRY_POINT_RENAME(usnumf_formatInt64) diff --git a/thirdparty/icu4c/common/unicode/uscript.h b/thirdparty/icu4c/common/unicode/uscript.h index 2c9cee1bfa..d983c40901 100644 --- a/thirdparty/icu4c/common/unicode/uscript.h +++ b/thirdparty/icu4c/common/unicode/uscript.h @@ -503,6 +503,21 @@ typedef enum UScriptCode { /** @stable ICU 75 */ USCRIPT_ARABIC_NASTALIQ = 200, /* Aran */ + /** @stable ICU 76 */ + USCRIPT_GARAY = 201, /* Gara */ + /** @stable ICU 76 */ + USCRIPT_GURUNG_KHEMA = 202, /* Gukh */ + /** @stable ICU 76 */ + USCRIPT_KIRAT_RAI = 203, /* Krai */ + /** @stable ICU 76 */ + USCRIPT_OL_ONAL = 204, /* Onao */ + /** @stable ICU 76 */ + USCRIPT_SUNUWAR = 205, /* Sunu */ + /** @stable ICU 76 */ + USCRIPT_TODHRI = 206, /* Todr */ + /** @stable ICU 76 */ + USCRIPT_TULU_TIGALARI = 207, /* Tutg */ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UScriptCode value. @@ -510,7 +525,7 @@ typedef enum UScriptCode { * * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - USCRIPT_CODE_LIMIT = 201 + USCRIPT_CODE_LIMIT = 208 #endif // U_HIDE_DEPRECATED_API } UScriptCode; diff --git a/thirdparty/icu4c/common/unicode/uset.h b/thirdparty/icu4c/common/unicode/uset.h index cce95ce921..c8f9b5592d 100644 --- a/thirdparty/icu4c/common/unicode/uset.h +++ b/thirdparty/icu4c/common/unicode/uset.h @@ -33,7 +33,10 @@ #include "unicode/uchar.h" #if U_SHOW_CPLUSPLUS_API +#include <string_view> +#include "unicode/char16ptr.h" #include "unicode/localpointer.h" +#include "unicode/unistr.h" #endif // U_SHOW_CPLUSPLUS_API #ifndef USET_DEFINED @@ -955,7 +958,7 @@ uset_charAt(const USet* set, int32_t charIndex); /** * Returns the number of characters and strings contained in this set. - * The last (uset_getItemCount() - uset_getRangeCount()) items are strings. + * The last uset_getStringCount() == (uset_getItemCount() - uset_getRangeCount()) items are strings. * * This is slower than uset_getRangeCount() and uset_getItemCount() because * it counts the code points of all ranges. @@ -965,6 +968,8 @@ uset_charAt(const USet* set, int32_t charIndex); * contained in set * @stable ICU 2.4 * @see uset_getRangeCount + * @see uset_getStringCount + * @see uset_getItemCount */ U_CAPI int32_t U_EXPORT2 uset_size(const USet* set); @@ -975,11 +980,42 @@ uset_size(const USet* set); * @stable ICU 70 * @see uset_getItemCount * @see uset_getItem + * @see uset_getStringCount * @see uset_size */ U_CAPI int32_t U_EXPORT2 uset_getRangeCount(const USet *set); +#ifndef U_HIDE_DRAFT_API + +/** + * @param set the set + * @return the number of strings in this set. + * @draft ICU 76 + * @see uset_getRangeCount + * @see uset_getItemCount + * @see uset_size + */ +U_CAPI int32_t U_EXPORT2 +uset_getStringCount(const USet *set); + +/** + * Returns the index-th string (empty or multi-character) in the set. + * The string may not be NUL-terminated. + * The output length must be used, and the caller must not read more than that many UChars. + * + * @param set the set + * @param index the string index, 0 .. uset_getStringCount() - 1 + * @param pLength the output string length; must not be NULL + * @return the pointer to the string; NULL if the index is out of range or pLength is NULL + * @draft ICU 76 + * @see uset_getStringCount + */ +U_CAPI const UChar* U_EXPORT2 +uset_getString(const USet *set, int32_t index, int32_t *pLength); + +#endif // U_HIDE_DRAFT_API + /** * Returns the number of items in this set. An item is either a range * of characters or a single multicharacter string. @@ -987,6 +1023,8 @@ uset_getRangeCount(const USet *set); * @return a non-negative integer counting the character ranges * and/or strings contained in set * @stable ICU 2.4 + * @see uset_getRangeCount + * @see uset_getStringCount */ U_CAPI int32_t U_EXPORT2 uset_getItemCount(const USet* set); @@ -1001,6 +1039,7 @@ uset_getItemCount(const USet* set); * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then * this function copies the string into <code>str[strCapacity]</code> and * returns the length of the string (0 for the empty string). + * See uset_getString() for a function that does not copy the string contents. * * If <code>itemIndex</code> is out of range, then this function returns -1. * @@ -1018,6 +1057,7 @@ uset_getItemCount(const USet* set); * @return the length of the string (0 or >= 2), or 0 if the item is a range, * or -1 if the itemIndex is out of range * @stable ICU 2.4 + * @see uset_getString */ U_CAPI int32_t U_EXPORT2 uset_getItem(const USet* set, int32_t itemIndex, @@ -1285,4 +1325,574 @@ U_CAPI UBool U_EXPORT2 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, UChar32* pStart, UChar32* pEnd); -#endif +#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API +#ifndef U_HIDE_DRAFT_API + +namespace U_HEADER_ONLY_NAMESPACE { + +// Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class, +// not intended to be used via export from the ICU DLL. + +/** + * Iterator returned by USetCodePoints. + * @draft ICU 76 + */ +class USetCodePointIterator { +public: + /** @draft ICU 76 */ + USetCodePointIterator(const USetCodePointIterator &other) = default; + + /** @draft ICU 76 */ + bool operator==(const USetCodePointIterator &other) const { + // No need to compare rangeCount & end given private constructor + // and assuming we don't compare iterators across the set being modified. + // And comparing rangeIndex is redundant with comparing c. + // We might even skip comparing uset. + // Unless we want operator==() to be "correct" for more than iteration. + return uset == other.uset && c == other.c; + } + + /** @draft ICU 76 */ + bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); } + + /** @draft ICU 76 */ + UChar32 operator*() const { return c; } + + /** + * Pre-increment. + * @draft ICU 76 + */ + USetCodePointIterator &operator++() { + if (c < end) { + ++c; + } else if (rangeIndex < rangeCount) { + UErrorCode errorCode = U_ZERO_ERROR; + int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode); + if (U_SUCCESS(errorCode) && result == 0) { + ++rangeIndex; + } else { + c = end = U_SENTINEL; + } + } else { + c = end = U_SENTINEL; + } + return *this; + } + + /** + * Post-increment. + * @draft ICU 76 + */ + USetCodePointIterator operator++(int) { + USetCodePointIterator result(*this); + operator++(); + return result; + } + +private: + friend class USetCodePoints; + + USetCodePointIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount) + : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount), + c(U_SENTINEL), end(U_SENTINEL) { + // Fetch the first range. + operator++(); + } + + const USet *uset; + int32_t rangeIndex; + int32_t rangeCount; + UChar32 c, end; +}; + +/** + * C++ "range" for iterating over the code points of a USet. + * + * \code + * using U_HEADER_NESTED_NAMESPACE::USetCodePoints; + * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode)); + * for (UChar32 c : USetCodePoints(uset.getAlias())) { + * printf("uset.codePoint U+%04lx\n", (long)c); + * } + * \endcode + * + * C++ UnicodeSet has member functions for iteration, including codePoints(). + * + * @draft ICU 76 + * @see USetRanges + * @see USetStrings + * @see USetElements + */ +class USetCodePoints { +public: + /** + * Constructs a C++ "range" object over the code points of the USet. + * @draft ICU 76 + */ + USetCodePoints(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {} + + /** @draft ICU 76 */ + USetCodePoints(const USetCodePoints &other) = default; + + /** @draft ICU 76 */ + USetCodePointIterator begin() const { + return USetCodePointIterator(uset, 0, rangeCount); + } + + /** @draft ICU 76 */ + USetCodePointIterator end() const { + return USetCodePointIterator(uset, rangeCount, rangeCount); + } + +private: + const USet *uset; + int32_t rangeCount; +}; + +/** + * A contiguous range of code points in a USet/UnicodeSet. + * Returned by USetRangeIterator which is returned by USetRanges. + * Both the rangeStart and rangeEnd are in the range. + * (end() returns an iterator corresponding to rangeEnd+1.) + * @draft ICU 76 + */ +struct CodePointRange { + /** @draft ICU 76 */ + struct iterator { + /** @draft ICU 76 */ + iterator(UChar32 c) : c(c) {} + + /** @draft ICU 76 */ + bool operator==(const iterator &other) const { return c == other.c; } + /** @draft ICU 76 */ + bool operator!=(const iterator &other) const { return !operator==(other); } + + /** @draft ICU 76 */ + UChar32 operator*() const { return c; } + + /** + * Pre-increment. + * @draft ICU 76 + */ + iterator &operator++() { + ++c; + return *this; + } + + /** + * Post-increment. + * @draft ICU 76 + */ + iterator operator++(int) { + return c++; + } + + /** + * The current code point in the range. + * @draft ICU 76 + */ + UChar32 c; + }; + + /** @draft ICU 76 */ + CodePointRange(UChar32 start, UChar32 end) : rangeStart(start), rangeEnd(end) {} + /** @draft ICU 76 */ + CodePointRange(const CodePointRange &other) = default; + /** @draft ICU 76 */ + size_t size() const { return (rangeEnd + 1) - rangeStart; } + /** @draft ICU 76 */ + iterator begin() const { return rangeStart; } + /** @draft ICU 76 */ + iterator end() const { return rangeEnd + 1; } + + /** + * Start of a USet/UnicodeSet range of code points. + * @draft ICU 76 + */ + UChar32 rangeStart; + /** + * Inclusive end of a USet/UnicodeSet range of code points. + * @draft ICU 76 + */ + UChar32 rangeEnd; +}; + +/** + * Iterator returned by USetRanges. + * @draft ICU 76 + */ +class USetRangeIterator { +public: + /** @draft ICU 76 */ + USetRangeIterator(const USetRangeIterator &other) = default; + + /** @draft ICU 76 */ + bool operator==(const USetRangeIterator &other) const { + // No need to compare rangeCount given private constructor + // and assuming we don't compare iterators across the set being modified. + // We might even skip comparing uset. + // Unless we want operator==() to be "correct" for more than iteration. + return uset == other.uset && rangeIndex == other.rangeIndex; + } + + /** @draft ICU 76 */ + bool operator!=(const USetRangeIterator &other) const { return !operator==(other); } + + /** @draft ICU 76 */ + CodePointRange operator*() const { + if (rangeIndex < rangeCount) { + UChar32 start, end; + UErrorCode errorCode = U_ZERO_ERROR; + int32_t result = uset_getItem(uset, rangeIndex, &start, &end, nullptr, 0, &errorCode); + if (U_SUCCESS(errorCode) && result == 0) { + return CodePointRange(start, end); + } + } + return CodePointRange(U_SENTINEL, U_SENTINEL); + } + + /** + * Pre-increment. + * @draft ICU 76 + */ + USetRangeIterator &operator++() { + ++rangeIndex; + return *this; + } + + /** + * Post-increment. + * @draft ICU 76 + */ + USetRangeIterator operator++(int) { + USetRangeIterator result(*this); + ++rangeIndex; + return result; + } + +private: + friend class USetRanges; + + USetRangeIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount) + : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount) {} + + const USet *uset; + int32_t rangeIndex; + int32_t rangeCount; +}; + +/** + * C++ "range" for iterating over the code point ranges of a USet. + * + * \code + * using U_HEADER_NESTED_NAMESPACE::USetRanges; + * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode)); + * for (auto [start, end] : USetRanges(uset.getAlias())) { + * printf("uset.range U+%04lx..U+%04lx\n", (long)start, (long)end); + * } + * for (auto range : USetRanges(uset.getAlias())) { + * for (UChar32 c : range) { + * printf("uset.range.c U+%04lx\n", (long)c); + * } + * } + * \endcode + * + * C++ UnicodeSet has member functions for iteration, including ranges(). + * + * @draft ICU 76 + * @see USetCodePoints + * @see USetStrings + * @see USetElements + */ +class USetRanges { +public: + /** + * Constructs a C++ "range" object over the code point ranges of the USet. + * @draft ICU 76 + */ + USetRanges(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {} + + /** @draft ICU 76 */ + USetRanges(const USetRanges &other) = default; + + /** @draft ICU 76 */ + USetRangeIterator begin() const { + return USetRangeIterator(uset, 0, rangeCount); + } + + /** @draft ICU 76 */ + USetRangeIterator end() const { + return USetRangeIterator(uset, rangeCount, rangeCount); + } + +private: + const USet *uset; + int32_t rangeCount; +}; + +/** + * Iterator returned by USetStrings. + * @draft ICU 76 + */ +class USetStringIterator { +public: + /** @draft ICU 76 */ + USetStringIterator(const USetStringIterator &other) = default; + + /** @draft ICU 76 */ + bool operator==(const USetStringIterator &other) const { + // No need to compare count given private constructor + // and assuming we don't compare iterators across the set being modified. + // We might even skip comparing uset. + // Unless we want operator==() to be "correct" for more than iteration. + return uset == other.uset && index == other.index; + } + + /** @draft ICU 76 */ + bool operator!=(const USetStringIterator &other) const { return !operator==(other); } + + /** @draft ICU 76 */ + std::u16string_view operator*() const { + if (index < count) { + int32_t length; + const UChar *uchars = uset_getString(uset, index, &length); + // assert uchars != nullptr; + return {ConstChar16Ptr(uchars), static_cast<uint32_t>(length)}; + } + return {}; + } + + /** + * Pre-increment. + * @draft ICU 76 + */ + USetStringIterator &operator++() { + ++index; + return *this; + } + + /** + * Post-increment. + * @draft ICU 76 + */ + USetStringIterator operator++(int) { + USetStringIterator result(*this); + ++index; + return result; + } + +private: + friend class USetStrings; + + USetStringIterator(const USet *uset, int32_t index, int32_t count) + : uset(uset), index(index), count(count) {} + + const USet *uset; + int32_t index; + int32_t count; +}; + +/** + * C++ "range" for iterating over the empty and multi-character strings of a USet. + * + * \code + * using U_HEADER_NESTED_NAMESPACE::USetStrings; + * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode)); + * for (auto s : USetStrings(uset.getAlias())) { + * UnicodeString us(s); + * std::string u8; + * printf("uset.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str()); + * } + * \endcode + * + * C++ UnicodeSet has member functions for iteration, including strings(). + * + * @draft ICU 76 + * @see USetCodePoints + * @see USetRanges + * @see USetElements + */ +class USetStrings { +public: + /** + * Constructs a C++ "range" object over the strings of the USet. + * @draft ICU 76 + */ + USetStrings(const USet *uset) : uset(uset), count(uset_getStringCount(uset)) {} + + /** @draft ICU 76 */ + USetStrings(const USetStrings &other) = default; + + /** @draft ICU 76 */ + USetStringIterator begin() const { + return USetStringIterator(uset, 0, count); + } + + /** @draft ICU 76 */ + USetStringIterator end() const { + return USetStringIterator(uset, count, count); + } + +private: + const USet *uset; + int32_t count; +}; + +/** + * Iterator returned by USetElements. + * @draft ICU 76 + */ +class USetElementIterator { +public: + /** @draft ICU 76 */ + USetElementIterator(const USetElementIterator &other) = default; + + /** @draft ICU 76 */ + bool operator==(const USetElementIterator &other) const { + // No need to compare rangeCount & end given private constructor + // and assuming we don't compare iterators across the set being modified. + // We might even skip comparing uset. + // Unless we want operator==() to be "correct" for more than iteration. + return uset == other.uset && c == other.c && index == other.index; + } + + /** @draft ICU 76 */ + bool operator!=(const USetElementIterator &other) const { return !operator==(other); } + + /** @draft ICU 76 */ + UnicodeString operator*() const { + if (c >= 0) { + return UnicodeString(c); + } else if (index < totalCount) { + int32_t length; + const UChar *uchars = uset_getString(uset, index - rangeCount, &length); + // assert uchars != nullptr; + return UnicodeString(uchars, length); + } else { + return UnicodeString(); + } + } + + /** + * Pre-increment. + * @draft ICU 76 + */ + USetElementIterator &operator++() { + if (c < end) { + ++c; + } else if (index < rangeCount) { + UErrorCode errorCode = U_ZERO_ERROR; + int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode); + if (U_SUCCESS(errorCode) && result == 0) { + ++index; + } else { + c = end = U_SENTINEL; + } + } else if (c >= 0) { + // assert index == rangeCount; + // Switch from the last range to the first string. + c = end = U_SENTINEL; + } else { + ++index; + } + return *this; + } + + /** + * Post-increment. + * @draft ICU 76 + */ + USetElementIterator operator++(int) { + USetElementIterator result(*this); + operator++(); + return result; + } + +private: + friend class USetElements; + + USetElementIterator(const USet *uset, int32_t index, int32_t rangeCount, int32_t totalCount) + : uset(uset), index(index), rangeCount(rangeCount), totalCount(totalCount), + c(U_SENTINEL), end(U_SENTINEL) { + if (index < rangeCount) { + // Fetch the first range. + operator++(); + } + // Otherwise don't move beyond the (index - rangeCount)-th string. + } + + const USet *uset; + int32_t index; + /** Number of UnicodeSet/USet code point ranges. */ + int32_t rangeCount; + /** + * Number of code point ranges plus number of strings. + * index starts from 0, counts ranges while less than rangeCount, + * then counts strings while at least rangeCount and less than totalCount. + * + * Note that totalCount is the same as uset_getItemCount(), but usually + * smaller than the number of elements returned by this iterator + * because we return each code point of each range. + */ + int32_t totalCount; + UChar32 c, end; +}; + +/** + * A C++ "range" for iterating over all of the elements of a USet. + * Convenient all-in one iteration, but creates a UnicodeString for each + * code point or string. + * + * Code points are returned first, then empty and multi-character strings. + * + * \code + * using U_HEADER_NESTED_NAMESPACE::USetElements; + * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode)); + * for (auto el : USetElements(uset.getAlias())) { + * std::string u8; + * printf("uset.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str()); + * } + * \endcode + * + * C++ UnicodeSet has member functions for iteration, including begin() and end(). + * + * @return an all-elements iterator. + * @draft ICU 76 + * @see USetCodePoints + * @see USetRanges + * @see USetStrings + */ +class USetElements { +public: + /** + * Constructs a C++ "range" object over all of the elements of the USet. + * @draft ICU 76 + */ + USetElements(const USet *uset) + : uset(uset), rangeCount(uset_getRangeCount(uset)), + stringCount(uset_getStringCount(uset)) {} + + /** @draft ICU 76 */ + USetElements(const USetElements &other) = default; + + /** @draft ICU 76 */ + USetElementIterator begin() const { + return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount); + } + + /** @draft ICU 76 */ + USetElementIterator end() const { + return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount); + } + +private: + const USet *uset; + int32_t rangeCount, stringCount; +}; + +} // namespace U_HEADER_ONLY_NAMESPACE + +#endif // U_HIDE_DRAFT_API +#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API + +#endif // __USET_H__ diff --git a/thirdparty/icu4c/common/unicode/ustring.h b/thirdparty/icu4c/common/unicode/ustring.h index 03c697c722..a3d08254d6 100644 --- a/thirdparty/icu4c/common/unicode/ustring.h +++ b/thirdparty/icu4c/common/unicode/ustring.h @@ -1107,8 +1107,8 @@ u_strToLower(UChar *dest, int32_t destCapacity, * styles, using rules and dictionaries beyond the standard iterators. * It may be more efficient to always provide an iterator to avoid * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. * * This function uses only the setText(), first() and next() methods of the * provided break iterator. diff --git a/thirdparty/icu4c/common/unicode/utypes.h b/thirdparty/icu4c/common/unicode/utypes.h index 6c7cc51345..0151ebd470 100644 --- a/thirdparty/icu4c/common/unicode/utypes.h +++ b/thirdparty/icu4c/common/unicode/utypes.h @@ -54,22 +54,37 @@ * integer and other types. */ +/** @{ API visibility control */ /** * \def U_SHOW_CPLUSPLUS_API + * When defined to 1 (=default) and compiled with a C++ compiler, both C and C++ APIs are visible. + * Otherwise, only C APIs are visible; this is for C++ users who want to + * restrict their usage to binary stable C APIs exported by ICU DLLs. + * @internal + */ +/** + * \def U_SHOW_CPLUSPLUS_HEADER_API + * When defined to 1 (=default) and compiled with a C++ compiler, C++ header-only APIs are visible. + * This is for C++ users who restrict their usage to binary stable C APIs exported by ICU DLLs + * (U_SHOW_CPLUSPLUS_API=0) + * but who still want to use C++ header-only APIs which do not rely on ICU DLL exports. * @internal */ #ifdef __cplusplus # ifndef U_SHOW_CPLUSPLUS_API # define U_SHOW_CPLUSPLUS_API 1 # endif +# ifndef U_SHOW_CPLUSPLUS_HEADER_API +# define U_SHOW_CPLUSPLUS_HEADER_API 1 +# endif #else # undef U_SHOW_CPLUSPLUS_API # define U_SHOW_CPLUSPLUS_API 0 +# undef U_SHOW_CPLUSPLUS_HEADER_API +# define U_SHOW_CPLUSPLUS_HEADER_API 0 #endif -/** @{ API visibility control */ - /** * \def U_HIDE_DRAFT_API * Define this to 1 to request that draft API be "hidden" @@ -582,14 +597,13 @@ typedef enum UErrorCode { U_MF_MISSING_SELECTOR_ANNOTATION_ERROR, /**< A selector expression evaluates to an unannotated operand. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_DUPLICATE_DECLARATION_ERROR, /**< The same variable is declared in more than one .local or .input declaration. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ - U_MF_UNSUPPORTED_STATEMENT_ERROR, /**< A message includes a reserved statement. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ - U_MF_UNSUPPORTED_EXPRESSION_ERROR, /**< A message includes syntax reserved for future standardization or private implementation use. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ + U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */ #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal formatting API error code. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - U_FMT_PARSE_ERROR_LIMIT = 0x10121, + U_FMT_PARSE_ERROR_LIMIT = 0x10120, #endif // U_HIDE_DEPRECATED_API /* @@ -713,13 +727,13 @@ typedef enum UErrorCode { * @stable ICU 2.0 */ static - inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); } + inline UBool U_SUCCESS(UErrorCode code) { return code <= U_ZERO_ERROR; } /** * Does the error code indicate a failure? * @stable ICU 2.0 */ static - inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); } + inline UBool U_FAILURE(UErrorCode code) { return code > U_ZERO_ERROR; } #else /** * Does the error code indicate success? diff --git a/thirdparty/icu4c/common/unicode/uvernum.h b/thirdparty/icu4c/common/unicode/uvernum.h index eb7c50f385..a3cb882623 100644 --- a/thirdparty/icu4c/common/unicode/uvernum.h +++ b/thirdparty/icu4c/common/unicode/uvernum.h @@ -53,7 +53,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 75 +#define U_ICU_VERSION_MAJOR_NUM 76 /** The current ICU minor version as an integer. * This value will change in the subsequent releases of ICU @@ -79,7 +79,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _75 +#define U_ICU_VERSION_SUFFIX _76 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -132,7 +132,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "75.1" +#define U_ICU_VERSION "76.1" /** * The current ICU library major version number as a string, for library name suffixes. @@ -145,13 +145,13 @@ * * @stable ICU 2.6 */ -#define U_ICU_VERSION_SHORT "75" +#define U_ICU_VERSION_SHORT "76" #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. * @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "75.1" +#define U_ICU_DATA_VERSION "76.1" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/thirdparty/icu4c/common/unicode/uversion.h b/thirdparty/icu4c/common/unicode/uversion.h index 113568df8c..25d73a3aeb 100644 --- a/thirdparty/icu4c/common/unicode/uversion.h +++ b/thirdparty/icu4c/common/unicode/uversion.h @@ -124,6 +124,49 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; # if U_USING_ICU_NAMESPACE U_NAMESPACE_USE # endif + +#ifndef U_HIDE_DRAFT_API +/** + * \def U_HEADER_NESTED_NAMESPACE + * Nested namespace used inside U_ICU_NAMESPACE for header-only APIs. + * Different when used inside ICU to prevent public use of internal instantiations: + * "header" when compiling calling code; "internal" when compiling ICU library code. + * + * When compiling for Windows, where DLL exports of APIs are explicit, + * this is always "header". Header-only types are not marked for export, + * which on Windows already avoids callers linking with library instantiations. + * + * @draft ICU 76 + * @see U_HEADER_ONLY_NAMESPACE + */ + +/** + * \def U_HEADER_ONLY_NAMESPACE + * Namespace used for header-only APIs. + * Different when used inside ICU to prevent public use of internal instantiations. + * "U_ICU_NAMESPACE::header" or "U_ICU_NAMESPACE::internal", + * see U_HEADER_NESTED_NAMESPACE for details. + * + * @draft ICU 76 + */ + +// The first test is the same as for defining U_EXPORT for Windows. +#if defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \ + UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__)) +# define U_HEADER_NESTED_NAMESPACE header +#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \ + defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION) +# define U_HEADER_NESTED_NAMESPACE internal +#else +# define U_HEADER_NESTED_NAMESPACE header +#endif + +#define U_HEADER_ONLY_NAMESPACE U_ICU_NAMESPACE::U_HEADER_NESTED_NAMESPACE + +namespace U_HEADER_ONLY_NAMESPACE {} +#endif // U_HIDE_DRAFT_API + #endif /* __cplusplus */ /*===========================================================================*/ |