summary | refs | log | tree | commit | diff | stats
path: root/thirdparty/icu4c/common/unicode
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/unicode')
-rw-r--r-- thirdparty/icu4c/common/unicode/brkiter.h 2
-rw-r--r-- thirdparty/icu4c/common/unicode/bytestream.h 2
-rw-r--r-- thirdparty/icu4c/common/unicode/bytestrie.h 4
-rw-r--r-- thirdparty/icu4c/common/unicode/char16ptr.h 71
-rw-r--r-- thirdparty/icu4c/common/unicode/docmain.h 5
-rw-r--r-- thirdparty/icu4c/common/unicode/edits.h 2
-rw-r--r-- thirdparty/icu4c/common/unicode/idna.h 3
-rw-r--r-- thirdparty/icu4c/common/unicode/locid.h 4
-rw-r--r-- thirdparty/icu4c/common/unicode/messagepattern.h 3
-rw-r--r-- thirdparty/icu4c/common/unicode/normalizer2.h 4
-rw-r--r-- thirdparty/icu4c/common/unicode/platform.h 28
-rw-r--r-- thirdparty/icu4c/common/unicode/simpleformatter.h 2
-rw-r--r-- thirdparty/icu4c/common/unicode/stringpiece.h 11
-rw-r--r-- thirdparty/icu4c/common/unicode/stringtriebuilder.h 4
-rw-r--r-- thirdparty/icu4c/common/unicode/ucasemap.h 8
-rw-r--r-- thirdparty/icu4c/common/unicode/uchar.h 82
-rw-r--r-- thirdparty/icu4c/common/unicode/ucharstrie.h 4
-rw-r--r-- thirdparty/icu4c/common/unicode/uidna.h 22
-rw-r--r-- thirdparty/icu4c/common/unicode/ulocale.h 37
-rw-r--r-- thirdparty/icu4c/common/unicode/ulocbuilder.h 50
-rw-r--r-- thirdparty/icu4c/common/unicode/uniset.h 118
-rw-r--r-- thirdparty/icu4c/common/unicode/unistr.h 407
-rw-r--r-- thirdparty/icu4c/common/unicode/unorm2.h 4
-rw-r--r-- thirdparty/icu4c/common/unicode/urename.h 10
-rw-r--r-- thirdparty/icu4c/common/unicode/uscript.h 17
-rw-r--r-- thirdparty/icu4c/common/unicode/uset.h 614
-rw-r--r-- thirdparty/icu4c/common/unicode/ustring.h 4
-rw-r--r-- thirdparty/icu4c/common/unicode/utypes.h 28
-rw-r--r-- thirdparty/icu4c/common/unicode/uvernum.h 10
-rw-r--r-- thirdparty/icu4c/common/unicode/uversion.h 43
30 files changed, 1447 insertions, 156 deletions
diff --git a/thirdparty/icu4c/common/unicode/brkiter.h b/thirdparty/icu4c/common/unicode/brkiter.h
index bd96629285..30c59c4a94 100644
--- a/thirdparty/icu4c/common/unicode/brkiter.h
+++ b/thirdparty/icu4c/common/unicode/brkiter.h
@@ -219,7 +219,7 @@ public:
* boundaries have been returned.
* @stable ICU 2.0
*/
- DONE = (int32_t)-1
+ DONE = static_cast<int32_t>(-1)
};
/**
diff --git a/thirdparty/icu4c/common/unicode/bytestream.h b/thirdparty/icu4c/common/unicode/bytestream.h
index 997746e428..bea41461bc 100644
--- a/thirdparty/icu4c/common/unicode/bytestream.h
+++ b/thirdparty/icu4c/common/unicode/bytestream.h
@@ -281,7 +281,7 @@ class StringByteSink : public ByteSink {
*/
StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
if (initialAppendCapacity > 0 &&
- (uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) {
+ static_cast<uint32_t>(initialAppendCapacity) > dest->capacity() - dest->length()) {
dest->reserve(dest->length() + initialAppendCapacity);
}
}
diff --git a/thirdparty/icu4c/common/unicode/bytestrie.h b/thirdparty/icu4c/common/unicode/bytestrie.h
index 1719a6bb83..c07dfada94 100644
--- a/thirdparty/icu4c/common/unicode/bytestrie.h
+++ b/thirdparty/icu4c/common/unicode/bytestrie.h
@@ -109,7 +109,7 @@ public:
*/
uint64_t getState64() const {
return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
- (uint64_t)(pos_ - bytes_);
+ static_cast<uint64_t>(pos_ - bytes_);
}
/**
@@ -439,7 +439,7 @@ private:
}
static inline UStringTrieResult valueResult(int32_t node) {
- return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node&kValueIsFinal));
+ return static_cast<UStringTrieResult>(USTRINGTRIE_INTERMEDIATE_VALUE - (node & kValueIsFinal));
}
// Handles a branch node for both next(byte) and next(string).
diff --git a/thirdparty/icu4c/common/unicode/char16ptr.h b/thirdparty/icu4c/common/unicode/char16ptr.h
index de8182c7ad..daf35cd43b 100644
--- a/thirdparty/icu4c/common/unicode/char16ptr.h
+++ b/thirdparty/icu4c/common/unicode/char16ptr.h
@@ -12,6 +12,7 @@
#if U_SHOW_CPLUSPLUS_API
#include <cstddef>
+#include <string_view>
/**
* \file
@@ -306,6 +307,76 @@ inline OldUChar *toOldUCharPtr(char16_t *p) {
return reinterpret_cast<OldUChar *>(p);
}
+#ifndef U_FORCE_HIDE_INTERNAL_API
+/**
+ * Is T convertible to a std::u16string_view or some other 16-bit string view?
+ * @internal
+ */
+template<typename T>
+constexpr bool ConvertibleToU16StringView =
+ std::is_convertible_v<T, std::u16string_view>
+#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
+ || std::is_convertible_v<T, std::basic_string_view<uint16_t>>
+#endif
+#if U_SIZEOF_WCHAR_T==2
+ || std::is_convertible_v<T, std::wstring_view>
+#endif
+ ;
+
+namespace internal {
+/**
+ * Pass-through overload.
+ * @internal
+ */
+inline std::u16string_view toU16StringView(std::u16string_view sv) { return sv; }
+
+#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
+/**
+ * Basically undefined behavior but sometimes necessary conversion
+ * from std::basic_string_view<uint16_t> to std::u16string_view.
+ * @internal
+ */
+inline std::u16string_view toU16StringView(std::basic_string_view<uint16_t> sv) {
+ return { ConstChar16Ptr(sv.data()), sv.length() };
+}
+#endif
+
+#if U_SIZEOF_WCHAR_T==2
+/**
+ * Basically undefined behavior but sometimes necessary conversion
+ * from std::wstring_view to std::u16string_view.
+ * @internal
+ */
+inline std::u16string_view toU16StringView(std::wstring_view sv) {
+ return { ConstChar16Ptr(sv.data()), sv.length() };
+}
+#endif
+
+/**
+ * Overload for non-pointer types: forwards directly to toU16StringView().
+ * @internal
+ */
+template <typename T,
+ typename = typename std::enable_if_t<!std::is_pointer_v<std::remove_reference_t<T>>>>
+inline std::u16string_view toU16StringViewNullable(const T& text) {
+ return toU16StringView(text);
+}
+
+/**
+ * In case of nullptr, return an empty view.
+ * @internal
+ */
+template <typename T,
+ typename = typename std::enable_if_t<std::is_pointer_v<std::remove_reference_t<T>>>,
+ typename = void>
+inline std::u16string_view toU16StringViewNullable(const T& text) {
+ if (text == nullptr) return {}; // For backward compatibility.
+ return toU16StringView(text);
+}
+
+} // internal
+#endif // U_FORCE_HIDE_INTERNAL_API
+
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
diff --git a/thirdparty/icu4c/common/unicode/docmain.h b/thirdparty/icu4c/common/unicode/docmain.h
index fa4dbbc052..676e868e7c 100644
--- a/thirdparty/icu4c/common/unicode/docmain.h
+++ b/thirdparty/icu4c/common/unicode/docmain.h
@@ -138,6 +138,11 @@
* <td>icu::DateFormat</td>
* </tr>
* <tr>
+ * <td>Relative Date and Time Formatting</td>
+ * <td>ureldatefmt.h</td>
+ * <td>icu::RelativeDateTimeFormatter</td>
+ * </tr>
+ * <tr>
* <td>Message Formatting</td>
* <td>umsg.h</td>
* <td>icu::MessageFormat</td>
diff --git a/thirdparty/icu4c/common/unicode/edits.h b/thirdparty/icu4c/common/unicode/edits.h
index dda9d3ca75..bc3c943822 100644
--- a/thirdparty/icu4c/common/unicode/edits.h
+++ b/thirdparty/icu4c/common/unicode/edits.h
@@ -508,7 +508,7 @@ private:
Edits &copyArray(const Edits &other);
Edits &moveArray(Edits &src) noexcept;
- void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
+ void setLastUnit(int32_t last) { array[length - 1] = static_cast<uint16_t>(last); }
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
void append(int32_t r);
diff --git a/thirdparty/icu4c/common/unicode/idna.h b/thirdparty/icu4c/common/unicode/idna.h
index 1c57205bae..1e36fa771f 100644
--- a/thirdparty/icu4c/common/unicode/idna.h
+++ b/thirdparty/icu4c/common/unicode/idna.h
@@ -70,6 +70,7 @@ public:
* The worker functions use transitional processing, including deviation mappings,
* unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
* is used in which case the deviation characters are passed through without change.
+ * <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
*
* Disallowed characters are mapped to U+FFFD.
*
@@ -82,6 +83,8 @@ public:
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
*
* @param options Bit set to modify the processing and error checking.
+ * These should include UIDNA_DEFAULT, or
+ * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
* See option bit set values in uidna.h.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
diff --git a/thirdparty/icu4c/common/unicode/locid.h b/thirdparty/icu4c/common/unicode/locid.h
index 60282d623d..e1afd598cf 100644
--- a/thirdparty/icu4c/common/unicode/locid.h
+++ b/thirdparty/icu4c/common/unicode/locid.h
@@ -727,7 +727,9 @@ public:
*
* @stable ICU 49
*/
- void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status);
+ void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status) {
+ setKeywordValue(StringPiece{keywordName}, StringPiece{keywordValue}, status);
+ }
/**
* Sets or removes the value for a keyword.
diff --git a/thirdparty/icu4c/common/unicode/messagepattern.h b/thirdparty/icu4c/common/unicode/messagepattern.h
index 55b09bfbd4..db36ede727 100644
--- a/thirdparty/icu4c/common/unicode/messagepattern.h
+++ b/thirdparty/icu4c/common/unicode/messagepattern.h
@@ -775,7 +775,7 @@ public:
UMessagePatternArgType getArgType() const {
UMessagePatternPartType msgType=getType();
if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
- return (UMessagePatternArgType)value;
+ return static_cast<UMessagePatternArgType>(value);
} else {
return UMSGPAT_ARG_TYPE_NONE;
}
@@ -821,6 +821,7 @@ public:
static const int32_t MAX_LENGTH=0xffff;
static const int32_t MAX_VALUE=0x7fff;
+ static const int32_t MAX_NESTED_LEVELS=0x03ff;
// Some fields are not final because they are modified during pattern parsing.
// After pattern parsing, the parts are effectively immutable.
diff --git a/thirdparty/icu4c/common/unicode/normalizer2.h b/thirdparty/icu4c/common/unicode/normalizer2.h
index 6856ff8720..01271623f3 100644
--- a/thirdparty/icu4c/common/unicode/normalizer2.h
+++ b/thirdparty/icu4c/common/unicode/normalizer2.h
@@ -163,7 +163,6 @@ public:
static const Normalizer2 *
getNFKCCasefoldInstance(UErrorCode &errorCode);
-#ifndef U_HIDE_DRAFT_API
/**
* Returns a Normalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
* which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
@@ -176,11 +175,10 @@ public:
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested Normalizer2, if successful
- * @draft ICU 74
+ * @stable ICU 74
*/
static const Normalizer2 *
getNFKCSimpleCasefoldInstance(UErrorCode &errorCode);
-#endif // U_HIDE_DRAFT_API
/**
* Returns a Normalizer2 instance which uses the specified data file
diff --git a/thirdparty/icu4c/common/unicode/platform.h b/thirdparty/icu4c/common/unicode/platform.h
index 7aca76c67d..b2fcb21ef1 100644
--- a/thirdparty/icu4c/common/unicode/platform.h
+++ b/thirdparty/icu4c/common/unicode/platform.h
@@ -132,6 +132,8 @@
#define U_PF_BROWSER_NATIVE_CLIENT 4020
/** Android is based on Linux. @internal */
#define U_PF_ANDROID 4050
+/** Haiku is a POSIX-ish platform. @internal */
+#define U_PF_HAIKU 4080
/** Fuchsia is a POSIX-ish platform. @internal */
#define U_PF_FUCHSIA 4100
/* Maximum value for Linux-based platform is 4499 */
@@ -154,6 +156,8 @@
# define U_PLATFORM U_PF_MINGW
#elif defined(__CYGWIN__)
# define U_PLATFORM U_PF_CYGWIN
+ /* Cygwin uchar.h doesn't exist until Cygwin 3.5. */
+# include <cygwin/version.h>
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
# define U_PLATFORM U_PF_WINDOWS
#elif defined(__ANDROID__)
@@ -200,6 +204,8 @@
# define U_PLATFORM U_PF_OS390
#elif defined(__OS400__) || defined(__TOS_OS400__)
# define U_PLATFORM U_PF_OS400
+#elif defined(__HAIKU__)
+# define U_PLATFORM U_PF_HAIKU
#elif defined(__EMSCRIPTEN__)
# define U_PLATFORM U_PF_EMSCRIPTEN
#else
@@ -235,7 +241,7 @@
/**
* \def U_PLATFORM_USES_ONLY_WIN32_API
* Defines whether the platform uses only the Win32 API.
- * Set to 1 for Windows/MSVC and MinGW but not Cygwin.
+ * Set to 1 for Windows/MSVC, ClangCL and MinGW but not Cygwin.
* @internal
*/
#ifdef U_PLATFORM_USES_ONLY_WIN32_API
@@ -250,7 +256,7 @@
/**
* \def U_PLATFORM_HAS_WIN32_API
* Defines whether the Win32 API is available on the platform.
- * Set to 1 for Windows/MSVC, MinGW and Cygwin.
+ * Set to 1 for Windows/MSVC, ClangCL, MinGW and Cygwin.
* @internal
*/
#ifdef U_PLATFORM_HAS_WIN32_API
@@ -722,12 +728,16 @@
/*
* Notes:
* C++11 and C11 require support for UTF-16 literals
- * Doesn't work on Mac C11 (see workaround in ptypes.h).
+ * Doesn't work on Mac C11 (see workaround in ptypes.h)
+ * or Cygwin less than 3.5.
*/
-# if defined(__cplusplus) || !U_PLATFORM_IS_DARWIN_BASED
+# if defined(__cplusplus)
# define U_HAVE_CHAR16_T 1
-# else
+# elif U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && CYGWIN_VERSION_DLL_MAJOR < 3005)
# define U_HAVE_CHAR16_T 0
+# else
+ // conformant C11
+# define U_HAVE_CHAR16_T 1
# endif
#endif
@@ -735,7 +745,9 @@
* @{
* \def U_DECLARE_UTF16
* Do not use this macro because it is not defined on all platforms.
- * Use the UNICODE_STRING or U_STRING_DECL macros instead.
+ * In C++, use std::u16string_view literals, see the UNICODE_STRING docs.
+ * In C, use u"UTF-16 literals".
+ * See also the public U_STRING_DECL macro.
* @internal
*/
#ifdef U_DECLARE_UTF16
@@ -766,7 +778,7 @@
#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__))
# define U_EXPORT __declspec(dllexport)
-#elif defined(__GNUC__)
+#elif defined(__GNUC__) || defined(__open_xl__)
# define U_EXPORT __attribute__((visibility("default")))
#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
|| (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
@@ -805,7 +817,7 @@
*/
#ifdef U_HIDDEN
/* Use the predefined value. */
-#elif defined(__GNUC__)
+#elif defined(__GNUC__) || defined(__open_xl__)
# define U_HIDDEN __attribute__((visibility("hidden")))
#else
# define U_HIDDEN
diff --git a/thirdparty/icu4c/common/unicode/simpleformatter.h b/thirdparty/icu4c/common/unicode/simpleformatter.h
index 33bdd6d48f..62c75d08bb 100644
--- a/thirdparty/icu4c/common/unicode/simpleformatter.h
+++ b/thirdparty/icu4c/common/unicode/simpleformatter.h
@@ -63,7 +63,7 @@ public:
* Default constructor.
* @stable ICU 57
*/
- SimpleFormatter() : compiledPattern((char16_t)0) {}
+ SimpleFormatter() : compiledPattern(static_cast<char16_t>(0)) {}
/**
* Constructs a formatter from the pattern string.
diff --git a/thirdparty/icu4c/common/unicode/stringpiece.h b/thirdparty/icu4c/common/unicode/stringpiece.h
index 6f2a50eafc..74919d7222 100644
--- a/thirdparty/icu4c/common/unicode/stringpiece.h
+++ b/thirdparty/icu4c/common/unicode/stringpiece.h
@@ -32,6 +32,7 @@
#if U_SHOW_CPLUSPLUS_API
#include <cstddef>
+#include <string_view>
#include <type_traits>
#include "unicode/uobject.h"
@@ -176,6 +177,16 @@ class U_COMMON_API StringPiece : public UMemory {
*/
StringPiece(const StringPiece& x, int32_t pos, int32_t len);
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Converts to a std::string_view().
+ * @internal
+ */
+ inline operator std::string_view() const {
+ return {data(), static_cast<std::string_view::size_type>(size())};
+ }
+#endif // U_HIDE_INTERNAL_API
+
/**
* Returns the string pointer. May be nullptr if it is empty.
*
diff --git a/thirdparty/icu4c/common/unicode/stringtriebuilder.h b/thirdparty/icu4c/common/unicode/stringtriebuilder.h
index 429d7883f1..62108dfe3e 100644
--- a/thirdparty/icu4c/common/unicode/stringtriebuilder.h
+++ b/thirdparty/icu4c/common/unicode/stringtriebuilder.h
@@ -346,7 +346,7 @@ protected:
virtual void write(StringTrieBuilder &builder) override;
// Adds a unit with a final value.
void add(int32_t c, int32_t value) {
- units[length]=(char16_t)c;
+ units[length] = static_cast<char16_t>(c);
equal[length]=nullptr;
values[length]=value;
++length;
@@ -354,7 +354,7 @@ protected:
}
// Adds a unit which leads to another match node.
void add(int32_t c, Node *node) {
- units[length]=(char16_t)c;
+ units[length] = static_cast<char16_t>(c);
equal[length]=node;
values[length]=0;
++length;
diff --git a/thirdparty/icu4c/common/unicode/ucasemap.h b/thirdparty/icu4c/common/unicode/ucasemap.h
index d1c1b483ab..4b623e6910 100644
--- a/thirdparty/icu4c/common/unicode/ucasemap.h
+++ b/thirdparty/icu4c/common/unicode/ucasemap.h
@@ -202,8 +202,8 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode
*
* The titlecase break iterator can be provided to customize for arbitrary
* styles, using rules and dictionaries beyond the standard iterators.
- * The standard titlecase iterator for the root locale implements the
- * algorithm of Unicode TR 21.
+ * If the break iterator passed in is null, the default Unicode algorithm
+ * will be used to determine the titlecase positions.
*
* This function uses only the setText(), first() and next() methods of the
* provided break iterator.
@@ -312,8 +312,8 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
*
* The titlecase break iterator can be provided to customize for arbitrary
* styles, using rules and dictionaries beyond the standard iterators.
- * The standard titlecase iterator for the root locale implements the
- * algorithm of Unicode TR 21.
+ * If the break iterator passed in is null, the default Unicode algorithm
+ * will be used to determine the titlecase positions.
*
* This function uses only the setUText(), first(), next() and close() methods of the
* provided break iterator.
diff --git a/thirdparty/icu4c/common/unicode/uchar.h b/thirdparty/icu4c/common/unicode/uchar.h
index 4572230d9f..0daa7dd214 100644
--- a/thirdparty/icu4c/common/unicode/uchar.h
+++ b/thirdparty/icu4c/common/unicode/uchar.h
@@ -54,14 +54,14 @@ U_CDECL_BEGIN
/*==========================================================================*/
/**
* Unicode version number, default for the current ICU version.
- * The actual Unicode Character Database (UCD) data is stored in uprops.dat
+ * The actual Unicode Character Database (UCD) data is stored in uprops.icu
* and may be generated from UCD files from a different Unicode version.
* Call u_getUnicodeVersion to get the actual Unicode version of the data.
*
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
-#define U_UNICODE_VERSION "15.1"
+#define U_UNICODE_VERSION "16.0"
/**
* \file
@@ -533,33 +533,39 @@ typedef enum UProperty {
* @stable ICU 70
*/
UCHAR_RGI_EMOJI=71,
-#ifndef U_HIDE_DRAFT_API
/**
* Binary property IDS_Unary_Operator.
* For programmatic determination of Ideographic Description Sequences.
*
- * @draft ICU 74
+ * @stable ICU 74
*/
UCHAR_IDS_UNARY_OPERATOR=72,
/**
* Binary property ID_Compat_Math_Start.
* Used in mathematical identifier profile in UAX #31.
- * @draft ICU 74
+ * @stable ICU 74
*/
UCHAR_ID_COMPAT_MATH_START=73,
/**
* Binary property ID_Compat_Math_Continue.
* Used in mathematical identifier profile in UAX #31.
- * @draft ICU 74
+ * @stable ICU 74
*/
UCHAR_ID_COMPAT_MATH_CONTINUE=74,
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Binary property Modifier_Combining_Mark.
+ * Used by the AMTRA algorithm in UAX #53.
+ * @draft ICU 76
+ */
+ UCHAR_MODIFIER_COMBINING_MARK=75,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for binary Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- UCHAR_BINARY_LIMIT=75,
+ UCHAR_BINARY_LIMIT=76,
#endif // U_HIDE_DEPRECATED_API
/** Enumerated property Bidi_Class.
@@ -677,13 +683,19 @@ typedef enum UProperty {
* @draft ICU 75
*/
UCHAR_IDENTIFIER_STATUS=0x1019,
+ /**
+ * Enumerated property Indic_Conjunct_Break.
+ * Used in the grapheme cluster break algorithm in UAX #29.
+ * @draft ICU 76
+ */
+ UCHAR_INDIC_CONJUNCT_BREAK=0x101A,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for enumerated/integer Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- UCHAR_INT_LIMIT=0x101A,
+ UCHAR_INT_LIMIT=0x101B,
#endif // U_HIDE_DEPRECATED_API
/** Bitmask property General_Category_Mask.
@@ -1952,6 +1964,29 @@ enum UBlockCode {
/** @stable ICU 74 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 328, /*[2EBF0]*/
+ // New blocks in Unicode 16.0
+
+ /** @stable ICU 76 */
+ UBLOCK_EGYPTIAN_HIEROGLYPHS_EXTENDED_A = 329, /*[13460]*/
+ /** @stable ICU 76 */
+ UBLOCK_GARAY = 330, /*[10D40]*/
+ /** @stable ICU 76 */
+ UBLOCK_GURUNG_KHEMA = 331, /*[16100]*/
+ /** @stable ICU 76 */
+ UBLOCK_KIRAT_RAI = 332, /*[16D40]*/
+ /** @stable ICU 76 */
+ UBLOCK_MYANMAR_EXTENDED_C = 333, /*[116D0]*/
+ /** @stable ICU 76 */
+ UBLOCK_OL_ONAL = 334, /*[1E5D0]*/
+ /** @stable ICU 76 */
+ UBLOCK_SUNUWAR = 335, /*[11BC0]*/
+ /** @stable ICU 76 */
+ UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT = 336, /*[1CC00]*/
+ /** @stable ICU 76 */
+ UBLOCK_TODHRI = 337, /*[105C0]*/
+ /** @stable ICU 76 */
+ UBLOCK_TULU_TIGALARI = 338, /*[11380]*/
+
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UBlockCode value.
@@ -1959,7 +1994,7 @@ enum UBlockCode {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- UBLOCK_COUNT = 329,
+ UBLOCK_COUNT = 339,
#endif // U_HIDE_DEPRECATED_API
/** @stable ICU 2.0 */
@@ -2254,6 +2289,8 @@ typedef enum UJoiningGroup {
U_JG_THIN_YEH, /**< @stable ICU 70 */
U_JG_VERTICAL_TAIL, /**< @stable ICU 70 */
+ U_JG_KASHMIRI_YEH, /**< @stable ICU 76 */
+
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UJoiningGroup value.
@@ -2702,8 +2739,35 @@ typedef enum UIndicSyllabicCategory {
U_INSC_VOWEL_DEPENDENT,
/** @stable ICU 63 */
U_INSC_VOWEL_INDEPENDENT,
+ /** @stable ICU 76 */
+ U_INSC_REORDERING_KILLER,
} UIndicSyllabicCategory;
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Indic Conjunct Break constants.
+ *
+ * @see UCHAR_INDIC_CONJUNCT_BREAK
+ * @draft ICU 76
+ */
+typedef enum UIndicConjunctBreak {
+ /*
+ * Note: UIndicConjunctBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_INCB_<Unicode Indic_Conjunct_Break value name>
+ */
+
+ /** @draft ICU 76 */
+ U_INCB_NONE,
+ /** @draft ICU 76 */
+ U_INCB_CONSONANT,
+ /** @draft ICU 76 */
+ U_INCB_EXTEND,
+ /** @draft ICU 76 */
+ U_INCB_LINKER,
+} UIndicConjunctBreak;
+#endif // U_HIDE_DRAFT_API
+
/**
* Vertical Orientation constants.
*
diff --git a/thirdparty/icu4c/common/unicode/ucharstrie.h b/thirdparty/icu4c/common/unicode/ucharstrie.h
index fa1b55616c..ca4b469470 100644
--- a/thirdparty/icu4c/common/unicode/ucharstrie.h
+++ b/thirdparty/icu4c/common/unicode/ucharstrie.h
@@ -107,7 +107,7 @@ public:
*/
uint64_t getState64() const {
return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
- (uint64_t)(pos_ - uchars_);
+ static_cast<uint64_t>(pos_ - uchars_);
}
/**
@@ -493,7 +493,7 @@ private:
}
static inline UStringTrieResult valueResult(int32_t node) {
- return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15));
+ return static_cast<UStringTrieResult>(USTRINGTRIE_INTERMEDIATE_VALUE - (node >> 15));
}
// Handles a branch node for both next(uchar) and next(string).
diff --git a/thirdparty/icu4c/common/unicode/uidna.h b/thirdparty/icu4c/common/unicode/uidna.h
index 24a81ceadd..362a2dcbe6 100644
--- a/thirdparty/icu4c/common/unicode/uidna.h
+++ b/thirdparty/icu4c/common/unicode/uidna.h
@@ -49,11 +49,19 @@
*/
enum {
/**
- * Default options value: None of the other options are set.
+ * Default options value: UTS #46 nontransitional processing.
* For use in static worker and factory methods.
+ *
+ * Since ICU 76, this is the same as
+ * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE,
+ * corresponding to Unicode 15.1 UTS #46 deprecating transitional processing.
+ * (These options are ignored by the IDNA2003 implementation.)
+ *
+ * Before ICU 76, this constant did not set any of the options.
+ *
* @stable ICU 2.6
*/
- UIDNA_DEFAULT=0,
+ UIDNA_DEFAULT=0x30,
#ifndef U_HIDE_DEPRECATED_API
/**
* Option to allow unassigned code points in domain names and labels.
@@ -91,19 +99,27 @@ enum {
/**
* IDNA option for nontransitional processing in ToASCII().
* For use in static worker and factory methods.
+ *
* <p>By default, ToASCII() uses transitional processing.
+ * Unicode 15.1 UTS #46 deprecated transitional processing.
+ *
* <p>This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
+ * @see UIDNA_DEFAULT
*/
UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
/**
* IDNA option for nontransitional processing in ToUnicode().
* For use in static worker and factory methods.
+ *
* <p>By default, ToUnicode() uses transitional processing.
+ * Unicode 15.1 UTS #46 deprecated transitional processing.
+ *
* <p>This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
+ * @see UIDNA_DEFAULT
*/
UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
/**
@@ -134,6 +150,8 @@ typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
* For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
*
* @param options Bit set to modify the processing and error checking.
+ * These should include UIDNA_DEFAULT, or
+ * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
* See option bit set values in uidna.h.
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
diff --git a/thirdparty/icu4c/common/unicode/ulocale.h b/thirdparty/icu4c/common/unicode/ulocale.h
index 33e92844bc..1b3af3a5f2 100644
--- a/thirdparty/icu4c/common/unicode/ulocale.h
+++ b/thirdparty/icu4c/common/unicode/ulocale.h
@@ -13,16 +13,15 @@
* \brief C API: Locale ID functionality similar to C++ class Locale
*/
-#ifndef U_HIDE_DRAFT_API
/**
* Opaque C service object type for the locale API
- * @draft ICU 74
+ * @stable ICU 74
*/
struct ULocale;
/**
* C typedef for struct ULocale.
- * @draft ICU 74
+ * @stable ICU 74
*/
typedef struct ULocale ULocale;
@@ -37,7 +36,7 @@ typedef struct ULocale ULocale;
* @param err the error code
* @return the locale.
*
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI ULocale* U_EXPORT2
ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err);
@@ -53,7 +52,7 @@ ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err);
* @param err the error code
* @return the locale.
*
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI ULocale* U_EXPORT2
ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err);
@@ -62,7 +61,7 @@ ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err);
* Close the locale and destroy its internal state.
*
* @param locale the locale
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocale_close(ULocale* locale);
@@ -72,7 +71,7 @@ ulocale_close(ULocale* locale);
*
* @param locale the locale
* @return the language code of the locale.
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI const char* U_EXPORT2
ulocale_getLanguage(const ULocale* locale);
@@ -82,7 +81,7 @@ ulocale_getLanguage(const ULocale* locale);
*
* @param locale the locale
* @return A pointer to the script.
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI const char* U_EXPORT2
ulocale_getScript(const ULocale* locale);
@@ -92,7 +91,7 @@ ulocale_getScript(const ULocale* locale);
*
* @param locale the locale
* @return A pointer to the region.
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI const char* U_EXPORT2
ulocale_getRegion(const ULocale* locale);
@@ -102,7 +101,7 @@ ulocale_getRegion(const ULocale* locale);
*
* @param locale the locale
* @return A pointer to the variant.
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI const char* U_EXPORT2
ulocale_getVariant(const ULocale* locale);
@@ -115,7 +114,7 @@ ulocale_getVariant(const ULocale* locale);
*
* @param locale the locale
* @return A pointer to "name".
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI const char* U_EXPORT2
ulocale_getLocaleID(const ULocale* locale);
@@ -126,7 +125,7 @@ ulocale_getLocaleID(const ULocale* locale);
*
* @param locale the locale
* @return A pointer to "base name".
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI const char* U_EXPORT2
ulocale_getBaseName(const ULocale* locale);
@@ -136,7 +135,7 @@ ulocale_getBaseName(const ULocale* locale);
*
* @param locale the locale
* @return false if it is a real locale, true if it is a bogus locale
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI bool U_EXPORT2
ulocale_isBogus(const ULocale* locale);
@@ -148,7 +147,7 @@ ulocale_isBogus(const ULocale* locale);
* @param err the error code
* @return pointer to UEnumeration, or nullptr if there are no keywords.
* Client must call uenum_close() to dispose the returned value.
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI UEnumeration* U_EXPORT2
ulocale_getKeywords(const ULocale* locale, UErrorCode *err);
@@ -160,7 +159,7 @@ ulocale_getKeywords(const ULocale* locale, UErrorCode *err);
* @param err the error code
* @return pointer to UEnumeration, or nullptr if there are no keywords.
* Client must call uenum_close() to dispose the returned value.
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI UEnumeration* U_EXPORT2
ulocale_getUnicodeKeywords(const ULocale* locale, UErrorCode *err);
@@ -178,7 +177,7 @@ ulocale_getUnicodeKeywords(const ULocale* locale, UErrorCode *err);
* @param valueBuffer The buffer to receive the value.
* @param valueBufferCapacity The capacity of receiving valueBuffer.
* @param err the error code
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI int32_t U_EXPORT2
ulocale_getKeywordValue(
@@ -198,7 +197,7 @@ ulocale_getKeywordValue(
* @param valueBuffer The buffer to receive the Unicode value.
* @param valueBufferCapacity The capacity of receiving valueBuffer.
* @param err the error code
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI int32_t U_EXPORT2
ulocale_getUnicodeKeywordValue(
@@ -216,7 +215,7 @@ U_NAMESPACE_BEGIN
*
* @see LocalPointerBase
* @see LocalPointer
- * @draft ICU 74
+ * @stable ICU 74
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalULocalePointer, ULocale, ulocale_close);
@@ -224,6 +223,4 @@ U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
-#endif /* U_HIDE_DRAFT_API */
-
#endif /*_ULOCALE */
diff --git a/thirdparty/icu4c/common/unicode/ulocbuilder.h b/thirdparty/icu4c/common/unicode/ulocbuilder.h
index 9b147292f4..ce61995bde 100644
--- a/thirdparty/icu4c/common/unicode/ulocbuilder.h
+++ b/thirdparty/icu4c/common/unicode/ulocbuilder.h
@@ -12,17 +12,15 @@
* \brief C API: Builder API for Locale
*/
-#ifndef U_HIDE_DRAFT_API
-
/**
* Opaque C service object type for the locale builder API
- * @draft ICU 74
+ * @stable ICU 74
*/
struct ULocaleBuilder;
/**
* C typedef for struct ULocaleBuilder.
- * @draft ICU 74
+ * @stable ICU 74
*/
typedef struct ULocaleBuilder ULocaleBuilder;
@@ -62,7 +60,7 @@ typedef struct ULocaleBuilder ULocaleBuilder;
* UErrorCode, then track the error of the validation of the input parameter
* into the internal UErrorCode.
*
- * @draft ICU 74
+ * @stable ICU 74
*/
/**
@@ -71,7 +69,7 @@ typedef struct ULocaleBuilder ULocaleBuilder;
* empty string. The created builder should be destroyed by calling
* ulocbld_close();
*
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI ULocaleBuilder* U_EXPORT2
ulocbld_open(void);
@@ -79,7 +77,7 @@ ulocbld_open(void);
/**
* Close the builder and destroy its internal state.
* @param builder the builder
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_close(ULocaleBuilder* builder);
@@ -97,7 +95,7 @@ ulocbld_close(ULocaleBuilder* builder);
* @param length the length of the locale; if negative, then the locale needs to be
* null terminated,
*
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_setLocale(ULocaleBuilder* builder, const char* locale, int32_t length);
@@ -113,7 +111,7 @@ ulocbld_setLocale(ULocaleBuilder* builder, const char* locale, int32_t length);
* @param locale the locale, a ULocale* pointer. The builder adopts the locale
* after the call and the client must not delete it.
*
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_adoptULocale(ULocaleBuilder* builder, ULocale* locale);
@@ -136,7 +134,7 @@ ulocbld_adoptULocale(ULocaleBuilder* builder, ULocale* locale);
* the length is non-negative)
* @param length the length of the tag; if negative, then the tag needs to be
* null terminated,
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_setLanguageTag(ULocaleBuilder* builder, const char* tag, int32_t length);
@@ -156,7 +154,7 @@ ulocbld_setLanguageTag(ULocaleBuilder* builder, const char* tag, int32_t length)
* the length is non-negative)
* @param length the length of the language; if negative, then the language needs to be
* null terminated,
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_setLanguage(ULocaleBuilder* builder, const char* language, int32_t length);
@@ -177,7 +175,7 @@ ulocbld_setLanguage(ULocaleBuilder* builder, const char* language, int32_t lengt
* the length is non-negative)
* @param length the length of the script; if negative, then the script needs to be
* null terminated,
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_setScript(ULocaleBuilder* builder, const char* script, int32_t length);
@@ -201,7 +199,7 @@ ulocbld_setScript(ULocaleBuilder* builder, const char* script, int32_t length);
* the length is non-negative)
* @param length the length of the region; if negative, then the region needs to be
* null terminated,
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_setRegion(ULocaleBuilder* builder, const char* region, int32_t length);
@@ -227,7 +225,7 @@ ulocbld_setRegion(ULocaleBuilder* builder, const char* region, int32_t length);
* the length is non-negative)
* @param length the length of the variant; if negative, then the variant needs to be
* null terminated,
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_setVariant(ULocaleBuilder* builder, const char* variant, int32_t length);
@@ -253,7 +251,7 @@ ulocbld_setVariant(ULocaleBuilder* builder, const char* variant, int32_t length)
* the length is non-negative)
* @param length the length of the value; if negative, then the value needs to be
* null terminated,
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_setExtension(ULocaleBuilder* builder, char key, const char* value, int32_t length);
@@ -282,7 +280,7 @@ ulocbld_setExtension(ULocaleBuilder* builder, char key, const char* value, int32
* @param typeLength the length of the type; if negative, then the type needs to
* be null terminated,
* @return This builder.
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_setUnicodeLocaleKeyword(ULocaleBuilder* builder,
@@ -299,7 +297,7 @@ ulocbld_setUnicodeLocaleKeyword(ULocaleBuilder* builder,
* terminated when the length is non-negative)
* @param length the length of the attribute; if negative, then the attribute
* needs to be null terminated,
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_addUnicodeLocaleAttribute(
@@ -318,7 +316,7 @@ ulocbld_addUnicodeLocaleAttribute(
* terminated when the length is non-negative)
* @param length the length of the attribute; if negative, then the attribute
* needs to be null terminated,
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_removeUnicodeLocaleAttribute(
@@ -329,7 +327,7 @@ ulocbld_removeUnicodeLocaleAttribute(
* <p>This method clears the internal UErrorCode.
*
* @param builder the builder
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_clear(ULocaleBuilder* builder);
@@ -339,7 +337,7 @@ ulocbld_clear(ULocaleBuilder* builder);
* Language, script, region and variant are unchanged.
*
* @param builder the builder
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI void U_EXPORT2
ulocbld_clearExtensions(ULocaleBuilder* builder);
@@ -358,7 +356,7 @@ ulocbld_clearExtensions(ULocaleBuilder* builder);
* @param localeCapacity the size of the locale buffer to store the locale id
* @param err the error code
* @return the length of the locale id in buffer
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI int32_t U_EXPORT2
ulocbld_buildLocaleID(ULocaleBuilder* builder, char* locale,
@@ -377,7 +375,7 @@ ulocbld_buildLocaleID(ULocaleBuilder* builder, char* locale,
* @param err the error code.
* @return the locale, a ULocale* pointer. The created ULocale must be
* destroyed by calling {@link ulocale_close}.
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI ULocale* U_EXPORT2
ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err);
@@ -397,7 +395,7 @@ ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err);
* tag
* @param err the error code
* @return the length of the language tag in buffer
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI int32_t U_EXPORT2
ulocbld_buildLanguageTag(ULocaleBuilder* builder, char* language,
@@ -412,7 +410,7 @@ ulocbld_buildLanguageTag(ULocaleBuilder* builder, char* language,
* Unchanged if there is no such error or if outErrorCode
* already contained an error.
* @return true if U_FAILURE(*outErrorCode)
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI UBool U_EXPORT2
ulocbld_copyErrorTo(const ULocaleBuilder* builder, UErrorCode *outErrorCode);
@@ -428,7 +426,7 @@ U_NAMESPACE_BEGIN
*
* @see LocalPointerBase
* @see LocalPointer
- * @draft ICU 74
+ * @stable ICU 74
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleBuilderPointer, ULocaleBuilder, ulocbld_close);
@@ -436,6 +434,4 @@ U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
-#endif /* U_HIDE_DRAFT_API */
-
#endif // __ULOCBUILDER_H__
diff --git a/thirdparty/icu4c/common/unicode/uniset.h b/thirdparty/icu4c/common/unicode/uniset.h
index 127e4ce063..d070fd631a 100644
--- a/thirdparty/icu4c/common/unicode/uniset.h
+++ b/thirdparty/icu4c/common/unicode/uniset.h
@@ -313,7 +313,7 @@ private:
char16_t *pat = nullptr;
int32_t patLen = 0;
- UVector* strings = nullptr; // maintained in sorted order
+ UVector* strings_ = nullptr; // maintained in sorted order
UnicodeSetStringSpan *stringSpan = nullptr;
/**
@@ -1102,6 +1102,118 @@ public:
*/
UChar32 charAt(int32_t index) const;
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Returns a C++ "range" for iterating over the code points of this set.
+ *
+ * \code
+ * UnicodeSet set(u"[abcçカ🚴]", errorCode);
+ * for (UChar32 c : set.codePoints()) {
+ * printf("set.codePoint U+%04lx\n", (long)c);
+ * }
+ * \endcode
+ *
+ * @return a "range" object for iterating over the code points of this set.
+ * @draft ICU 76
+ * @see ranges
+ * @see strings
+ * @see begin
+ * @see end
+ */
+ inline U_HEADER_NESTED_NAMESPACE::USetCodePoints codePoints() const {
+ return U_HEADER_NESTED_NAMESPACE::USetCodePoints(toUSet());
+ }
+
+ /**
+ * Returns a C++ "range" for iterating over the code point ranges of this set.
+ *
+ * \code
+ * UnicodeSet set(u"[abcçカ🚴]", errorCode);
+ * for (auto [start, end] : set.ranges()) {
+ * printf("set.range U+%04lx..U+%04lx\n", (long)start, (long)end);
+ * }
+ * for (auto range : set.ranges()) {
+ * for (UChar32 c : range) {
+ * printf("set.range.c U+%04lx\n", (long)c);
+ * }
+ * }
+ * \endcode
+ *
+ * @return a "range" object for iterating over the code point ranges of this set.
+ * @draft ICU 76
+ * @see codePoints
+ * @see strings
+ * @see begin
+ * @see end
+ */
+ inline U_HEADER_NESTED_NAMESPACE::USetRanges ranges() const {
+ return U_HEADER_NESTED_NAMESPACE::USetRanges(toUSet());
+ }
+
+ /**
+ * Returns a C++ "range" for iterating over the empty and multi-character strings of this set.
+ * Returns each string as a std::u16string_view without copying its contents.
+ *
+ * \code
+ * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
+ * for (auto s : set.strings()) {
+ * UnicodeString us(s);
+ * std::string u8;
+ * printf("set.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str());
+ * }
+ * \endcode
+ *
+ * @return a "range" object for iterating over the strings of this set.
+ * @draft ICU 76
+ * @see codePoints
+ * @see ranges
+ * @see begin
+ * @see end
+ */
+ inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const {
+ return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet());
+ }
+
+ /**
+ * Returns a C++ iterator for iterating over all of the elements of this set.
+ * Convenient all-in-one iteration, but creates a UnicodeString for each
+ * code point or string.
+ * (Similar to how Java UnicodeSet *is an* Iterable&lt;String&gt;.)
+ *
+ * Code points are returned first, then empty and multi-character strings.
+ *
+ * \code
+ * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
+ * for (auto el : set) {
+ * std::string u8;
+ * printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
+ * }
+ * \endcode
+ *
+ * @return an all-elements iterator.
+ * @draft ICU 76
+ * @see end
+ * @see codePoints
+ * @see ranges
+ * @see strings
+ */
+ inline U_HEADER_NESTED_NAMESPACE::USetElementIterator begin() const {
+ return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).begin();
+ }
+
+ /**
+ * @return an exclusive-end sentinel for iterating over all of the elements of this set.
+ * @draft ICU 76
+ * @see begin
+ * @see codePoints
+ * @see ranges
+ * @see strings
+ */
+ inline U_HEADER_NESTED_NAMESPACE::USetElementIterator end() const {
+ return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).end();
+ }
+#endif // U_HIDE_DRAFT_API
+
/**
* Adds the specified range to this set if it is not already
* present. If this set already contains the specified range,
@@ -1731,7 +1843,7 @@ inline bool UnicodeSet::operator!=(const UnicodeSet& o) const {
}
inline UBool UnicodeSet::isFrozen() const {
- return (UBool)(bmpSet!=nullptr || stringSpan!=nullptr);
+ return bmpSet != nullptr || stringSpan != nullptr;
}
inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
@@ -1747,7 +1859,7 @@ inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
}
inline UBool UnicodeSet::isBogus() const {
- return (UBool)(fFlags & kIsBogus);
+ return fFlags & kIsBogus;
}
inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
diff --git a/thirdparty/icu4c/common/unicode/unistr.h b/thirdparty/icu4c/common/unicode/unistr.h
index a41e6358fa..39d789fd6e 100644
--- a/thirdparty/icu4c/common/unicode/unistr.h
+++ b/thirdparty/icu4c/common/unicode/unistr.h
@@ -33,6 +33,7 @@
#if U_SHOW_CPLUSPLUS_API
#include <cstddef>
+#include <string_view>
#include "unicode/char16ptr.h"
#include "unicode/rep.h"
#include "unicode/std_string.h"
@@ -97,16 +98,19 @@ class UnicodeStringAppendable; // unicode/appendable.h
#define US_INV icu::UnicodeString::kInvariant
/**
- * Unicode String literals in C++.
+ * \def UNICODE_STRING
+ * Obsolete macro approximating UnicodeString literals.
*
- * Note: these macros are not recommended for new code.
- * Prior to the availability of C++11 and u"unicode string literals",
- * these macros were provided for portability and efficiency when
+ * Prior to the availability of C++11 and u"UTF-16 string literals",
+ * this macro was provided for portability and efficiency when
* initializing UnicodeStrings from literals.
*
- * They work only for strings that contain "invariant characters", i.e.,
- * only latin letters, digits, and some punctuation.
- * See utypes.h for details.
+ * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString str(u"literal");
+ * if (str == u"other literal") { ... }
+ * \endcode
*
* The string parameter must be a C string literal.
* The length of the string, not including the terminating
@@ -121,16 +125,12 @@ class UnicodeStringAppendable; // unicode/appendable.h
/**
* Unicode String literals in C++.
- * Dependent on the platform properties, different UnicodeString
- * constructors should be used to create a UnicodeString object from
- * a string literal.
- * The macros are defined for improved performance.
- * They work only for strings that contain "invariant characters", i.e.,
- * only latin letters, digits, and some punctuation.
- * See utypes.h for details.
+ * Obsolete macro approximating UnicodeString literals.
+ * See UNICODE_STRING.
*
* The string parameter must be a C string literal.
* @stable ICU 2.0
+ * @see UNICODE_STRING
*/
#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
@@ -327,6 +327,30 @@ public:
*/
inline bool operator== (const UnicodeString& text) const;
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Equality operator. Performs only bitwise comparison with `text`
+ * which is, or which is implicitly convertible to,
+ * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
+ *
+ * For performance, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString str = ...;
+ * if (str == u"literal") { ... }
+ * \endcode
+ * @param text The string view to compare to this string.
+ * @return true if `text` contains the same characters as this one, false otherwise.
+ * @draft ICU 76
+ */
+ template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
+ inline bool operator==(const S &text) const {
+ std::u16string_view sv(internal::toU16StringView(text));
+ uint32_t len; // unsigned to avoid a compiler warning
+ return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len);
+ }
+#endif // U_HIDE_DRAFT_API
+
/**
* Inequality operator. Performs only bitwise comparison.
* @param text The UnicodeString to compare to this one.
@@ -336,6 +360,30 @@ public:
*/
inline bool operator!= (const UnicodeString& text) const;
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Inequality operator. Performs only bitwise comparison with `text`
+ * which is, or which is implicitly convertible to,
+ * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
+ *
+ * For performance, you can use std::u16string_view literals with compile-time
+ * length determination:
+ * \code
+ * #include &lt;string_view&gt;
+ * using namespace std::string_view_literals;
+ * UnicodeString str = ...;
+ * if (str != u"literal"sv) { ... }
+ * \endcode
+ * @param text The string view to compare to this string.
+ * @return false if `text` contains the same characters as this one, true otherwise.
+ * @draft ICU 76
+ */
+ template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
+ inline bool operator!=(const S &text) const {
+ return !operator==(text);
+ }
+#endif // U_HIDE_DRAFT_API
+
/**
* Greater than operator. Performs only bitwise comparison.
* @param text The UnicodeString to compare to this one.
@@ -1897,6 +1945,24 @@ public:
*/
UnicodeString &fastCopyFrom(const UnicodeString &src);
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Assignment operator. Replaces the characters in this UnicodeString
+ * with a copy of the characters from the `src`
+ * which is, or which is implicitly convertible to,
+ * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
+ *
+ * @param src The string view containing the characters to copy.
+ * @return a reference to this
+ * @draft ICU 76
+ */
+ template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
+ inline UnicodeString &operator=(const S &src) {
+ unBogus();
+ return doReplace(0, length(), internal::toU16StringView(src));
+ }
+#endif // U_HIDE_DRAFT_API
+
/**
* Move assignment operator; might leave src in bogus state.
* This string will have the same contents and state that the source string had.
@@ -2146,6 +2212,23 @@ public:
*/
inline UnicodeString& operator+= (const UnicodeString& srcText);
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Append operator. Appends the characters in `src`
+ * which is, or which is implicitly convertible to,
+ * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
+ * to the UnicodeString object.
+ *
+ * @param src the source for the new characters
+ * @return a reference to this
+ * @draft ICU 76
+ */
+ template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
+ inline UnicodeString& operator+=(const S &src) {
+ return doAppend(internal::toU16StringView(src));
+ }
+#endif // U_HIDE_DRAFT_API
+
/**
* Append the characters
* in `srcText` in the range
@@ -2191,8 +2274,8 @@ public:
int32_t srcLength);
/**
- * Append the characters in `srcChars` to the UnicodeString object
- * at offset `start`. `srcChars` is not modified.
+ * Append the characters in `srcChars` to the UnicodeString object.
+ * `srcChars` is not modified.
* @param srcChars the source for the new characters
* @param srcLength the number of Unicode characters in `srcChars`;
* can be -1 if `srcChars` is NUL-terminated
@@ -2202,6 +2285,23 @@ public:
inline UnicodeString& append(ConstChar16Ptr srcChars,
int32_t srcLength);
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Appends the characters in `src`
+ * which is, or which is implicitly convertible to,
+ * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
+ * to the UnicodeString object.
+ *
+ * @param src the source for the new characters
+ * @return a reference to this
+ * @draft ICU 76
+ */
+ template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
+ inline UnicodeString& append(const S &src) {
+ return doAppend(internal::toU16StringView(src));
+ }
+#endif // U_HIDE_DRAFT_API
+
/**
* Append the code unit `srcChar` to the UnicodeString object.
* @param srcChar the code unit to append
@@ -2556,7 +2656,7 @@ public:
* @stable ICU 2.0
*/
inline UnicodeString& remove(int32_t start,
- int32_t length = (int32_t)INT32_MAX);
+ int32_t length = static_cast<int32_t>(INT32_MAX));
/**
* Remove the characters in the range
@@ -2567,7 +2667,7 @@ public:
* @stable ICU 2.0
*/
inline UnicodeString& removeBetween(int32_t start,
- int32_t limit = (int32_t)INT32_MAX);
+ int32_t limit = static_cast<int32_t>(INT32_MAX));
/**
* Retain only the characters in the range
@@ -2693,8 +2793,8 @@ public:
* styles, using rules and dictionaries beyond the standard iterators.
* It may be more efficient to always provide an iterator to avoid
* opening and closing one for each string.
- * The standard titlecase iterator for the root locale implements the
- * algorithm of Unicode TR 21.
+ * If the break iterator passed in is null, the default Unicode algorithm
+ * will be used to determine the titlecase positions.
*
* This function uses only the setText(), first() and next() methods of the
* provided break iterator.
@@ -2721,8 +2821,8 @@ public:
* styles, using rules and dictionaries beyond the standard iterators.
* It may be more efficient to always provide an iterator to avoid
* opening and closing one for each string.
- * The standard titlecase iterator for the root locale implements the
- * algorithm of Unicode TR 21.
+ * If the break iterator passed in is null, the default Unicode algorithm
+ * will be used to determine the titlecase positions.
*
* This function uses only the setText(), first() and next() methods of the
* provided break iterator.
@@ -2750,8 +2850,8 @@ public:
* styles, using rules and dictionaries beyond the standard iterators.
* It may be more efficient to always provide an iterator to avoid
* opening and closing one for each string.
- * The standard titlecase iterator for the root locale implements the
- * algorithm of Unicode TR 21.
+ * If the break iterator passed in is null, the default Unicode algorithm
+ * will be used to determine the titlecase positions.
*
* This function uses only the setText(), first() and next() methods of the
* provided break iterator.
@@ -2925,6 +3025,37 @@ public:
*/
const char16_t *getTerminatedBuffer();
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Converts to a std::u16string_view.
+ *
+ * @return a string view of the contents of this string
+ * @draft ICU 76
+ */
+ inline operator std::u16string_view() const {
+ return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
+ }
+
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * Converts to a std::wstring_view.
+ *
+ * Note: This should remain draft until C++ standard plans
+ * about char16_t vs. wchar_t become clearer.
+ *
+ * @return a string view of the contents of this string
+ * @draft ICU 76
+ */
+ inline operator std::wstring_view() const {
+ const char16_t *p = getBuffer();
+#ifdef U_ALIASING_BARRIER
+ U_ALIASING_BARRIER(p);
+#endif
+ return { reinterpret_cast<const wchar_t *>(p), (std::wstring_view::size_type)length() };
+ }
+#endif // U_SIZEOF_WCHAR_T
+#endif // U_HIDE_DRAFT_API
+
//========================================
// Constructors
//========================================
@@ -2969,19 +3100,32 @@ public:
*/
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
+#ifdef U_HIDE_DRAFT_API
/**
* char16_t* constructor.
*
* It is recommended to mark this constructor "explicit" by
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
* on the compiler command line or similar.
+ *
+ * Note, for string literals:
+ * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString str(u"literal");
+ * if (str == u"other literal") { ... }
+ * \endcode
+ *
* @param text The characters to place in the UnicodeString. `text`
* must be NUL (U+0000) terminated.
* @stable ICU 2.0
*/
- UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
+ UnicodeString(text, -1) {}
+#endif // U_HIDE_DRAFT_API
-#if !U_CHAR16_IS_TYPEDEF
+#if !U_CHAR16_IS_TYPEDEF && \
+ (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000))
/**
* uint16_t * constructor.
* Delegates to UnicodeString(const char16_t *).
@@ -2989,14 +3133,23 @@ public:
* It is recommended to mark this constructor "explicit" by
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
* on the compiler command line or similar.
+ *
+ * Note, for string literals:
+ * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString str(u"literal");
+ * if (str == u"other literal") { ... }
+ * \endcode
+ *
* @param text NUL-terminated UTF-16 string
* @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
- UnicodeString(ConstChar16Ptr(text)) {}
+ UnicodeString(ConstChar16Ptr(text), -1) {}
#endif
-#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+#if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN))
/**
* wchar_t * constructor.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
@@ -3005,11 +3158,20 @@ public:
* It is recommended to mark this constructor "explicit" by
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
* on the compiler command line or similar.
+ *
+ * Note, for string literals:
+ * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString str(u"literal");
+ * if (str == u"other literal") { ... }
+ * \endcode
+ *
* @param text NUL-terminated UTF-16 string
* @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
- UnicodeString(ConstChar16Ptr(text)) {}
+ UnicodeString(ConstChar16Ptr(text), -1) {}
#endif
/**
@@ -3026,6 +3188,15 @@ public:
/**
* char16_t* constructor.
+ *
+ * Note, for string literals:
+ * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString str(u"literal");
+ * if (str == u"other literal") { ... }
+ * \endcode
+ *
* @param text The characters to place in the UnicodeString.
* @param textLength The number of Unicode characters in `text`
* to copy.
@@ -3038,6 +3209,15 @@ public:
/**
* uint16_t * constructor.
* Delegates to UnicodeString(const char16_t *, int32_t).
+ *
+ * Note, for string literals:
+ * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString str(u"literal");
+ * if (str == u"other literal") { ... }
+ * \endcode
+ *
* @param text UTF-16 string
* @param textLength string length
* @stable ICU 59
@@ -3051,7 +3231,16 @@ public:
* wchar_t * constructor.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* Delegates to UnicodeString(const char16_t *, int32_t).
- * @param text NUL-terminated UTF-16 string
+ *
+ * Note, for string literals:
+ * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString str(u"literal");
+ * if (str == u"other literal") { ... }
+ * \endcode
+ *
+ * @param text UTF-16 string
* @param textLength string length
* @stable ICU 59
*/
@@ -3068,6 +3257,26 @@ public:
*/
inline UnicodeString(const std::nullptr_t text, int32_t textLength);
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Constructor from `text`
+ * which is, or which is implicitly convertible to,
+ * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
+ * The string is bogus if the string view is too long.
+ *
+ * If you need a UnicodeString but need not copy the string view contents,
+ * then you can call the UnicodeString::readOnlyAlias() function instead of this constructor.
+ *
+ * @param text UTF-16 string
+ * @draft ICU 76
+ */
+ template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
+ fUnion.fFields.fLengthAndFlags = kShortString;
+ doAppend(internal::toU16StringViewNullable(text));
+ }
+#endif // U_HIDE_DRAFT_API
+
/**
* Readonly-aliasing char16_t* constructor.
* The text will be used for the UnicodeString object, but
@@ -3082,6 +3291,14 @@ public:
* When using fastCopyFrom(), the text will be aliased again,
* so that both strings then alias the same readonly-text.
*
+ * Note, for string literals:
+ * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString alias = UnicodeString::readOnlyAlias(u"literal");
+ * if (str == u"other literal") { ... }
+ * \endcode
+ *
* @param isTerminated specifies if `text` is `NUL`-terminated.
* This must be true if `textLength==-1`.
* @param text The characters to alias for the UnicodeString.
@@ -3160,8 +3377,14 @@ public:
*
* For ASCII (really "invariant character") strings it is more efficient to use
* the constructor that takes a US_INV (for its enum EInvariant).
- * For ASCII (invariant-character) string literals, see UNICODE_STRING and
- * UNICODE_STRING_SIMPLE.
+ *
+ * Note, for string literals:
+ * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString str(u"literal");
+ * if (str == u"other literal") { ... }
+ * \endcode
*
* It is recommended to mark this constructor "explicit" by
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
@@ -3169,8 +3392,6 @@ public:
* @param codepageData an array of bytes, null-terminated,
* in the platform's default codepage.
* @stable ICU 2.0
- * @see UNICODE_STRING
- * @see UNICODE_STRING_SIMPLE
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
@@ -3270,6 +3491,15 @@ public:
* // use ustr ...
* }
* \endcode
+ *
+ * Note, for string literals:
+ * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
+ * length determination:
+ * \code
+ * UnicodeString str(u"literal");
+ * if (str == u"other literal") { ... }
+ * \endcode
+ *
* @param src String using only invariant characters.
* @param textLength Length of src, or -1 if NUL-terminated.
* @param inv Signature-distinguishing parameter, use US_INV.
@@ -3343,6 +3573,58 @@ public:
*/
virtual ~UnicodeString();
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Readonly-aliasing factory method.
+ * Aliases the same buffer as the input `text`
+ * which is, or which is implicitly convertible to,
+ * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
+ * The string is bogus if the string view is too long.
+ *
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ *
+ * In an assignment to another UnicodeString, when using the copy constructor
+ * or the assignment operator, the text will be copied.
+ * When using fastCopyFrom(), the text will be aliased again,
+ * so that both strings then alias the same readonly-text.
+ *
+ * @param text The string view to alias for the UnicodeString.
+ * @draft ICU 76
+ */
+ template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
+ static inline UnicodeString readOnlyAlias(const S &text) {
+ return readOnlyAliasFromU16StringView(internal::toU16StringView(text));
+ }
+
+ /**
+ * Readonly-aliasing factory method.
+ * Aliases the same buffer as the input `text`.
+ *
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ *
+ * In an assignment to another UnicodeString, when using the copy constructor
+ * or the assignment operator, the text will be copied.
+ * When using fastCopyFrom(), the text will be aliased again,
+ * so that both strings then alias the same readonly-text.
+ *
+ * @param text The UnicodeString to alias.
+ * @draft ICU 76
+ */
+ static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
+ return readOnlyAliasFromUnicodeString(text);
+ }
+#endif // U_HIDE_DRAFT_API
+
/**
* Create a UnicodeString from a UTF-8 string.
* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
@@ -3470,6 +3752,9 @@ protected:
virtual UChar32 getChar32At(int32_t offset) const override;
private:
+ static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
+ static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text);
+
// For char* constructors. Could be made public.
UnicodeString &setToUTF8(StringPiece utf8);
// For extract(char*).
@@ -3485,7 +3770,10 @@ private:
* Internal string contents comparison, called by operator==.
* Requires: this & text not bogus and have same lengths.
*/
- UBool doEquals(const UnicodeString &text, int32_t len) const;
+ inline UBool doEquals(const UnicodeString &text, int32_t len) const {
+ return doEquals(text.getArrayStart(), len);
+ }
+ UBool doEquals(const char16_t *text, int32_t len) const;
inline UBool
doEqualsSubstring(int32_t start,
@@ -3580,9 +3868,11 @@ private:
const char16_t *srcChars,
int32_t srcStart,
int32_t srcLength);
+ UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
+ UnicodeString& doAppend(std::u16string_view src);
UnicodeString& doReverse(int32_t start,
int32_t length);
@@ -3711,7 +4001,7 @@ private:
* Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
* @see UNISTR_OBJECT_SIZE
*/
- US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
+ US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR,
kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
kInvalidHashCode=0, // invalid hash code
kEmptyHashCode=1, // hash code for empty string
@@ -3802,7 +4092,7 @@ private:
};
/**
- * Create a new UnicodeString with the concatenation of two others.
+ * Creates a new UnicodeString from the concatenation of two others.
*
* @param s1 The first string to be copied to the new one.
* @param s2 The second string to be copied to the new one, after s1.
@@ -3812,6 +4102,29 @@ private:
U_COMMON_API UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Creates a new UnicodeString from the concatenation of a UnicodeString and `s2`
+ * which is, or which is implicitly convertible to,
+ * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
+ *
+ * @param s1 The string to be copied to the new one.
+ * @param s2 The string view to be copied to the new string, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @draft ICU 76
+ */
+template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
+inline UnicodeString operator+(const UnicodeString &s1, const S &s2) { // Delegates to unistr_internalConcat(), which takes a std::u16string_view.
+ return unistr_internalConcat(s1, internal::toU16StringView(s2)); // NOTE(review): the helper is declared below this template, so this call presumably must stay type-dependent - confirm before refactoring.
+}
+#endif // U_HIDE_DRAFT_API
+
+#ifndef U_FORCE_HIDE_INTERNAL_API
+/** @internal */
+U_COMMON_API UnicodeString U_EXPORT2
+unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
+#endif
+
//========================================
// Inline members
//========================================
@@ -3914,18 +4227,18 @@ UnicodeString::hashCode() const
inline UBool
UnicodeString::isBogus() const
-{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
+{ return fUnion.fFields.fLengthAndFlags & kIsBogus; }
inline UBool
UnicodeString::isWritable() const
-{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
+{ return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
inline UBool
UnicodeString::isBufferWritable() const
{
- return (UBool)(
+ return
!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
- (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
+ (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
}
inline const char16_t *
@@ -3950,7 +4263,7 @@ UnicodeString::doCompare(int32_t start,
int32_t srcLength) const
{
if(srcText.isBogus()) {
- return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+ return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
} else {
srcText.pinIndices(srcStart, srcLength);
return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
@@ -4057,7 +4370,7 @@ UnicodeString::doCompareCodePointOrder(int32_t start,
int32_t srcLength) const
{
if(srcText.isBogus()) {
- return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+ return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
} else {
srcText.pinIndices(srcStart, srcLength);
return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
@@ -4119,7 +4432,7 @@ UnicodeString::doCaseCompare(int32_t start,
uint32_t options) const
{
if(srcText.isBogus()) {
- return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+ return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
} else {
srcText.pinIndices(srcStart, srcLength);
return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
@@ -4531,7 +4844,7 @@ UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
inline char16_t
UnicodeString::doCharAt(int32_t offset) const
{
- if((uint32_t)offset < (uint32_t)length()) {
+ if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
return getArrayStart()[offset];
} else {
return kInvalidUChar;
@@ -4564,7 +4877,7 @@ inline void
UnicodeString::setShortLength(int32_t len) {
// requires 0 <= len <= kMaxShortLength
fUnion.fFields.fLengthAndFlags =
- (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
+ static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
}
inline void
@@ -4758,7 +5071,7 @@ UnicodeString::truncate(int32_t targetLength)
// truncate(0) of a bogus string makes the string empty and non-bogus
unBogus();
return false;
- } else if((uint32_t)targetLength < (uint32_t)length()) {
+ } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) {
setLength(targetLength);
return true;
} else {
diff --git a/thirdparty/icu4c/common/unicode/unorm2.h b/thirdparty/icu4c/common/unicode/unorm2.h
index 3844041f17..48f614d74f 100644
--- a/thirdparty/icu4c/common/unicode/unorm2.h
+++ b/thirdparty/icu4c/common/unicode/unorm2.h
@@ -197,7 +197,6 @@ unorm2_getNFKDInstance(UErrorCode *pErrorCode);
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
-#ifndef U_HIDE_DRAFT_API
/**
* Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
* which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
@@ -210,11 +209,10 @@ unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested Normalizer2, if successful
- * @draft ICU 74
+ * @stable ICU 74
*/
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode);
-#endif // U_HIDE_DRAFT_API
/**
* Returns a UNormalizer2 instance which uses the specified data file
diff --git a/thirdparty/icu4c/common/unicode/urename.h b/thirdparty/icu4c/common/unicode/urename.h
index c94592ec03..e39c779e81 100644
--- a/thirdparty/icu4c/common/unicode/urename.h
+++ b/thirdparty/icu4c/common/unicode/urename.h
@@ -490,6 +490,7 @@
#define ubiditransform_close U_ICU_ENTRY_POINT_RENAME(ubiditransform_close)
#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open)
#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform)
+#define ublock_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ublock_addPropertyStarts)
#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
#define ubrk_clone U_ICU_ENTRY_POINT_RENAME(ubrk_clone)
#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
@@ -991,6 +992,7 @@
#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString)
#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars)
#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars)
+#define uhash_compareIStringView U_ICU_ENTRY_POINT_RENAME(uhash_compareIStringView)
#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong)
#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet)
#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars)
@@ -1008,6 +1010,7 @@
#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString)
#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars)
#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars)
+#define uhash_hashIStringView U_ICU_ENTRY_POINT_RENAME(uhash_hashIStringView)
#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong)
#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet)
#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars)
@@ -1209,10 +1212,14 @@
#define ulocimp_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(ulocimp_minimizeSubtags)
#define ulocimp_setKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocimp_setKeywordValue)
#define ulocimp_toBcpKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKey)
+#define ulocimp_toBcpKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKeyWithFallback)
#define ulocimp_toBcpType U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpType)
+#define ulocimp_toBcpTypeWithFallback U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpTypeWithFallback)
#define ulocimp_toLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_toLanguageTag)
#define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey)
+#define ulocimp_toLegacyKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKeyWithFallback)
#define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType)
+#define ulocimp_toLegacyTypeWithFallback U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyTypeWithFallback)
#define ultag_getTKeyStart U_ICU_ENTRY_POINT_RENAME(ultag_getTKeyStart)
#define ultag_isExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isExtensionSubtags)
#define ultag_isLanguageSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isLanguageSubtag)
@@ -1773,6 +1780,8 @@
#define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange)
#define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount)
#define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet)
+#define uset_getString U_ICU_ENTRY_POINT_RENAME(uset_getString)
+#define uset_getStringCount U_ICU_ENTRY_POINT_RENAME(uset_getStringCount)
#define uset_hasStrings U_ICU_ENTRY_POINT_RENAME(uset_hasStrings)
#define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf)
#define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty)
@@ -1811,7 +1820,6 @@
#define usnum_setMinimumIntegerDigits U_ICU_ENTRY_POINT_RENAME(usnum_setMinimumIntegerDigits)
#define usnum_setSign U_ICU_ENTRY_POINT_RENAME(usnum_setSign)
#define usnum_setToInt64 U_ICU_ENTRY_POINT_RENAME(usnum_setToInt64)
-#define usnum_truncateStart U_ICU_ENTRY_POINT_RENAME(usnum_truncateStart)
#define usnumf_close U_ICU_ENTRY_POINT_RENAME(usnumf_close)
#define usnumf_format U_ICU_ENTRY_POINT_RENAME(usnumf_format)
#define usnumf_formatInt64 U_ICU_ENTRY_POINT_RENAME(usnumf_formatInt64)
diff --git a/thirdparty/icu4c/common/unicode/uscript.h b/thirdparty/icu4c/common/unicode/uscript.h
index 2c9cee1bfa..d983c40901 100644
--- a/thirdparty/icu4c/common/unicode/uscript.h
+++ b/thirdparty/icu4c/common/unicode/uscript.h
@@ -503,6 +503,21 @@ typedef enum UScriptCode {
/** @stable ICU 75 */
USCRIPT_ARABIC_NASTALIQ = 200, /* Aran */
+ /** @stable ICU 76 */
+ USCRIPT_GARAY = 201, /* Gara */
+ /** @stable ICU 76 */
+ USCRIPT_GURUNG_KHEMA = 202, /* Gukh */
+ /** @stable ICU 76 */
+ USCRIPT_KIRAT_RAI = 203, /* Krai */
+ /** @stable ICU 76 */
+ USCRIPT_OL_ONAL = 204, /* Onao */
+ /** @stable ICU 76 */
+ USCRIPT_SUNUWAR = 205, /* Sunu */
+ /** @stable ICU 76 */
+ USCRIPT_TODHRI = 206, /* Todr */
+ /** @stable ICU 76 */
+ USCRIPT_TULU_TIGALARI = 207, /* Tutg */
+
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UScriptCode value.
@@ -510,7 +525,7 @@ typedef enum UScriptCode {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- USCRIPT_CODE_LIMIT = 201
+ USCRIPT_CODE_LIMIT = 208
#endif // U_HIDE_DEPRECATED_API
} UScriptCode;
diff --git a/thirdparty/icu4c/common/unicode/uset.h b/thirdparty/icu4c/common/unicode/uset.h
index cce95ce921..c8f9b5592d 100644
--- a/thirdparty/icu4c/common/unicode/uset.h
+++ b/thirdparty/icu4c/common/unicode/uset.h
@@ -33,7 +33,10 @@
#include "unicode/uchar.h"
#if U_SHOW_CPLUSPLUS_API
+#include <string_view>
+#include "unicode/char16ptr.h"
#include "unicode/localpointer.h"
+#include "unicode/unistr.h"
#endif // U_SHOW_CPLUSPLUS_API
#ifndef USET_DEFINED
@@ -955,7 +958,7 @@ uset_charAt(const USet* set, int32_t charIndex);
/**
* Returns the number of characters and strings contained in this set.
- * The last (uset_getItemCount() - uset_getRangeCount()) items are strings.
+ * The last uset_getStringCount() == (uset_getItemCount() - uset_getRangeCount()) items are strings.
*
* This is slower than uset_getRangeCount() and uset_getItemCount() because
* it counts the code points of all ranges.
@@ -965,6 +968,8 @@ uset_charAt(const USet* set, int32_t charIndex);
* contained in set
* @stable ICU 2.4
* @see uset_getRangeCount
+ * @see uset_getStringCount
+ * @see uset_getItemCount
*/
U_CAPI int32_t U_EXPORT2
uset_size(const USet* set);
@@ -975,11 +980,42 @@ uset_size(const USet* set);
* @stable ICU 70
* @see uset_getItemCount
* @see uset_getItem
+ * @see uset_getStringCount
* @see uset_size
*/
U_CAPI int32_t U_EXPORT2
uset_getRangeCount(const USet *set);
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * @param set the set
+ * @return the number of strings in this set.
+ * @draft ICU 76
+ * @see uset_getRangeCount
+ * @see uset_getItemCount
+ * @see uset_size
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getStringCount(const USet *set);
+
+/**
+ * Returns the index-th string (empty or multi-character) in the set.
+ * The string may not be NUL-terminated.
+ * The output length must be used, and the caller must not read more than that many UChars.
+ *
+ * @param set the set
+ * @param index the string index, 0 .. uset_getStringCount() - 1
+ * @param pLength the output string length; must not be NULL
+ * @return the pointer to the string; NULL if the index is out of range or pLength is NULL
+ * @draft ICU 76
+ * @see uset_getStringCount
+ */
+U_CAPI const UChar* U_EXPORT2
+uset_getString(const USet *set, int32_t index, int32_t *pLength);
+
+#endif // U_HIDE_DRAFT_API
+
/**
* Returns the number of items in this set. An item is either a range
* of characters or a single multicharacter string.
@@ -987,6 +1023,8 @@ uset_getRangeCount(const USet *set);
* @return a non-negative integer counting the character ranges
* and/or strings contained in set
* @stable ICU 2.4
+ * @see uset_getRangeCount
+ * @see uset_getStringCount
*/
U_CAPI int32_t U_EXPORT2
uset_getItemCount(const USet* set);
@@ -1001,6 +1039,7 @@ uset_getItemCount(const USet* set);
* If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then
* this function copies the string into <code>str[strCapacity]</code> and
* returns the length of the string (0 for the empty string).
+ * See uset_getString() for a function that does not copy the string contents.
*
* If <code>itemIndex</code> is out of range, then this function returns -1.
*
@@ -1018,6 +1057,7 @@ uset_getItemCount(const USet* set);
* @return the length of the string (0 or >= 2), or 0 if the item is a range,
* or -1 if the itemIndex is out of range
* @stable ICU 2.4
+ * @see uset_getString
*/
U_CAPI int32_t U_EXPORT2
uset_getItem(const USet* set, int32_t itemIndex,
@@ -1285,4 +1325,574 @@ U_CAPI UBool U_EXPORT2
uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
UChar32* pStart, UChar32* pEnd);
-#endif
+#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
+#ifndef U_HIDE_DRAFT_API
+
+namespace U_HEADER_ONLY_NAMESPACE {
+
+// Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class,
+// not intended to be used via export from the ICU DLL.
+
+/**
+ * Iterator returned by USetCodePoints.
+ * @draft ICU 76
+ */
+class USetCodePointIterator {
+public:
+    /**
+     * Copy constructor. The copy refers to the same USet and the same position.
+     * @draft ICU 76
+     */
+    USetCodePointIterator(const USetCodePointIterator &other) = default;
+
+    /**
+     * Equality: same USet and same current code point.
+     *
+     * rangeCount and end are not compared, given the private constructor
+     * and assuming iterators are not compared across a modification of the set.
+     * Comparing rangeIndex would be redundant with comparing c.
+     * Comparing uset could even be skipped, unless operator==() should be
+     * "correct" for more than iteration.
+     * @draft ICU 76
+     */
+    bool operator==(const USetCodePointIterator &other) const {
+        if (uset != other.uset) {
+            return false;
+        }
+        return c == other.c;
+    }
+
+    /**
+     * Inequality; the negation of operator==().
+     * @draft ICU 76
+     */
+    bool operator!=(const USetCodePointIterator &other) const { return !(*this == other); }
+
+    /**
+     * Returns the current code point, or U_SENTINEL once iteration is finished.
+     * @draft ICU 76
+     */
+    UChar32 operator*() const { return c; }
+
+    /**
+     * Pre-increment.
+     * Steps to the next code point of the current range; when the range is
+     * used up, loads the next range with uset_getItem(). If there is no
+     * further range, or the item cannot be fetched as a range,
+     * the iterator becomes the end state (c == end == U_SENTINEL).
+     * @draft ICU 76
+     */
+    USetCodePointIterator &operator++() {
+        if (c < end) {
+            // Still inside the current range.
+            ++c;
+            return *this;
+        }
+        if (rangeIndex < rangeCount) {
+            UErrorCode errorCode = U_ZERO_ERROR;
+            int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode);
+            if (U_SUCCESS(errorCode) && result == 0) {
+                // c/end now describe the fetched range; rangeIndex names the next one.
+                ++rangeIndex;
+                return *this;
+            }
+        }
+        // No more ranges, or the fetch did not yield a range.
+        c = end = U_SENTINEL;
+        return *this;
+    }
+
+    /**
+     * Post-increment.
+     * Advances this iterator and returns a copy of its previous state.
+     * @draft ICU 76
+     */
+    USetCodePointIterator operator++(int) {
+        USetCodePointIterator previous(*this);
+        ++(*this);
+        return previous;
+    }
+
+private:
+    friend class USetCodePoints;
+
+    /**
+     * Starts in the sentinel state and immediately advances once,
+     * which loads the range at rangeIndex if there is one.
+     */
+    USetCodePointIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
+            : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount),
+                c(U_SENTINEL), end(U_SENTINEL) {
+        ++(*this);
+    }
+
+    const USet *uset;
+    /** Index of the next range to fetch. */
+    int32_t rangeIndex;
+    int32_t rangeCount;
+    /** Current code point and inclusive end of the current range. */
+    UChar32 c, end;
+};
+
+/**
+ * C++ "range" for iterating over the code points of a USet.
+ *
+ * \code
+ * using U_HEADER_NESTED_NAMESPACE::USetCodePoints;
+ * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode));
+ * for (UChar32 c : USetCodePoints(uset.getAlias())) {
+ * printf("uset.codePoint U+%04lx\n", (long)c);
+ * }
+ * \endcode
+ *
+ * C++ UnicodeSet has member functions for iteration, including codePoints().
+ *
+ * @draft ICU 76
+ * @see USetRanges
+ * @see USetStrings
+ * @see USetElements
+ */
+class USetCodePoints {
+public:
+    /**
+     * Constructs a C++ "range" object over the code points of the USet.
+     * The range count is read once, here, via uset_getRangeCount().
+     * @draft ICU 76
+     */
+    USetCodePoints(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {}
+
+    /**
+     * Copy constructor.
+     * @draft ICU 76
+     */
+    USetCodePoints(const USetCodePoints &other) = default;
+
+    /**
+     * Returns an iterator that starts with the first range (range index 0).
+     * @draft ICU 76
+     */
+    USetCodePointIterator begin() const {
+        USetCodePointIterator first(uset, 0, rangeCount);
+        return first;
+    }
+
+    /**
+     * Returns the end iterator (range index == range count).
+     * @draft ICU 76
+     */
+    USetCodePointIterator end() const {
+        USetCodePointIterator pastLast(uset, rangeCount, rangeCount);
+        return pastLast;
+    }
+
+private:
+    const USet *uset;
+    int32_t rangeCount;
+};
+
+/**
+ * A contiguous range of code points in a USet/UnicodeSet.
+ * Returned by USetRangeIterator which is returned by USetRanges.
+ * Both the rangeStart and rangeEnd are in the range.
+ * (end() returns an iterator corresponding to rangeEnd+1.)
+ * @draft ICU 76
+ */
+struct CodePointRange {
+    /**
+     * Iterator over the code points of a CodePointRange.
+     * @draft ICU 76
+     */
+    struct iterator {
+        /**
+         * Constructs an iterator positioned at code point c.
+         * Not explicit, so a UChar32 converts implicitly to an iterator.
+         * @draft ICU 76
+         */
+        iterator(UChar32 c) : c(c) {}
+
+        /**
+         * Equality: both iterators are at the same code point.
+         * @draft ICU 76
+         */
+        bool operator==(const iterator &other) const { return c == other.c; }
+        /**
+         * Inequality; the negation of operator==().
+         * @draft ICU 76
+         */
+        bool operator!=(const iterator &other) const { return !(*this == other); }
+
+        /**
+         * Returns the current code point.
+         * @draft ICU 76
+         */
+        UChar32 operator*() const { return c; }
+
+        /**
+         * Pre-increment.
+         * @draft ICU 76
+         */
+        iterator &operator++() {
+            ++c;
+            return *this;
+        }
+
+        /**
+         * Post-increment.
+         * Returns an iterator at the code point before the increment.
+         * @draft ICU 76
+         */
+        iterator operator++(int) {
+            iterator before(c);
+            ++c;
+            return before;
+        }
+
+        /**
+         * The current code point in the range.
+         * @draft ICU 76
+         */
+        UChar32 c;
+    };
+
+    /**
+     * Constructs the range start..end, both inclusive.
+     * @draft ICU 76
+     */
+    CodePointRange(UChar32 start, UChar32 end) : rangeStart(start), rangeEnd(end) {}
+    /**
+     * Copy constructor.
+     * @draft ICU 76
+     */
+    CodePointRange(const CodePointRange &other) = default;
+    /**
+     * Returns (rangeEnd + 1) - rangeStart.
+     * @draft ICU 76
+     */
+    size_t size() const {
+        const UChar32 limit = rangeEnd + 1;
+        return limit - rangeStart;
+    }
+    /**
+     * Returns an iterator at rangeStart.
+     * @draft ICU 76
+     */
+    iterator begin() const { return iterator(rangeStart); }
+    /**
+     * Returns an iterator at rangeEnd + 1.
+     * @draft ICU 76
+     */
+    iterator end() const { return iterator(rangeEnd + 1); }
+
+    /**
+     * Start of a USet/UnicodeSet range of code points.
+     * @draft ICU 76
+     */
+    UChar32 rangeStart;
+    /**
+     * Inclusive end of a USet/UnicodeSet range of code points.
+     * @draft ICU 76
+     */
+    UChar32 rangeEnd;
+};
+
+/**
+ * Iterator returned by USetRanges.
+ * @draft ICU 76
+ */
+class USetRangeIterator {
+public:
+    /**
+     * Copy constructor. The copy refers to the same USet and range index.
+     * @draft ICU 76
+     */
+    USetRangeIterator(const USetRangeIterator &other) = default;
+
+    /**
+     * Equality: same USet and same range index.
+     *
+     * rangeCount is not compared, given the private constructor
+     * and assuming iterators are not compared across a modification of the set.
+     * Comparing uset could even be skipped, unless operator==() should be
+     * "correct" for more than iteration.
+     * @draft ICU 76
+     */
+    bool operator==(const USetRangeIterator &other) const {
+        if (uset != other.uset) {
+            return false;
+        }
+        return rangeIndex == other.rangeIndex;
+    }
+
+    /**
+     * Inequality; the negation of operator==().
+     * @draft ICU 76
+     */
+    bool operator!=(const USetRangeIterator &other) const { return !(*this == other); }
+
+    /**
+     * Returns the range at the current index, fetched with uset_getItem().
+     * Returns CodePointRange(U_SENTINEL, U_SENTINEL) if the index is not
+     * less than the range count or the item cannot be fetched as a range.
+     * @draft ICU 76
+     */
+    CodePointRange operator*() const {
+        if (rangeIndex >= rangeCount) {
+            return CodePointRange(U_SENTINEL, U_SENTINEL);
+        }
+        UChar32 start, end;
+        UErrorCode errorCode = U_ZERO_ERROR;
+        int32_t result = uset_getItem(uset, rangeIndex, &start, &end, nullptr, 0, &errorCode);
+        if (!U_SUCCESS(errorCode) || result != 0) {
+            return CodePointRange(U_SENTINEL, U_SENTINEL);
+        }
+        return CodePointRange(start, end);
+    }
+
+    /**
+     * Pre-increment.
+     * @draft ICU 76
+     */
+    USetRangeIterator &operator++() {
+        ++rangeIndex;
+        return *this;
+    }
+
+    /**
+     * Post-increment.
+     * Advances this iterator and returns a copy of its previous state.
+     * @draft ICU 76
+     */
+    USetRangeIterator operator++(int) {
+        USetRangeIterator previous(*this);
+        ++rangeIndex;
+        return previous;
+    }
+
+private:
+    friend class USetRanges;
+
+    USetRangeIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
+            : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount) {}
+
+    const USet *uset;
+    int32_t rangeIndex;
+    int32_t rangeCount;
+};
+
+/**
+ * C++ "range" for iterating over the code point ranges of a USet.
+ *
+ * \code
+ * using U_HEADER_NESTED_NAMESPACE::USetRanges;
+ * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode));
+ * for (auto [start, end] : USetRanges(uset.getAlias())) {
+ * printf("uset.range U+%04lx..U+%04lx\n", (long)start, (long)end);
+ * }
+ * for (auto range : USetRanges(uset.getAlias())) {
+ * for (UChar32 c : range) {
+ * printf("uset.range.c U+%04lx\n", (long)c);
+ * }
+ * }
+ * \endcode
+ *
+ * C++ UnicodeSet has member functions for iteration, including ranges().
+ *
+ * @draft ICU 76
+ * @see USetCodePoints
+ * @see USetStrings
+ * @see USetElements
+ */
+class USetRanges {
+public:
+    /**
+     * Constructs a C++ "range" object over the code point ranges of the USet.
+     * The range count is read once, here, via uset_getRangeCount().
+     * @draft ICU 76
+     */
+    USetRanges(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {}
+
+    /**
+     * Copy constructor.
+     * @draft ICU 76
+     */
+    USetRanges(const USetRanges &other) = default;
+
+    /**
+     * Returns an iterator at range index 0.
+     * @draft ICU 76
+     */
+    USetRangeIterator begin() const {
+        USetRangeIterator first(uset, 0, rangeCount);
+        return first;
+    }
+
+    /**
+     * Returns the end iterator (range index == range count).
+     * @draft ICU 76
+     */
+    USetRangeIterator end() const {
+        USetRangeIterator pastLast(uset, rangeCount, rangeCount);
+        return pastLast;
+    }
+
+private:
+    const USet *uset;
+    int32_t rangeCount;
+};
+
+/**
+ * Iterator returned by USetStrings.
+ * @draft ICU 76
+ */
+class USetStringIterator {
+public:
+    /**
+     * Copy constructor. The copy refers to the same USet and string index.
+     * @draft ICU 76
+     */
+    USetStringIterator(const USetStringIterator &other) = default;
+
+    /**
+     * Equality: same USet and same string index.
+     * @draft ICU 76
+     */
+    bool operator==(const USetStringIterator &other) const {
+        // No need to compare count given private constructor
+        // and assuming we don't compare iterators across the set being modified.
+        // We might even skip comparing uset.
+        // Unless we want operator==() to be "correct" for more than iteration.
+        return uset == other.uset && index == other.index;
+    }
+
+    /**
+     * Inequality; the negation of operator==().
+     * @draft ICU 76
+     */
+    bool operator!=(const USetStringIterator &other) const { return !operator==(other); }
+
+    /**
+     * Returns a view of the string at the current index, obtained from
+     * uset_getString() without copying the contents.
+     * Returns an empty view if the index is not less than the string count,
+     * or if uset_getString() does not return a string.
+     * @draft ICU 76
+     */
+    std::u16string_view operator*() const {
+        if (index >= count) {
+            return {};
+        }
+        // Start from a defined length: uset_getString() is documented to
+        // return NULL for an out-of-range index, and it is not visible here
+        // whether it writes the output length in that case.
+        int32_t length = 0;
+        const UChar *uchars = uset_getString(uset, index, &length);
+        if (uchars == nullptr || length < 0) {
+            // Never build a view from a null pointer or a negative length.
+            return {};
+        }
+        return {ConstChar16Ptr(uchars), static_cast<uint32_t>(length)};
+    }
+
+    /**
+     * Pre-increment.
+     * @draft ICU 76
+     */
+    USetStringIterator &operator++() {
+        ++index;
+        return *this;
+    }
+
+    /**
+     * Post-increment.
+     * Advances this iterator and returns a copy of its previous state.
+     * @draft ICU 76
+     */
+    USetStringIterator operator++(int) {
+        USetStringIterator result(*this);
+        ++index;
+        return result;
+    }
+
+private:
+    friend class USetStrings;
+
+    USetStringIterator(const USet *uset, int32_t index, int32_t count)
+            : uset(uset), index(index), count(count) {}
+
+    const USet *uset;
+    /** Index of the current string, 0 .. count - 1; equal to count at the end. */
+    int32_t index;
+    int32_t count;
+};
+
+/**
+ * C++ "range" for iterating over the empty and multi-character strings of a USet.
+ *
+ * \code
+ * using U_HEADER_NESTED_NAMESPACE::USetStrings;
+ * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
+ * for (auto s : USetStrings(uset.getAlias())) {
+ * UnicodeString us(s);
+ * std::string u8;
+ * printf("uset.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str());
+ * }
+ * \endcode
+ *
+ * C++ UnicodeSet has member functions for iteration, including strings().
+ *
+ * @draft ICU 76
+ * @see USetCodePoints
+ * @see USetRanges
+ * @see USetElements
+ */
+class USetStrings {
+public:
+    /**
+     * Constructs a C++ "range" object over the strings of the USet.
+     * The string count is read once, here, via uset_getStringCount().
+     * @draft ICU 76
+     */
+    USetStrings(const USet *uset) : uset(uset), count(uset_getStringCount(uset)) {}
+
+    /**
+     * Copy constructor.
+     * @draft ICU 76
+     */
+    USetStrings(const USetStrings &other) = default;
+
+    /**
+     * Returns an iterator at string index 0.
+     * @draft ICU 76
+     */
+    USetStringIterator begin() const {
+        USetStringIterator first(uset, 0, count);
+        return first;
+    }
+
+    /**
+     * Returns the end iterator (string index == string count).
+     * @draft ICU 76
+     */
+    USetStringIterator end() const {
+        USetStringIterator pastLast(uset, count, count);
+        return pastLast;
+    }
+
+private:
+    const USet *uset;
+    int32_t count;
+};
+
+/**
+ * Iterator returned by USetElements.
+ * @draft ICU 76
+ */
+class USetElementIterator {
+public:
+    /**
+     * Copy constructor. The copy refers to the same USet and position.
+     * @draft ICU 76
+     */
+    USetElementIterator(const USetElementIterator &other) = default;
+
+    /**
+     * Equality: same USet, same current code point, and same index.
+     * @draft ICU 76
+     */
+    bool operator==(const USetElementIterator &other) const {
+        // No need to compare rangeCount & end given private constructor
+        // and assuming we don't compare iterators across the set being modified.
+        // We might even skip comparing uset.
+        // Unless we want operator==() to be "correct" for more than iteration.
+        return uset == other.uset && c == other.c && index == other.index;
+    }
+
+    /**
+     * Inequality; the negation of operator==().
+     * @draft ICU 76
+     */
+    bool operator!=(const USetElementIterator &other) const { return !operator==(other); }
+
+    /**
+     * Returns the current element as a UnicodeString:
+     * the current code point while iterating over ranges (c >= 0),
+     * otherwise a copy of the string at index - rangeCount,
+     * otherwise (at the end, or if uset_getString() does not return a string)
+     * an empty UnicodeString.
+     * @draft ICU 76
+     */
+    UnicodeString operator*() const {
+        if (c >= 0) {
+            return UnicodeString(c);
+        } else if (index < totalCount) {
+            // Start from a defined length: uset_getString() is documented to
+            // return NULL for an out-of-range index, and it is not visible here
+            // whether it writes the output length in that case.
+            int32_t length = 0;
+            const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
+            if (uchars == nullptr) {
+                return UnicodeString();
+            }
+            return UnicodeString(uchars, length);
+        } else {
+            return UnicodeString();
+        }
+    }
+
+    /**
+     * Pre-increment.
+     * While in a range, steps to its next code point; when the range is used up,
+     * loads the next range with uset_getItem(). After the last code point of
+     * the last range, switches to the first string; from then on, steps
+     * through the strings by incrementing index.
+     * @draft ICU 76
+     */
+    USetElementIterator &operator++() {
+        if (c < end) {
+            ++c;
+        } else if (index < rangeCount) {
+            UErrorCode errorCode = U_ZERO_ERROR;
+            int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode);
+            if (U_SUCCESS(errorCode) && result == 0) {
+                // c/end now describe the fetched range; index names the next item.
+                ++index;
+            } else {
+                c = end = U_SENTINEL;
+            }
+        } else if (c >= 0) {
+            // assert index == rangeCount;
+            // Switch from the last range to the first string.
+            // index is not incremented here: it already equals rangeCount,
+            // which operator*() maps to string 0.
+            c = end = U_SENTINEL;
+        } else {
+            ++index;
+        }
+        return *this;
+    }
+
+    /**
+     * Post-increment.
+     * Advances this iterator and returns a copy of its previous state.
+     * @draft ICU 76
+     */
+    USetElementIterator operator++(int) {
+        USetElementIterator result(*this);
+        operator++();
+        return result;
+    }
+
+private:
+    friend class USetElements;
+
+    /**
+     * Starts in the sentinel state (c == end == U_SENTINEL).
+     * If index names a range, advances once to load it.
+     */
+    USetElementIterator(const USet *uset, int32_t index, int32_t rangeCount, int32_t totalCount)
+            : uset(uset), index(index), rangeCount(rangeCount), totalCount(totalCount),
+                c(U_SENTINEL), end(U_SENTINEL) {
+        if (index < rangeCount) {
+            // Fetch the first range.
+            operator++();
+        }
+        // Otherwise don't move beyond the (index - rangeCount)-th string.
+    }
+
+    const USet *uset;
+    int32_t index;
+    /** Number of UnicodeSet/USet code point ranges. */
+    int32_t rangeCount;
+    /**
+     * Number of code point ranges plus number of strings.
+     * index starts from 0, counts ranges while less than rangeCount,
+     * then counts strings while at least rangeCount and less than totalCount.
+     *
+     * Note that totalCount is the same as uset_getItemCount(), but usually
+     * smaller than the number of elements returned by this iterator
+     * because we return each code point of each range.
+     */
+    int32_t totalCount;
+    /** Current code point and inclusive end of the current range; U_SENTINEL while on strings. */
+    UChar32 c, end;
+};
+
+/**
+ * A C++ "range" for iterating over all of the elements of a USet.
+ * Convenient all-in-one iteration, but creates a UnicodeString for each
+ * code point or string.
+ *
+ * Code points are returned first, then empty and multi-character strings.
+ *
+ * \code
+ * using U_HEADER_NESTED_NAMESPACE::USetElements;
+ * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
+ * for (auto el : USetElements(uset.getAlias())) {
+ * std::string u8;
+ * printf("uset.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
+ * }
+ * \endcode
+ *
+ * C++ UnicodeSet has member functions for iteration, including begin() and end().
+ *
+ * @return an all-elements iterator.
+ * @draft ICU 76
+ * @see USetCodePoints
+ * @see USetRanges
+ * @see USetStrings
+ */
+class USetElements {
+public:
+    /**
+     * Constructs a C++ "range" object over all of the elements of the USet.
+     * The range and string counts are read once, here, via
+     * uset_getRangeCount() and uset_getStringCount().
+     * @draft ICU 76
+     */
+    USetElements(const USet *uset)
+        : uset(uset), rangeCount(uset_getRangeCount(uset)),
+            stringCount(uset_getStringCount(uset)) {}
+
+    /**
+     * Copy constructor.
+     * @draft ICU 76
+     */
+    USetElements(const USetElements &other) = default;
+
+    /**
+     * Returns an iterator at item index 0.
+     * @draft ICU 76
+     */
+    USetElementIterator begin() const {
+        const int32_t totalCount = rangeCount + stringCount;
+        return USetElementIterator(uset, 0, rangeCount, totalCount);
+    }
+
+    /**
+     * Returns the end iterator (item index == range count + string count).
+     * @draft ICU 76
+     */
+    USetElementIterator end() const {
+        const int32_t totalCount = rangeCount + stringCount;
+        return USetElementIterator(uset, totalCount, rangeCount, totalCount);
+    }
+
+private:
+    const USet *uset;
+    int32_t rangeCount, stringCount;
+};
+
+} // namespace U_HEADER_ONLY_NAMESPACE
+
+#endif // U_HIDE_DRAFT_API
+#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
+
+#endif // __USET_H__
diff --git a/thirdparty/icu4c/common/unicode/ustring.h b/thirdparty/icu4c/common/unicode/ustring.h
index 03c697c722..a3d08254d6 100644
--- a/thirdparty/icu4c/common/unicode/ustring.h
+++ b/thirdparty/icu4c/common/unicode/ustring.h
@@ -1107,8 +1107,8 @@ u_strToLower(UChar *dest, int32_t destCapacity,
* styles, using rules and dictionaries beyond the standard iterators.
* It may be more efficient to always provide an iterator to avoid
* opening and closing one for each string.
- * The standard titlecase iterator for the root locale implements the
- * algorithm of Unicode TR 21.
+ * If the break iterator passed in is null, the default Unicode algorithm
+ * will be used to determine the titlecase positions.
*
* This function uses only the setText(), first() and next() methods of the
* provided break iterator.
diff --git a/thirdparty/icu4c/common/unicode/utypes.h b/thirdparty/icu4c/common/unicode/utypes.h
index 6c7cc51345..0151ebd470 100644
--- a/thirdparty/icu4c/common/unicode/utypes.h
+++ b/thirdparty/icu4c/common/unicode/utypes.h
@@ -54,22 +54,37 @@
* integer and other types.
*/
+/** @{ API visibility control */
/**
* \def U_SHOW_CPLUSPLUS_API
+ * When defined to 1 (=default) and compiled with a C++ compiler, both C and C++ APIs are visible.
+ * Otherwise, only C APIs are visible; this is for C++ users who want to
+ * restrict their usage to binary stable C APIs exported by ICU DLLs.
+ * @internal
+ */
+/**
+ * \def U_SHOW_CPLUSPLUS_HEADER_API
+ * When defined to 1 (=default) and compiled with a C++ compiler, C++ header-only APIs are visible.
+ * This is for C++ users who restrict their usage to binary stable C APIs exported by ICU DLLs
+ * (U_SHOW_CPLUSPLUS_API=0)
+ * but who still want to use C++ header-only APIs which do not rely on ICU DLL exports.
* @internal
*/
#ifdef __cplusplus
# ifndef U_SHOW_CPLUSPLUS_API
# define U_SHOW_CPLUSPLUS_API 1
# endif
+# ifndef U_SHOW_CPLUSPLUS_HEADER_API
+# define U_SHOW_CPLUSPLUS_HEADER_API 1
+# endif
#else
# undef U_SHOW_CPLUSPLUS_API
# define U_SHOW_CPLUSPLUS_API 0
+# undef U_SHOW_CPLUSPLUS_HEADER_API
+# define U_SHOW_CPLUSPLUS_HEADER_API 0
#endif
-/** @{ API visibility control */
-
/**
* \def U_HIDE_DRAFT_API
* Define this to 1 to request that draft API be "hidden"
@@ -582,14 +597,13 @@ typedef enum UErrorCode {
U_MF_MISSING_SELECTOR_ANNOTATION_ERROR, /**< A selector expression evaluates to an unannotated operand. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
U_MF_DUPLICATE_DECLARATION_ERROR, /**< The same variable is declared in more than one .local or .input declaration. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
- U_MF_UNSUPPORTED_STATEMENT_ERROR, /**< A message includes a reserved statement. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
- U_MF_UNSUPPORTED_EXPRESSION_ERROR, /**< A message includes syntax reserved for future standardization or private implementation use. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
+ U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal formatting API error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- U_FMT_PARSE_ERROR_LIMIT = 0x10121,
+ U_FMT_PARSE_ERROR_LIMIT = 0x10120,
#endif // U_HIDE_DEPRECATED_API
/*
@@ -713,13 +727,13 @@ typedef enum UErrorCode {
* @stable ICU 2.0
*/
static
- inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
+ inline UBool U_SUCCESS(UErrorCode code) { return code <= U_ZERO_ERROR; }
/**
* Does the error code indicate a failure?
* @stable ICU 2.0
*/
static
- inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
+ inline UBool U_FAILURE(UErrorCode code) { return code > U_ZERO_ERROR; }
#else
/**
* Does the error code indicate success?
diff --git a/thirdparty/icu4c/common/unicode/uvernum.h b/thirdparty/icu4c/common/unicode/uvernum.h
index eb7c50f385..a3cb882623 100644
--- a/thirdparty/icu4c/common/unicode/uvernum.h
+++ b/thirdparty/icu4c/common/unicode/uvernum.h
@@ -53,7 +53,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION_MAJOR_NUM 75
+#define U_ICU_VERSION_MAJOR_NUM 76
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
@@ -79,7 +79,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_SUFFIX _75
+#define U_ICU_VERSION_SUFFIX _76
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@@ -132,7 +132,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION "75.1"
+#define U_ICU_VERSION "76.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
@@ -145,13 +145,13 @@
*
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_SHORT "75"
+#define U_ICU_VERSION_SHORT "76"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
-#define U_ICU_DATA_VERSION "75.1"
+#define U_ICU_DATA_VERSION "76.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================
diff --git a/thirdparty/icu4c/common/unicode/uversion.h b/thirdparty/icu4c/common/unicode/uversion.h
index 113568df8c..25d73a3aeb 100644
--- a/thirdparty/icu4c/common/unicode/uversion.h
+++ b/thirdparty/icu4c/common/unicode/uversion.h
@@ -124,6 +124,49 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
# if U_USING_ICU_NAMESPACE
U_NAMESPACE_USE
# endif
+
+#ifndef U_HIDE_DRAFT_API
+/**
+ * \def U_HEADER_NESTED_NAMESPACE
+ * Nested namespace used inside U_ICU_NAMESPACE for header-only APIs.
+ * Different when used inside ICU to prevent public use of internal instantiations:
+ * "header" when compiling calling code; "internal" when compiling ICU library code.
+ *
+ * When compiling for Windows, where DLL exports of APIs are explicit,
+ * this is always "header". Header-only types are not marked for export,
+ * which on Windows already avoids callers linking with library instantiations.
+ *
+ * @draft ICU 76
+ * @see U_HEADER_ONLY_NAMESPACE
+ */
+
+/**
+ * \def U_HEADER_ONLY_NAMESPACE
+ * Namespace used for header-only APIs.
+ * Different when used inside ICU to prevent public use of internal instantiations.
+ * "U_ICU_NAMESPACE::header" or "U_ICU_NAMESPACE::internal",
+ * see U_HEADER_NESTED_NAMESPACE for details.
+ *
+ * @draft ICU 76
+ */
+
+// The first test is the same as for defining U_EXPORT for Windows.
+#if defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
+ UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__))
+# define U_HEADER_NESTED_NAMESPACE header
+#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
+ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
+ defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
+# define U_HEADER_NESTED_NAMESPACE internal
+#else
+# define U_HEADER_NESTED_NAMESPACE header
+#endif
+
+#define U_HEADER_ONLY_NAMESPACE U_ICU_NAMESPACE::U_HEADER_NESTED_NAMESPACE
+
+namespace U_HEADER_ONLY_NAMESPACE {}
+#endif // U_HIDE_DRAFT_API
+
#endif /* __cplusplus */
/*===========================================================================*/