diff options
Diffstat (limited to 'thirdparty/icu4c/common/unicode/uniset.h')
-rw-r--r-- | thirdparty/icu4c/common/unicode/uniset.h | 118 |
1 files changed, 115 insertions, 3 deletions
diff --git a/thirdparty/icu4c/common/unicode/uniset.h b/thirdparty/icu4c/common/unicode/uniset.h index 127e4ce063..d070fd631a 100644 --- a/thirdparty/icu4c/common/unicode/uniset.h +++ b/thirdparty/icu4c/common/unicode/uniset.h @@ -313,7 +313,7 @@ private: char16_t *pat = nullptr; int32_t patLen = 0; - UVector* strings = nullptr; // maintained in sorted order + UVector* strings_ = nullptr; // maintained in sorted order UnicodeSetStringSpan *stringSpan = nullptr; /** @@ -1102,6 +1102,118 @@ public: */ UChar32 charAt(int32_t index) const; +#ifndef U_HIDE_DRAFT_API + /** + * Returns a C++ "range" for iterating over the code points of this set. + * + * \code + * UnicodeSet set(u"[abcçカ🚴]", errorCode); + * for (UChar32 c : set.codePoints()) { + * printf("set.codePoint U+%04lx\n", (long)c); + * } + * \endcode + * + * @return a "range" object for iterating over the code points of this set. + * @draft ICU 76 + * @see ranges + * @see strings + * @see begin + * @see end + */ + inline U_HEADER_NESTED_NAMESPACE::USetCodePoints codePoints() const { + return U_HEADER_NESTED_NAMESPACE::USetCodePoints(toUSet()); + } + + /** + * Returns a C++ "range" for iterating over the code point ranges of this set. + * + * \code + * UnicodeSet set(u"[abcçカ🚴]", errorCode); + * for (auto [start, end] : set.ranges()) { + * printf("set.range U+%04lx..U+%04lx\n", (long)start, (long)end); + * } + * for (auto range : set.ranges()) { + * for (UChar32 c : range) { + * printf("set.range.c U+%04lx\n", (long)c); + * } + * } + * \endcode + * + * @return a "range" object for iterating over the code point ranges of this set. + * @draft ICU 76 + * @see codePoints + * @see strings + * @see begin + * @see end + */ + inline U_HEADER_NESTED_NAMESPACE::USetRanges ranges() const { + return U_HEADER_NESTED_NAMESPACE::USetRanges(toUSet()); + } + + /** + * Returns a C++ "range" for iterating over the empty and multi-character strings of this set. + * Returns each string as a std::u16string_view without copying its contents. + * + * \code + * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode); + * for (auto s : set.strings()) { + * UnicodeString us(s); + * std::string u8; + * printf("set.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str()); + * } + * \endcode + * + * @return a "range" object for iterating over the strings of this set. + * @draft ICU 76 + * @see codePoints + * @see ranges + * @see begin + * @see end + */ + inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const { + return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet()); + } + + /** + * Returns a C++ iterator for iterating over all of the elements of this set. + * Convenient all-in one iteration, but creates a UnicodeString for each + * code point or string. + * (Similar to how Java UnicodeSet *is an* Iterable<String>.) + * + * Code points are returned first, then empty and multi-character strings. + * + * \code + * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode); + * for (auto el : set) { + * std::string u8; + * printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str()); + * } + * \endcode + * + * @return an all-elements iterator. + * @draft ICU 76 + * @see end + * @see codePoints + * @see ranges + * @see strings + */ + inline U_HEADER_NESTED_NAMESPACE::USetElementIterator begin() const { + return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).begin(); + } + + /** + * @return an exclusive-end sentinel for iterating over all of the elements of this set. + * @draft ICU 76 + * @see begin + * @see codePoints + * @see ranges + * @see strings + */ + inline U_HEADER_NESTED_NAMESPACE::USetElementIterator end() const { + return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).end(); + } +#endif // U_HIDE_DRAFT_API + /** * Adds the specified range to this set if it is not already * present. If this set already contains the specified range, @@ -1731,7 +1843,7 @@ inline bool UnicodeSet::operator!=(const UnicodeSet& o) const { } inline UBool UnicodeSet::isFrozen() const { - return (UBool)(bmpSet!=nullptr || stringSpan!=nullptr); + return bmpSet != nullptr || stringSpan != nullptr; } inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const { @@ -1747,7 +1859,7 @@ inline UBool UnicodeSet::containsSome(const UnicodeString& s) const { } inline UBool UnicodeSet::isBogus() const { - return (UBool)(fFlags & kIsBogus); + return fFlags & kIsBogus; } inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) { |