diff --git a/packages/php-wasm/compile/Makefile b/packages/php-wasm/compile/Makefile index bad1ddd21a..97b114afe9 100644 --- a/packages/php-wasm/compile/Makefile +++ b/packages/php-wasm/compile/Makefile @@ -235,9 +235,26 @@ libcurl/jspi/dist/root/lib/lib/libcurl.a: base-image libz libopenssl docker cp $$(docker create playground-php-wasm:libcurl):/root/curl-7.69.1/lib/.libs ./libcurl/jspi/dist/root/lib/lib docker cp $$(docker create playground-php-wasm:libcurl):/root/curl-7.69.1/include/ ./libcurl/jspi/dist/root/lib +libintl_asyncify: libintl/asyncify/dist/root/lib/lib/libintl.a +libintl/asyncify/dist/root/lib/lib/libintl.a: base-image + mkdir -p ./libintl/asyncify/dist/root/lib + docker build -f ./libintl/Dockerfile -t playground-php-wasm:libintl . --progress=plain + docker cp $$(docker create playground-php-wasm:libintl):/root/lib/lib ./libintl/asyncify/dist/root/lib + docker cp $$(docker create playground-php-wasm:libintl):/root/lib/include ./libintl/asyncify/dist/root/lib + docker cp $$(docker create playground-php-wasm:libintl):/root/lib/data/. ./libintl/ + + +libintl_jspi: libintl/jspi/dist/root/lib/lib/libintl.a +libintl/jspi/dist/root/lib/lib/libintl.a: base-image + mkdir -p ./libintl/jspi/dist/root/lib + docker build -f ./libintl/Dockerfile -t playground-php-wasm:libintl . --progress=plain --build-arg JSPI=1 + docker cp $$(docker create playground-php-wasm:libintl):/root/lib/lib ./libintl/jspi/dist/root/lib + docker cp $$(docker create playground-php-wasm:libintl):/root/lib/include ./libintl/jspi/dist/root/lib + docker cp $$(docker create playground-php-wasm:libintl):/root/lib/data/. 
./libintl/ + all: all_jspi all_asyncify -all_jspi: libz_jspi libzip_jspi libpng16_jspi libjpeg_jspi libwebp_jspi libxml2_jspi libopenssl_jspi libsqlite3_jspi libiconv_jspi bison2.7 oniguruma_jspi libcurl_jspi -all_asyncify: libz_asyncify libzip_asyncify libpng16_asyncify libjpeg_asyncify libwebp_asyncify libxml2_asyncify libopenssl_asyncify libsqlite3_asyncify libiconv_asyncify bison2.7 oniguruma_asyncify libcurl_asyncify +all_jspi: libz_jspi libzip_jspi libpng16_jspi libjpeg_jspi libwebp_jspi libxml2_jspi libopenssl_jspi libsqlite3_jspi libiconv_jspi bison2.7 oniguruma_jspi libcurl_jspi libintl_jspi +all_asyncify: libz_asyncify libzip_asyncify libpng16_asyncify libjpeg_asyncify libwebp_asyncify libxml2_asyncify libopenssl_asyncify libsqlite3_asyncify libiconv_asyncify bison2.7 oniguruma_asyncify libcurl_asyncify libintl_asyncify clean: rm -rf ./libz/jspi/dist @@ -261,3 +278,5 @@ clean: rm -rf ./bison2.7/dist rm -rf ./oniguruma/jspi/dist rm -rf ./oniguruma/asyncify/dist + rm -rf ./libintl/jspi/dist + rm -rf ./libintl/asyncify/dist diff --git a/packages/php-wasm/compile/build.js b/packages/php-wasm/compile/build.js index 6ec2509145..93fe9d89c6 100644 --- a/packages/php-wasm/compile/build.js +++ b/packages/php-wasm/compile/build.js @@ -61,6 +61,11 @@ const argParser = yargs(process.argv.slice(2)) choices: ['yes', 'no'], description: 'Build with mbregex support', }, + WITH_INTL: { + type: 'string', + choices: ['yes', 'no'], + description: 'Build with intl support', + }, WITH_CLI_SAPI: { type: 'string', choices: ['yes', 'no'], @@ -139,6 +144,7 @@ const platformDefaults = { WITH_GD: 'yes', WITH_MBSTRING: 'yes', WITH_MBREGEX: 'yes', + WITH_INTL: 'yes', WITH_OPENSSL: 'yes', WITH_WS_NETWORKING_PROXY: 'yes', }, @@ -205,6 +211,8 @@ await asyncSpawn( '--build-arg', getArg('WITH_MBREGEX'), '--build-arg', + getArg('WITH_INTL'), + '--build-arg', getArg('WITH_CLI_SAPI'), '--build-arg', getArg('WITH_OPENSSL'), @@ -256,6 +264,20 @@ await asyncSpawn( { cwd: sourceDir, stdio: 
'inherit' } ); +// Copy data files +const libDir = path.resolve(process.cwd(), 'packages/php-wasm/compile'); +const publicDir = + platform === 'node' + ? `${path.dirname(outputDir)}` + : `${path.dirname(path.dirname(outputDir))}`; +if (getArg('WITH_INTL').endsWith('yes')) { + await asyncSpawn( + 'cp', + [`${libDir}/libintl/icudt74l.dat`, `${publicDir}/shared/icudt74l.dat`], + { cwd: sourceDir, stdio: 'inherit' } + ); +} + const _args = args; function asyncSpawn(...args) { diff --git a/packages/php-wasm/compile/libintl/Dockerfile b/packages/php-wasm/compile/libintl/Dockerfile new file mode 100644 index 0000000000..75cadc3e37 --- /dev/null +++ b/packages/php-wasm/compile/libintl/Dockerfile @@ -0,0 +1,50 @@ +FROM playground-php-wasm:base + + +ARG JSPI + + +RUN set -euxo pipefail && \ + wget https://github.com/unicode-org/icu/releases/download/release-74-2/icu4c-74_2-src.tgz && \ + tar -xvf icu4c-74_2-src.tgz && \ + rm -rf /root/icu/source/data/ && \ + rm icu4c-74_2-src.tgz + + +RUN set -euxo pipefail && \ + wget https://github.com/unicode-org/icu/releases/download/release-74-2/icu4c-74_2-data.zip && \ + unzip icu4c-74_2-data.zip -d /root/icu/source && \ + rm icu4c-74_2-data.zip + + +RUN set -euxo pipefail && \ + mkdir -p /root/native && \ + cd /root/native && \ + /root/icu/source/runConfigureICU Linux \ + --disable-shared \ + --enable-static && \ + make -j"$(nproc)" && \ + make install + + +RUN set -euxo pipefail && \ + cd /root/icu/source && \ + mkdir -p /root/lib && \ + source /root/emsdk/emsdk_env.sh && \ + emconfigure ./configure \ + --build=i386-pc-linux-gnu \ + --target=wasm32-unknown-emscripten \ + --prefix=/root/lib \ + --with-cross-build=/root/native \ + --with-data-packaging=archive \ + --disable-extras \ + --disable-shared \ + --enable-static && \ + export JSPI_FLAGS=$(if [ "$JSPI" = "1" ]; then echo "-sSUPPORT_LONGJMP=wasm -fwasm-exceptions"; else echo ""; fi) && \ + EMCC_FLAGS=" -sSIDE_MODULE -Wl,--wrap=select $JSPI_FLAGS " emmake make -j"$(nproc)" && \ + 
EMCC_FLAGS=" -sSIDE_MODULE -Wl,--wrap=select $JSPI_FLAGS " emmake make install -i; + + +RUN set -euxo pipefail && \ + mkdir -p /root/lib/data && \ + mv /root/lib/share/icu/74.2/icudt74l.dat /root/lib/data diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/alphaindex.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/alphaindex.h new file mode 100644 index 0000000000..cbce212717 --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/alphaindex.h @@ -0,0 +1,766 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2011-2014 International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +*/ + +#ifndef INDEXCHARS_H +#define INDEXCHARS_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" +#include "unicode/locid.h" +#include "unicode/unistr.h" + +#if !UCONFIG_NO_COLLATION + +/** + * \file + * \brief C++ API: Index Characters + */ + +U_CDECL_BEGIN + +/** + * Constants for Alphabetic Index Label Types. + * The form of these enum constants anticipates having a plain C API + * for Alphabetic Indexes that will also use them. + * @stable ICU 4.8 + */ +typedef enum UAlphabeticIndexLabelType { + /** + * Normal Label, typically the starting letter of the names + * in the bucket with this label. + * @stable ICU 4.8 + */ + U_ALPHAINDEX_NORMAL = 0, + + /** + * Underflow Label. The bucket with this label contains names + * in scripts that sort before any of the bucket labels in this index. + * @stable ICU 4.8 + */ + U_ALPHAINDEX_UNDERFLOW = 1, + + /** + * Inflow Label. The bucket with this label contains names + * in scripts that sort between two of the bucket labels in this index. 
+ * Inflow labels are created when an index contains normal labels for + * multiple scripts, and skips other scripts that sort between some of the + * included scripts. + * @stable ICU 4.8 + */ + U_ALPHAINDEX_INFLOW = 2, + + /** + * Overflow Label. The bucket with this label contains names in scripts + * that sort after all of the bucket labels in this index. + * @stable ICU 4.8 + */ + U_ALPHAINDEX_OVERFLOW = 3 +} UAlphabeticIndexLabelType; + + +struct UHashtable; +U_CDECL_END + +U_NAMESPACE_BEGIN + +// Forward Declarations + +class BucketList; +class Collator; +class RuleBasedCollator; +class StringEnumeration; +class UnicodeSet; +class UVector; + +/** + * AlphabeticIndex supports the creation of a UI index appropriate for a given language. + * It can support either direct use, or use with a client that doesn't support localized collation. + * The following is an example of what an index might look like in a UI: + * + *
+ *  ... A B C D E F G H I J K L M N O P Q R S T U V W X Y Z  ...
+ *
+ *  A
+ *     Addison
+ *     Albertson
+ *     Azensky
+ *  B
+ *     Baker
+ *  ...
+ * 
+ * + * The class can generate a list of labels for use as a UI "index", that is, a list of + * clickable characters (or character sequences) that allow the user to see a segment + * (bucket) of a larger "target" list. That is, each label corresponds to a bucket in + * the target list, where everything in the bucket is greater than or equal to the character + * (according to the locale's collation). Strings can be added to the index; + * they will be in sorted order in the right bucket. + *

+ * The class also supports having buckets for strings before the first (underflow), + * after the last (overflow), and between scripts (inflow). For example, if the index + * is constructed with labels for Russian and English, Greek characters would fall + * into an inflow bucket between the other two scripts. + *

+ * The AlphabeticIndex class is not intended for public subclassing. + * + *

Note: If you expect to have a lot of ASCII or Latin characters + * as well as characters from the user's language, + * then it is a good idea to call addLabels(Locale::getEnglish(), status).

+ * + *

Direct Use

+ *

The following shows an example of building an index directly. + * The "show..." methods below are just to illustrate usage. + * + *

+ * // Create a simple index.  "Item" is assumed to be an application
+ * // defined type that the application's UI and other processing knows about,
+ * //  and that has a name.
+ *
+ * UErrorCode status = U_ZERO_ERROR;
+ * AlphabeticIndex *index = new AlphabeticIndex(desiredLocale, status);
+ * index->addLabels(additionalLocale, status);
+ * for (Item *item in some source of Items ) {
+ *     index->addRecord(item->name(), item, status);
+ * }
+ * ...
+ * // Show index at top. We could skip or gray out empty buckets
+ *
+ * while (index->nextBucket(status)) {
+ *     if (showAll || index->getBucketRecordCount() != 0) {
+ *         showLabelAtTop(UI, index->getBucketLabel());
+ *     }
+ * }
+ *  ...
+ * // Show the buckets with their contents, skipping empty buckets
+ *
+ * index->resetBucketIterator(status);
+ * while (index->nextBucket(status)) {
+ *    if (index->getBucketRecordCount() != 0) {
+ *        showLabelInList(UI, index->getBucketLabel());
+ *        while (index->nextRecord(status)) {
+ *            showIndexedItem(UI, static_cast<Item *>(index->getRecordData()))
+ * 
+ * + * The caller can build different UIs using this class. + * For example, an index character could be omitted or grayed-out + * if its bucket is empty. Small buckets could also be combined based on size, such as: + * + *
+ * ... A-F G-N O-Z ...
+ * 
+ * + *

Client Support

+ *

Callers can also use the AlphabeticIndex::ImmutableIndex, or the AlphabeticIndex itself, + * to support sorting on a client that doesn't support AlphabeticIndex functionality. + * + *

The ImmutableIndex is both immutable and thread-safe. + * The corresponding AlphabeticIndex methods are not thread-safe because + * they "lazily" build the index buckets. + *

+ * + * @stable ICU 4.8 + */ +class U_I18N_API AlphabeticIndex: public UObject { +public: + /** + * An index "bucket" with a label string and type. + * It is referenced by getBucketIndex(), + * and returned by ImmutableIndex.getBucket(). + * + * The Bucket class is not intended for public subclassing. + * @stable ICU 51 + */ + class U_I18N_API Bucket : public UObject { + public: + /** + * Destructor. + * @stable ICU 51 + */ + virtual ~Bucket(); + + /** + * Returns the label string. + * + * @return the label string for the bucket + * @stable ICU 51 + */ + const UnicodeString &getLabel() const { return label_; } + /** + * Returns whether this bucket is a normal, underflow, overflow, or inflow bucket. + * + * @return the bucket label type + * @stable ICU 51 + */ + UAlphabeticIndexLabelType getLabelType() const { return labelType_; } + + private: + friend class AlphabeticIndex; + friend class BucketList; + + UnicodeString label_; + UnicodeString lowerBoundary_; + UAlphabeticIndexLabelType labelType_; + Bucket *displayBucket_; + int32_t displayIndex_; + UVector *records_; // Records are owned by the inputList_ vector. + + Bucket(const UnicodeString &label, // Parameter strings are copied. + const UnicodeString &lowerBoundary, + UAlphabeticIndexLabelType type); + }; + + /** + * Immutable, thread-safe version of AlphabeticIndex. + * This class provides thread-safe methods for bucketing, + * and random access to buckets and their properties, + * but does not offer adding records to the index. + * + * The ImmutableIndex class is not intended for public subclassing. + * + * @stable ICU 51 + */ + class U_I18N_API ImmutableIndex : public UObject { + public: + /** + * Destructor. + * @stable ICU 51 + */ + virtual ~ImmutableIndex(); + + /** + * Returns the number of index buckets and labels, including underflow/inflow/overflow. 
+ * + * @return the number of index buckets + * @stable ICU 51 + */ + int32_t getBucketCount() const; + + /** + * Finds the index bucket for the given name and returns the number of that bucket. + * Use getBucket() to get the bucket's properties. + * + * @param name the string to be sorted into an index bucket + * @param errorCode Error code, will be set with the reason if the + * operation fails. + * @return the bucket number for the name + * @stable ICU 51 + */ + int32_t getBucketIndex(const UnicodeString &name, UErrorCode &errorCode) const; + + /** + * Returns the index-th bucket. Returns nullptr if the index is out of range. + * + * @param index bucket number + * @return the index-th bucket + * @stable ICU 51 + */ + const Bucket *getBucket(int32_t index) const; + + private: + friend class AlphabeticIndex; + + ImmutableIndex(BucketList *bucketList, Collator *collatorPrimaryOnly) + : buckets_(bucketList), collatorPrimaryOnly_(collatorPrimaryOnly) {} + + BucketList *buckets_; + Collator *collatorPrimaryOnly_; + }; + + /** + * Construct an AlphabeticIndex object for the specified locale. If the locale's + * data does not include index characters, a set of them will be + * synthesized based on the locale's exemplar characters. The locale + * determines the sorting order for both the index characters and the + * user item names appearing under each Index character. + * + * @param locale the desired locale. + * @param status Error code, will be set with the reason if the construction + * of the AlphabeticIndex object fails. + * @stable ICU 4.8 + */ + AlphabeticIndex(const Locale &locale, UErrorCode &status); + + /** + * Construct an AlphabeticIndex that uses a specific collator. + * + * The index will be created with no labels; the addLabels() function must be called + * after creation to add the desired labels to the index. + * + * The index adopts the collator, and is responsible for deleting it. 
+ * The caller should make no further use of the collator after creating the index. + * + * @param collator The collator to use to order the contents of this index. + * @param status Error code, will be set with the reason if the + * operation fails. + * @stable ICU 51 + */ + AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status); + + /** + * Add Labels to this Index. The labels are additions to those + * that are already in the index; they do not replace the existing + * ones. + * @param additions The additional characters to add to the index, such as A-Z. + * @param status Error code, will be set with the reason if the + * operation fails. + * @return this, for chaining + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &addLabels(const UnicodeSet &additions, UErrorCode &status); + + /** + * Add the index characters from a Locale to the index. The labels + * are added to those that are already in the index; they do not replace the + * existing index characters. The collation order for this index is not + * changed; it remains that of the locale that was originally specified + * when creating this Index. + * + * @param locale The locale whose index characters are to be added. + * @param status Error code, will be set with the reason if the + * operation fails. + * @return this, for chaining + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &addLabels(const Locale &locale, UErrorCode &status); + + /** + * Destructor + * @stable ICU 4.8 + */ + virtual ~AlphabeticIndex(); + + /** + * Builds an immutable, thread-safe version of this instance, without data records. + * + * @return an immutable index instance + * @stable ICU 51 + */ + ImmutableIndex *buildImmutableIndex(UErrorCode &errorCode); + + /** + * Get the Collator that establishes the ordering of the items in this index. + * Ownership of the collator remains with the AlphabeticIndex instance. + * + * The returned collator is a reference to the internal collator used by this + * index. 
It may be safely used to compare the names of items or to get + * sort keys for names. However if any settings need to be changed, + * or other non-const methods called, a cloned copy must be made first. + * + * @return The collator + * @stable ICU 4.8 + */ + virtual const RuleBasedCollator &getCollator() const; + + + /** + * Get the default label used for abbreviated buckets *between* other index characters. + * For example, consider the labels when Latin (X Y Z) and Greek (Α Β Γ) are used: + * + * X Y Z ... Α Β Γ. + * + * @return inflow label + * @stable ICU 4.8 + */ + virtual const UnicodeString &getInflowLabel() const; + + /** + * Set the default label used for abbreviated buckets between other index characters. + * An inflow label will be automatically inserted if two otherwise-adjacent label characters + * are from different scripts, e.g. Latin and Cyrillic, and a third script, e.g. Greek, + * sorts between the two. The default inflow character is an ellipsis (...) + * + * @param inflowLabel the new Inflow label. + * @param status Error code, will be set with the reason if the operation fails. + * @return this + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &setInflowLabel(const UnicodeString &inflowLabel, UErrorCode &status); + + + /** + * Get the special label used for items that sort after the last normal label, + * and that would not otherwise have an appropriate label. + * + * @return the overflow label + * @stable ICU 4.8 + */ + virtual const UnicodeString &getOverflowLabel() const; + + + /** + * Set the label used for items that sort after the last normal label, + * and that would not otherwise have an appropriate label. + * + * @param overflowLabel the new overflow label. + * @param status Error code, will be set with the reason if the operation fails. 
+ * @return this + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &setOverflowLabel(const UnicodeString &overflowLabel, UErrorCode &status); + + /** + * Get the special label used for items that sort before the first normal label, + * and that would not otherwise have an appropriate label. + * + * @return underflow label + * @stable ICU 4.8 + */ + virtual const UnicodeString &getUnderflowLabel() const; + + /** + * Set the label used for items that sort before the first normal label, + * and that would not otherwise have an appropriate label. + * + * @param underflowLabel the new underflow label. + * @param status Error code, will be set with the reason if the operation fails. + * @return this + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &setUnderflowLabel(const UnicodeString &underflowLabel, UErrorCode &status); + + + /** + * Get the limit on the number of labels permitted in the index. + * The number does not include over, under and inflow labels. + * + * @return maxLabelCount maximum number of labels. + * @stable ICU 4.8 + */ + virtual int32_t getMaxLabelCount() const; + + /** + * Set a limit on the number of labels permitted in the index. + * The number does not include over, under and inflow labels. + * Currently, if the number is exceeded, then every + * nth item is removed to bring the count down. + * A more sophisticated mechanism may be available in the future. + * + * @param maxLabelCount the maximum number of labels. + * @param status error code + * @return This, for chaining + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status); + + + /** + * Add a record to the index. Each record will be associated with an index Bucket + * based on the record's name. The list of records for each bucket will be sorted + * based on the collation ordering of the names in the index's locale. + * Records with duplicate names are permitted; they will be kept in the order + * that they were added. 
+ * + * @param name The display name for the Record. The Record will be placed in + * a bucket based on this name. + * @param data An optional pointer to user data associated with this + * item. When iterating the contents of a bucket, both the + * data pointer and the name will be available for each Record. + * @param status Error code, will be set with the reason if the operation fails. + * @return This, for chaining. + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &addRecord(const UnicodeString &name, const void *data, UErrorCode &status); + + /** + * Remove all Records from the Index. The set of Buckets, which define the headings under + * which records are classified, is not altered. + * + * @param status Error code, will be set with the reason if the operation fails. + * @return This, for chaining. + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &clearRecords(UErrorCode &status); + + + /** Get the number of labels in this index. + * Note: may trigger lazy index construction. + * + * @param status Error code, will be set with the reason if the operation fails. + * @return The number of labels in this index, including any under, over or + * in-flow labels. + * @stable ICU 4.8 + */ + virtual int32_t getBucketCount(UErrorCode &status); + + + /** Get the total number of Records in this index, that is, the number + * of (name, data) pairs added. + * + * @param status Error code, will be set with the reason if the operation fails. + * @return The number of records in this index, that is, the total number + * of (name, data) items added with addRecord(). + * @stable ICU 4.8 + */ + virtual int32_t getRecordCount(UErrorCode &status); + + + + /** + * Given the name of a record, return the zero-based index of the Bucket + * in which the item should appear. The name need not be in the index. + * A Record will not be added to the index by this function. + * Bucket numbers are zero-based, in Bucket iteration order.
+ * + * @param itemName The name whose bucket position in the index is to be determined. + * @param status Error code, will be set with the reason if the operation fails. + * @return The bucket number for this name. + * @stable ICU 4.8 + * + */ + virtual int32_t getBucketIndex(const UnicodeString &itemName, UErrorCode &status); + + + /** + * Get the zero based index of the current Bucket from an iteration + * over the Buckets of this index. Return -1 if no iteration is in process. + * @return the index of the current Bucket + * @stable ICU 4.8 + */ + virtual int32_t getBucketIndex() const; + + + /** + * Advance the iteration over the Buckets of this index. Return false if + * there are no more Buckets. + * + * @param status Error code, will be set with the reason if the operation fails. + * U_ENUM_OUT_OF_SYNC_ERROR will be reported if the index is modified while + * an enumeration of its contents are in process. + * + * @return true if success, false if at end of iteration + * @stable ICU 4.8 + */ + virtual UBool nextBucket(UErrorCode &status); + + /** + * Return the name of the Label of the current bucket from an iteration over the buckets. + * If the iteration is before the first Bucket (nextBucket() has not been called), + * or after the last, return an empty string. + * + * @return the bucket label. + * @stable ICU 4.8 + */ + virtual const UnicodeString &getBucketLabel() const; + + /** + * Return the type of the label for the current Bucket (selected by the + * iteration over Buckets.) + * + * @return the label type. + * @stable ICU 4.8 + */ + virtual UAlphabeticIndexLabelType getBucketLabelType() const; + + /** + * Get the number of Records in the current Bucket. + * If the current bucket iteration position is before the first label or after the + * last, return 0. + * + * @return the number of Records. + * @stable ICU 4.8 + */ + virtual int32_t getBucketRecordCount() const; + + + /** + * Reset the Bucket iteration for this index. 
The next call to nextBucket() + * will restart the iteration at the first label. + * + * @param status Error code, will be set with the reason if the operation fails. + * @return this, for chaining. + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &resetBucketIterator(UErrorCode &status); + + /** + * Advance to the next record in the current Bucket. + * When nextBucket() is called, Record iteration is reset to just before the + * first Record in the new Bucket. + * + * @param status Error code, will be set with the reason if the operation fails. + * U_ENUM_OUT_OF_SYNC_ERROR will be reported if the index is modified while + * an enumeration of its contents are in process. + * @return true if successful, false when the iteration advances past the last item. + * @stable ICU 4.8 + */ + virtual UBool nextRecord(UErrorCode &status); + + /** + * Get the name of the current Record. + * Return an empty string if the Record iteration position is before first + * or after the last. + * + * @return The name of the current index item. + * @stable ICU 4.8 + */ + virtual const UnicodeString &getRecordName() const; + + + /** + * Return the data pointer of the Record currently being iterated over. + * Return nullptr if the current iteration position before the first item in this Bucket, + * or after the last. + * + * @return The current Record's data pointer. + * @stable ICU 4.8 + */ + virtual const void *getRecordData() const; + + + /** + * Reset the Record iterator position to before the first Record in the current Bucket. + * + * @return This, for chaining. + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &resetRecordIterator(); + +private: + /** + * No Copy constructor. + * @internal (private) + */ + AlphabeticIndex(const AlphabeticIndex &other) = delete; + + /** + * No assignment. + */ + AlphabeticIndex &operator =(const AlphabeticIndex & /*other*/) { return *this;} + + /** + * No Equality operators. 
+ * @internal (private) + */ + virtual bool operator==(const AlphabeticIndex& other) const; + + /** + * Inequality operator. + * @internal (private) + */ + virtual bool operator!=(const AlphabeticIndex& other) const; + + // Common initialization, for use from all constructors. + void init(const Locale *locale, UErrorCode &status); + + /** + * This method is called to get the index exemplars. Normally these come from the locale directly, + * but if they aren't available, we have to synthesize them. + */ + void addIndexExemplars(const Locale &locale, UErrorCode &status); + /** + * Add Chinese index characters from the tailoring. + */ + UBool addChineseIndexCharacters(UErrorCode &errorCode); + + UVector *firstStringsInScript(UErrorCode &status); + + static UnicodeString separated(const UnicodeString &item); + + /** + * Determine the best labels to use. + * This is based on the exemplars, but we also process to make sure that they are unique, + * and sort differently, and that the overall list is small enough. + */ + void initLabels(UVector &indexCharacters, UErrorCode &errorCode) const; + BucketList *createBucketList(UErrorCode &errorCode) const; + void initBuckets(UErrorCode &errorCode); + void clearBuckets(); + void internalResetBucketIterator(); + +public: + + // The Record is declared public only to allow access from + // implementation code written in plain C. + // It is not intended for public use. + +#ifndef U_HIDE_INTERNAL_API + /** + * A (name, data) pair, to be sorted by name into one of the index buckets. + * The user data is not used by the index implementation. + * \cond + * @internal + */ + struct Record: public UMemory { + const UnicodeString name_; + const void *data_; + Record(const UnicodeString &name, const void *data); + ~Record(); + }; + /** \endcond */ +#endif /* U_HIDE_INTERNAL_API */ + +private: + + /** + * Holds all user records before they are distributed into buckets. 
+ * Type of contents is (Record *) + * @internal (private) + */ + UVector *inputList_; + + int32_t labelsIterIndex_; // Index of next item to return. + int32_t itemsIterIndex_; + Bucket *currentBucket_; // While an iteration of the index in underway, + // point to the bucket for the current label. + // nullptr when no iteration underway. + + int32_t maxLabelCount_; // Limit on # of labels permitted in the index. + + UnicodeSet *initialLabels_; // Initial (unprocessed) set of Labels. Union + // of those explicitly set by the user plus + // those from locales. Raw values, before + // crunching into bucket labels. + + UVector *firstCharsInScripts_; // The first character from each script, + // in collation order. + + RuleBasedCollator *collator_; + RuleBasedCollator *collatorPrimaryOnly_; + + // Lazy evaluated: null means that we have not built yet. + BucketList *buckets_; + + UnicodeString inflowLabel_; + UnicodeString overflowLabel_; + UnicodeString underflowLabel_; + UnicodeString overflowComparisonString_; + + UnicodeString emptyString_; +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_COLLATION + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/appendable.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/appendable.h new file mode 100644 index 0000000000..0e37f4562a --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/appendable.h @@ -0,0 +1,239 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011-2012, International Business Machines +* Corporation and others. All Rights Reserved. 
+******************************************************************************* +* file name: appendable.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010dec07 +* created by: Markus W. Scherer +*/ + +#ifndef __APPENDABLE_H__ +#define __APPENDABLE_H__ + +/** + * \file + * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (char16_ts). + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN + +class UnicodeString; + +/** + * Base class for objects to which Unicode characters and strings can be appended. + * Combines elements of Java Appendable and ICU4C ByteSink. + * + * This class can be used in APIs where it does not matter whether the actual destination is + * a UnicodeString, a char16_t[] array, a UnicodeSet, or any other object + * that receives and processes characters and/or strings. + * + * Implementation classes must implement at least appendCodeUnit(char16_t). + * The base class provides default implementations for the other methods. + * + * The methods do not take UErrorCode parameters. + * If an error occurs (e.g., out-of-memory), + * in addition to returning false from failing operations, + * the implementation must prevent unexpected behavior (e.g., crashes) + * from further calls and should make the error condition available separately + * (e.g., store a UErrorCode, make/keep a UnicodeString bogus). + * @stable ICU 4.8 + */ +class U_COMMON_API Appendable : public UObject { +public: + /** + * Destructor. + * @stable ICU 4.8 + */ + ~Appendable(); + + /** + * Appends a 16-bit code unit. + * @param c code unit + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendCodeUnit(char16_t c) = 0; + + /** + * Appends a code point. + * The default implementation calls appendCodeUnit(char16_t) once or twice.
+ * @param c code point 0..0x10ffff + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendCodePoint(UChar32 c); + + /** + * Appends a string. + * The default implementation calls appendCodeUnit(char16_t) for each code unit. + * @param s string, must not be nullptr if length!=0 + * @param length string length, or -1 if NUL-terminated + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendString(const char16_t *s, int32_t length); + + /** + * Tells the object that the caller is going to append roughly + * appendCapacity char16_ts. A subclass might use this to pre-allocate + * a larger buffer if necessary. + * The default implementation does nothing. (It always returns true.) + * @param appendCapacity estimated number of char16_ts that will be appended + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool reserveAppendCapacity(int32_t appendCapacity); + + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *resultCapacity. Guarantees *resultCapacity>=minCapacity. + * May return a pointer to the caller-owned scratch buffer which must have + * scratchCapacity>=minCapacity. + * The returned buffer is only valid until the next operation + * on this Appendable. + * + * After writing at most *resultCapacity char16_ts, call appendString() with the + * pointer returned from this function and the number of char16_ts written. + * Many appendString() implementations will avoid copying char16_ts if this function + * returned an internal buffer. + * + * Partial usage example: + * \code + * int32_t capacity; + * char16_t* buffer = app.getAppendBuffer(..., &capacity); + * ... Write n char16_ts into buffer, with n <= capacity. + * app.appendString(buffer, n); + * \endcode + * In many implementations, that call to append will avoid copying char16_ts.
+ * + * If the Appendable allocates or reallocates an internal buffer, it should use + * the desiredCapacityHint if appropriate. + * If a caller cannot provide a reasonable guess at the desired capacity, + * it should pass desiredCapacityHint=0. + * + * If a non-scratch buffer is returned, the caller may only pass + * a prefix of it to appendString(). + * That is, it is not correct to pass an interior pointer to appendString(). + * + * The default implementation always returns the scratch buffer. + * + * @param minCapacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desiredCapacityHint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratchCapacity capacity of the scratch buffer + * @param resultCapacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *resultCapacity>=minCapacity + * @stable ICU 4.8 + */ + virtual char16_t *getAppendBuffer(int32_t minCapacity, + int32_t desiredCapacityHint, + char16_t *scratch, int32_t scratchCapacity, + int32_t *resultCapacity); +}; + +/** + * An Appendable implementation which writes to a UnicodeString. + * + * This class is not intended for public subclassing. + * @stable ICU 4.8 + */ +class U_COMMON_API UnicodeStringAppendable : public Appendable { +public: + /** + * Aliases the UnicodeString (keeps its reference) for writing. + * @param s The UnicodeString to which this Appendable will write; only a reference is stored, so s must outlive this object. + * @stable ICU 4.8 + */ + explicit UnicodeStringAppendable(UnicodeString &s) : str(s) {} + + /** + * Destructor. + * @stable ICU 4.8 + */ + ~UnicodeStringAppendable(); + + /** + * Appends a 16-bit code unit to the string. + * @param c code unit + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendCodeUnit(char16_t c) override; + + /** + * Appends a code point to the string.
+ * @param c code point 0..0x10ffff + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendCodePoint(UChar32 c) override; + + /** + * Appends a string to the UnicodeString. + * @param s string, must not be nullptr if length!=0 + * @param length string length, or -1 if NUL-terminated + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendString(const char16_t *s, int32_t length) override; + + /** + * Tells the UnicodeString that the caller is going to append roughly + * appendCapacity char16_ts. + * @param appendCapacity estimated number of char16_ts that will be appended + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool reserveAppendCapacity(int32_t appendCapacity) override; + + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *resultCapacity. Guarantees *resultCapacity>=minCapacity. + * May return a pointer to the caller-owned scratch buffer which must have + * scratchCapacity>=minCapacity. + * The returned buffer is only valid until the next write operation + * on the UnicodeString. + * + * For details see Appendable::getAppendBuffer().
+ * + * @param minCapacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desiredCapacityHint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratchCapacity capacity of the scratch buffer + * @param resultCapacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *resultCapacity>=minCapacity + * @stable ICU 4.8 + */ + virtual char16_t *getAppendBuffer(int32_t minCapacity, + int32_t desiredCapacityHint, + char16_t *scratch, int32_t scratchCapacity, + int32_t *resultCapacity) override; + +private: + UnicodeString &str; /* Destination string, aliased by reference (bound in the constructor). */ +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __APPENDABLE_H__ diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/basictz.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/basictz.h new file mode 100644 index 0000000000..4f8e4cacb1 --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/basictz.h @@ -0,0 +1,248 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2013, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +*/ +#ifndef BASICTZ_H +#define BASICTZ_H + +/** + * \file + * \brief C++ API: ICU TimeZone base class + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/timezone.h" +#include "unicode/tzrule.h" +#include "unicode/tztrans.h" + +U_NAMESPACE_BEGIN + +// forward declarations +class UVector; + +/** + * BasicTimeZone is an abstract class extending TimeZone.
+ * This class provides some additional methods to access time zone transitions and rules. + * All ICU TimeZone concrete subclasses extend this class. + * @stable ICU 3.8 + */ +class U_I18N_API BasicTimeZone: public TimeZone { +public: + /** + * Destructor. + * @stable ICU 3.8 + */ + virtual ~BasicTimeZone(); + + /** + * Clones this object polymorphically. + * The caller owns the result and should delete it when done. + * @return clone, or nullptr if an error occurred + * @stable ICU 3.8 + */ + virtual BasicTimeZone* clone() const override = 0; + + /** + * Gets the first time zone transition after the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the first transition after the base time. + * @return true if the transition is found. + * @stable ICU 3.8 + */ + virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const = 0; + + /** + * Gets the most recent time zone transition before the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the most recent transition before the base time. + * @return true if the transition is found. + * @stable ICU 3.8 + */ + virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const = 0; + + /** + * Checks if the time zone has equivalent transitions in the time range. + * This method returns true when all of the transition times, from/to standard + * offsets and DST savings used by this time zone match the other in the + * time range. + * @param tz The BasicTimeZone object to be compared with. + * @param start The start time of the evaluated time range (inclusive) + * @param end The end time of the evaluated time range (inclusive) + * @param ignoreDstAmount + * When true, any transitions with only daylight saving amount + * changes will be ignored, except when either of them is zero.
+ * For example, a transition from rawoffset 3:00/dstsavings 1:00 + * to rawoffset 2:00/dstsavings 2:00 is excluded from the comparison, + * but a transition from rawoffset 2:00/dstsavings 1:00 to + * rawoffset 3:00/dstsavings 0:00 is included. + * @param ec Output param to be filled in with a success or an error. + * @return true if the other time zone has the equivalent transitions in the + * time range. + * @stable ICU 3.8 + */ + virtual UBool hasEquivalentTransitions(const BasicTimeZone& tz, UDate start, UDate end, + UBool ignoreDstAmount, UErrorCode& ec) const; + + /** + * Returns the number of TimeZoneRules which represent time transitions, + * for this time zone, that is, all TimeZoneRules for this time zone except + * InitialTimeZoneRule. The return value range is 0 or any positive value. + * @param status Receives error status code. + * @return The number of TimeZoneRules representing time transitions. + * @stable ICU 3.8 + */ + virtual int32_t countTransitionRules(UErrorCode& status) const = 0; + + /** + * Gets the InitialTimeZoneRule and the set of TimeZoneRule + * which represent time transitions for this time zone. On successful return, + * the argument initial points to non-nullptr InitialTimeZoneRule and + * the array trsrules is filled with 0 or multiple TimeZoneRule + * instances up to the size specified by trscount. The results are referencing the + * rule instance held by this time zone instance. Therefore, after this time zone + * is destructed, they are no longer available. + * @param initial Receives the initial timezone rule + * @param trsrules Receives the timezone transition rules + * @param trscount On input, specify the size of the array 'trsrules' receiving + * the timezone transition rules. On output, actual number of + * rules filled in the array will be set. + * @param status Receives error status code.
+ * @stable ICU 3.8 + */ + virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial, + const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) const = 0; + + /** + * Gets the set of time zone rules valid at the specified time. Some known external time zone + * implementations are not capable of handling historic time zone rule changes. Also some + * implementations can only handle certain types of rule definitions. + * If this time zone does not use any daylight saving time within about 1 year from the specified + * time, only the InitialTimeZone is returned. Otherwise, the rules for standard + * time and daylight saving time transitions are returned in addition to the + * InitialTimeZoneRule. The standard and daylight saving time transition rules are + * represented by AnnualTimeZoneRule with DateTimeRule::DOW for its date + * rule and DateTimeRule::WALL_TIME for its time rule. Because the daylight saving time + * rule changes from time to time in many time zones, and mapping a transition time rule to + * a different type is a lossy transformation, the set of rules returned by this method may be valid + * for only a short period of time. + * The time zone rule objects returned by this method are owned by the caller, so the caller is + * responsible for deleting them after use. + * @param date The date used for extracting time zone rules. + * @param initial Receives the InitialTimeZone, always not nullptr. + * @param std Receives the AnnualTimeZoneRule for standard time transitions. + * When this time zone does not observe daylight saving times around the + * specified date, nullptr is set. + * @param dst Receives the AnnualTimeZoneRule for daylight saving time + * transitions. When this time zone does not observe daylight saving times + * around the specified date, nullptr is set. + * @param status Receives error status code.
+ * @stable ICU 3.8 + */ + virtual void getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial, + AnnualTimeZoneRule*& std, AnnualTimeZoneRule*& dst, UErrorCode& status) const; + + /** + * Get time zone offsets from local wall time. + * @stable ICU 69 + */ + virtual void getOffsetFromLocal( + UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, int32_t& rawOffset, + int32_t& dstOffset, UErrorCode& status) const; + + +#ifndef U_HIDE_INTERNAL_API + /** + * The time type option bit flags used by getOffsetFromLocal + * @internal + */ + enum { + kStandard = 0x01, + kDaylight = 0x03, + kFormer = 0x04, /* UCAL_TZ_LOCAL_FORMER */ + kLatter = 0x0C /* UCAL_TZ_LOCAL_LATTER */ + }; + + /** + * Get time zone offsets from local wall time. + * @internal + */ + void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const; +#endif /* U_HIDE_INTERNAL_API */ + +protected: + +#ifndef U_HIDE_INTERNAL_API + /** + * A time type option bit mask used by getOffsetFromLocal; equal to kDaylight (0x03), which spans the kStandard and kDaylight bits. + * @internal + */ + static constexpr int32_t kStdDstMask = kDaylight; + /** + * A time type option bit mask used by getOffsetFromLocal; equal to kLatter (0x0C), which spans the kFormer and kLatter bits. + * @internal + */ + static constexpr int32_t kFormerLatterMask = kLatter; +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Default constructor. + * @stable ICU 3.8 + */ + BasicTimeZone(); + + /** + * Construct a timezone with a given ID. + * @param id a system time zone ID + * @stable ICU 3.8 + */ + BasicTimeZone(const UnicodeString &id); + + /** + * Copy constructor. + * @param source the object to be copied. + * @stable ICU 3.8 + */ + BasicTimeZone(const BasicTimeZone& source); + + /** + * Copy assignment. + * @stable ICU 3.8 + */ + BasicTimeZone& operator=(const BasicTimeZone&) = default; + + /** + * Gets the set of TimeZoneRule instances applicable to the specified time and after.
+ * @param start The start date used for extracting time zone rules + * @param initial Output parameter, receives the InitialTimeZone. + * Always not nullptr (except in case of error) + * @param transitionRules Output parameter, a UVector of transition rules. + * May be nullptr, if there are no transition rules. + * The caller owns the returned vector; the UVector owns the rules. + * @param status Receives error status code + */ + void getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, UVector*& transitionRules, + UErrorCode& status) const; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // BASICTZ_H + +//eof diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/brkiter.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/brkiter.h new file mode 100644 index 0000000000..d953925bd7 --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/brkiter.h @@ -0,0 +1,672 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File brkiter.h +* +* Modification History: +* +* Date Name Description +* 02/18/97 aliu Added typedef for TextCount. Made DONE const. +* 05/07/97 aliu Fixed DLL declaration. +* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK +* 08/11/98 helena Sync-up JDK1.2. +* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
+******************************************************************************** +*/ + +#ifndef BRKITER_H +#define BRKITER_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Break Iterator. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if UCONFIG_NO_BREAK_ITERATION + +U_NAMESPACE_BEGIN + +/* + * Allow the declaration of APIs with pointers to BreakIterator + * even when break iteration is removed from the build. + */ +class BreakIterator; + +U_NAMESPACE_END + +#else + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/chariter.h" +#include "unicode/locid.h" +#include "unicode/ubrk.h" +#include "unicode/strenum.h" +#include "unicode/utext.h" +#include "unicode/umisc.h" + +U_NAMESPACE_BEGIN + +class CharString; + +/** + * The BreakIterator class implements methods for finding the location + * of boundaries in text. BreakIterator is an abstract base class. + * Instances of BreakIterator maintain a current position and scan over + * text returning the index of characters where boundaries occur. + *

+ * Line boundary analysis determines where a text string can be broken + * when line-wrapping. The mechanism correctly handles punctuation and + * hyphenated words. + *

+ * Sentence boundary analysis allows selection with correct + * interpretation of periods within numbers and abbreviations, and + * trailing punctuation marks such as quotation marks and parentheses. + *

+ * Word boundary analysis is used by search and replace functions, as + * well as within text editing applications that allow the user to + * select words with a double click. Word selection provides correct + * interpretation of punctuation marks within and following + * words. Characters that are not part of a word, such as symbols or + * punctuation marks, have word-breaks on both sides. + *

+ * Character boundary analysis allows users to interact with + * characters as they expect to, for example, when moving the cursor + * through a text string. Character boundary analysis provides correct + * navigation through character strings, regardless of how the + * character is stored. For example, an accented character might be + * stored as a base character and a diacritical mark. What users + * consider to be a character can differ between languages. + *

+ * The text boundary positions are found according to the rules + * described in Unicode Standard Annex #29, Text Boundaries, and + * Unicode Standard Annex #14, Line Breaking Properties. These + * are available at http://www.unicode.org/reports/tr14/ and + * http://www.unicode.org/reports/tr29/. + *

+ * In addition to the C++ API defined in this header file, a + * plain C API with equivalent functionality is defined in the + * file ubrk.h + *

+ * Code snippets illustrating the use of the Break Iterator APIs + * are available in the ICU User Guide, + * https://unicode-org.github.io/icu/userguide/boundaryanalysis/ + * and in the sample program icu/source/samples/break/break.cpp + * + */ +class U_COMMON_API BreakIterator : public UObject { +public: + /** + * destructor + * @stable ICU 2.0 + */ + virtual ~BreakIterator(); + + /** + * Return true if another object is semantically equal to this + * one. The other object should be an instance of the same subclass of + * BreakIterator. Objects of different subclasses are considered + * unequal. + *

+ * Return true if this BreakIterator is at the same position in the + * same text, and is the same class and type (word, line, etc.) of + * BreakIterator, as the argument. Text is considered the same if + * it contains the same characters, it need not be the same + * object, and styles are not considered. + * @stable ICU 2.0 + */ + virtual bool operator==(const BreakIterator&) const = 0; + + /** + * Returns the complement of the result of operator== + * @param rhs The BreakIterator to be compared for inequality + * @return the complement of the result of operator== + * @stable ICU 2.0 + */ + bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); } + + /** + * Return a polymorphic copy of this object. This is an abstract + * method which subclasses implement. + * @stable ICU 2.0 + */ + virtual BreakIterator* clone() const = 0; + + /** + * Return a polymorphic class ID for this object. Different subclasses + * will return distinct unequal values. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID() const override = 0; + + /** + * Return a CharacterIterator over the text being analyzed. + * @stable ICU 2.0 + */ + virtual CharacterIterator& getText() const = 0; + + /** + * Get a UText for the text being analyzed. + * The returned UText is a shallow clone of the UText used internally + * by the break iterator implementation. It can safely be used to + * access the text without impacting any break iterator operations, + * but the underlying text itself must not be altered. + * + * @param fillIn A UText to be filled in. If nullptr, a new UText will be + * allocated to hold the result. + * @param status receives any error codes. + * @return The current UText for this break iterator. If an input + * UText was provided, it will always be returned. + * @stable ICU 3.4 + */ + virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0; + + /** + * Change the text over which this operates. The text boundary is + * reset to the start. 
+ * + * The BreakIterator will retain a reference to the supplied string. + * The caller must not modify or delete the text while the BreakIterator + * retains the reference. + * + * @param text The UnicodeString used to change the text. + * @stable ICU 2.0 + */ + virtual void setText(const UnicodeString &text) = 0; + + /** + * Reset the break iterator to operate over the text represented by + * the UText. The iterator position is reset to the start. + * + * This function makes a shallow clone of the supplied UText. This means + * that the caller is free to immediately close or otherwise reuse the + * Utext that was passed as a parameter, but that the underlying text itself + * must not be altered while being referenced by the break iterator. + * + * All index positions returned by break iterator functions are + * native indices from the UText. For example, when breaking UTF-8 + * encoded text, the break positions returned by next(), previous(), etc. + * will be UTF-8 string indices, not UTF-16 positions. + * + * @param text The UText used to change the text. + * @param status receives any error codes. + * @stable ICU 3.4 + */ + virtual void setText(UText *text, UErrorCode &status) = 0; + + /** + * Change the text over which this operates. The text boundary is + * reset to the start. + * Note that setText(UText *) provides similar functionality to this function, + * and is more efficient. + * @param it The CharacterIterator used to change the text. + * @stable ICU 2.0 + */ + virtual void adoptText(CharacterIterator* it) = 0; + + enum { + /** + * DONE is returned by previous() and next() after all valid + * boundaries have been returned. + * @stable ICU 2.0 + */ + DONE = static_cast(-1) + }; + + /** + * Sets the current iteration position to the beginning of the text, position zero. + * @return The offset of the beginning of the text, zero. 
+ * @stable ICU 2.0 + */ + virtual int32_t first() = 0; + + /** + * Set the iterator position to the index immediately BEYOND the last character in the text being scanned. + * @return The index immediately BEYOND the last character in the text being scanned. + * @stable ICU 2.0 + */ + virtual int32_t last() = 0; + + /** + * Set the iterator position to the boundary preceding the current boundary. + * @return The character index of the previous text boundary or DONE if all + * boundaries have been returned. + * @stable ICU 2.0 + */ + virtual int32_t previous() = 0; + + /** + * Advance the iterator to the boundary following the current boundary. + * @return The character index of the next text boundary or DONE if all + * boundaries have been returned. + * @stable ICU 2.0 + */ + virtual int32_t next() = 0; + + /** + * Return character index of the current iterator position within the text. + * @return The boundary most recently returned. + * @stable ICU 2.0 + */ + virtual int32_t current() const = 0; + + /** + * Advance the iterator to the first boundary following the specified offset. + * The value returned is always greater than the offset or + * the value BreakIterator.DONE + * @param offset the offset to begin scanning. + * @return The first boundary after the specified offset. + * @stable ICU 2.0 + */ + virtual int32_t following(int32_t offset) = 0; + + /** + * Set the iterator position to the first boundary preceding the specified offset. + * The value returned is always smaller than the offset or + * the value BreakIterator.DONE + * @param offset the offset to begin scanning. + * @return The first boundary before the specified offset. + * @stable ICU 2.0 + */ + virtual int32_t preceding(int32_t offset) = 0; + + /** + * Return true if the specified position is a boundary position. + * As a side effect, the current position of the iterator is set + * to the first boundary position at or following the specified offset. + * @param offset the offset to check. 
+ * @return True if "offset" is a boundary position. + * @stable ICU 2.0 + */ + virtual UBool isBoundary(int32_t offset) = 0; + + /** + * Set the iterator position to the nth boundary from the current boundary + * @param n the number of boundaries to move by. A value of 0 + * does nothing. Negative values move to previous boundaries + * and positive values move to later boundaries. + * @return The new iterator position, or + * DONE if there are fewer than |n| boundaries in the specified direction. + * @stable ICU 2.0 + */ + virtual int32_t next(int32_t n) = 0; + + /** + * For RuleBasedBreakIterators, return the status tag from the break rule + * that determined the boundary at the current iteration position. + *

+ * For break iterator types that do not support a rule status, + * a default value of 0 is returned. + *

+ * @return the status from the break rule that determined the boundary at + * the current iteration position. + * @see RuleBasedBreakIterator::getRuleStatus() + * @see UWordBreak + * @stable ICU 52 + */ + virtual int32_t getRuleStatus() const; + + /** + * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) + * that determined the boundary at the current iteration position. + *

+ * For break iterator types that do not support rule status, + * no values are returned. + *

+ * The returned status value(s) are stored into an array provided by the caller. + * The values are stored in sorted (ascending) order. + * If the capacity of the output array is insufficient to hold the data, + * the output will be truncated to the available length, and a + * U_BUFFER_OVERFLOW_ERROR will be signaled. + *

+ * @see RuleBasedBreakIterator::getRuleStatusVec + * + * @param fillInVec an array to be filled in with the status values. + * @param capacity the length of the supplied vector. A length of zero causes + * the function to return the number of status values, in the + * normal way, without attempting to store any values. + * @param status receives error codes. + * @return The number of rule status values from rules that determined + * the boundary at the current iteration position. + * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value + * is the total number of status values that were available, + * not the reduced number that were actually returned. + * @see getRuleStatus + * @stable ICU 52 + */ + virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); + + /** + * Create BreakIterator for word-breaks using the given locale. + * Returns an instance of a BreakIterator implementing word breaks. + * WordBreak is useful for word selection (ex. double click) + * @param where the locale. + * @param status the error code + * @return A BreakIterator for word-breaks. The UErrorCode& status + * parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0 + */ + static BreakIterator* U_EXPORT2 + createWordInstance(const Locale& where, UErrorCode& status); + + /** + * Create BreakIterator for line-breaks using specified locale. + * Returns an instance of a BreakIterator implementing line breaks. Line + * breaks are logically possible line breaks, actual line breaks are + * usually determined based on display width. + * LineBreak is useful for word wrapping text. + * @param where the locale. + * @param status The error code. + * @return A BreakIterator for line-breaks. The UErrorCode& status + * parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * @stable ICU 2.0 + */ + static BreakIterator* U_EXPORT2 + createLineInstance(const Locale& where, UErrorCode& status); + + /** + * Create BreakIterator for character-breaks using specified locale + * Returns an instance of a BreakIterator implementing character breaks. + * Character breaks are boundaries of combining character sequences. + * @param where the locale. + * @param status The error code. + * @return A BreakIterator for character-breaks. The UErrorCode& status + * parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). 
If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * @stable ICU 2.0 + */ + static BreakIterator* U_EXPORT2 + createCharacterInstance(const Locale& where, UErrorCode& status); + + /** + * Create BreakIterator for sentence-breaks using specified locale + * Returns an instance of a BreakIterator implementing sentence breaks. + * @param where the locale. + * @param status The error code. + * @return A BreakIterator for sentence-breaks. The UErrorCode& status + * parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * @stable ICU 2.0 + */ + static BreakIterator* U_EXPORT2 + createSentenceInstance(const Locale& where, UErrorCode& status); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Create BreakIterator for title-casing breaks using the specified locale + * Returns an instance of a BreakIterator implementing title breaks. 
+ * The iterator returned locates title boundaries as described for + * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, + * please use a word boundary iterator. See {@link #createWordInstance }. + * + * @param where the locale. + * @param status The error code. + * @return A BreakIterator for title-breaks. The UErrorCode& status + * parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * @deprecated ICU 64 Use createWordInstance instead. + */ + static BreakIterator* U_EXPORT2 + createTitleInstance(const Locale& where, UErrorCode& status); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Get the set of Locales for which TextBoundaries are installed. + *

Note: this will not return locales added through the register + * call. To see the registered locales too, use the getAvailableLocales + * function that returns a StringEnumeration object

+ * @param count the output parameter of number of elements in the locale list + * @return available locales + * @stable ICU 2.0 + */ + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); + + /** + * Get name of the object for the desired Locale, in the desired language. + * @param objectLocale must be from getAvailableLocales. + * @param displayLocale specifies the desired locale for output. + * @param name the fill-in parameter of the return value + * Uses best match. + * @return user-displayable name + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, + const Locale& displayLocale, + UnicodeString& name); + + /** + * Get name of the object for the desired Locale, in the language of the + * default locale. + * @param objectLocale must be from getMatchingLocales + * @param name the fill-in parameter of the return value + * @return user-displayable name + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, + UnicodeString& name); + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Deprecated functionality. Use clone() instead. + * + * Thread safe client-buffer-based cloning operation + * Do NOT call delete on a safeclone, since 'new' is not used to create it. + * @param stackBuffer user allocated space for the new clone. If nullptr new memory will be allocated. + * If buffer is not large enough, new memory will be allocated. + * @param BufferSize reference to size of allocated space. + * If BufferSize == 0, a sufficient size for use in cloning will + * be returned ('pre-flighting') + * If BufferSize is not enough for a stack-based safe clone, + * new memory will be allocated. + * @param status to indicate whether the operation went on smoothly or there were errors + * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were + * necessary. + * @return pointer to the new clone + * + * @deprecated ICU 52. 
Use clone() instead. + */ + virtual BreakIterator * createBufferClone(void *stackBuffer, + int32_t &BufferSize, + UErrorCode &status) = 0; +#endif // U_FORCE_HIDE_DEPRECATED_API + +#ifndef U_HIDE_DEPRECATED_API + + /** + * Determine whether the BreakIterator was created in user memory by + * createBufferClone(), and thus should not be deleted. Such objects + * must be closed by an explicit call to the destructor (not delete). + * @deprecated ICU 52. Always delete the BreakIterator. + */ + inline UBool isBufferClone(); + +#endif /* U_HIDE_DEPRECATED_API */ + +#if !UCONFIG_NO_SERVICE + /** + * Register a new break iterator of the indicated kind, to use in the given locale. + * The break iterator will be adopted. Clones of the iterator will be returned + * if a request for a break iterator of the given kind matches or falls back to + * this locale. + * Because ICU may choose to cache BreakIterators internally, this must + * be called at application startup, prior to any calls to + * BreakIterator::createXXXInstance to avoid undefined behavior. + * @param toAdopt the BreakIterator instance to be adopted + * @param locale the Locale for which this instance is to be registered + * @param kind the type of iterator for which this instance is to be registered + * @param status the in/out status code, no special meanings are assigned + * @return a registry key that can be used to unregister this instance + * @stable ICU 2.4 + */ + static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt, + const Locale& locale, + UBreakIteratorType kind, + UErrorCode& status); + + /** + * Unregister a previously-registered BreakIterator using the key returned from the + * register call. Key becomes invalid after a successful call and should not be used again. + * The BreakIterator corresponding to the key will be deleted. 
+ * Because ICU may choose to cache BreakIterators internally, this should + * be called during application shutdown, after all calls to + * BreakIterator::createXXXInstance to avoid undefined behavior. + * @param key the registry key returned by a previous call to registerInstance + * @param status the in/out status code, no special meanings are assigned + * @return true if the iterator for the key was successfully unregistered + * @stable ICU 2.4 + */ + static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); + + /** + * Return a StringEnumeration over the locales available at the time of the call, + * including registered locales. + * @return a StringEnumeration over the locales available at the time of the call + * @stable ICU 2.4 + */ + static StringEnumeration* U_EXPORT2 getAvailableLocales(); +#endif + + /** + * Returns the locale for this break iterator. Two flavors are available: valid and + * actual locale. + * @stable ICU 2.8 + */ + Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; + +#ifndef U_HIDE_INTERNAL_API + /** Get the locale for this break iterator object. You can choose between valid and actual locale. + * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale + * @internal + */ + const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Set the subject text string upon which the break iterator is operating + * without changing any other aspect of the matching state. + * The new and previous text strings must have the same content. + * + * This function is intended for use in environments where ICU is operating on + * strings that may move around in memory. It provides a mechanism for notifying + * ICU that the string has been relocated, and providing a new UText to access the + * string in its new position. 
+ * + * Note that the break iterator implementation never copies the underlying text + * of a string being processed, but always operates directly on the original text + * provided by the user. Refreshing simply drops the references to the old text + * and replaces them with references to the new. + * + * Caution: this function is normally used only by very specialized, + * system-level code. One example use case is with garbage collection that moves + * the text in memory. + * + * @param input The new (moved) text string. + * @param status Receives errors detected by this function. + * @return *this + * + * @stable ICU 49 + */ + virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; + + private: + static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status); + static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status); + static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status); + + friend class ICUBreakIteratorFactory; + friend class ICUBreakIteratorService; + +protected: + // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API + // or else the compiler will create a public ones. + /** @internal */ + BreakIterator(); + /** @internal */ + BreakIterator (const BreakIterator &other); +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + BreakIterator (const Locale& valid, const Locale &actual); + /** @internal. Assignment Operator, used by RuleBasedBreakIterator. 
*/ + BreakIterator &operator = (const BreakIterator &other); +#endif /* U_HIDE_INTERNAL_API */ + +private: + + /** @internal (private) */ + CharString* actualLocale = nullptr; + CharString* validLocale = nullptr; + CharString* requestLocale = nullptr; +}; + +#ifndef U_HIDE_DEPRECATED_API + +inline UBool BreakIterator::isBufferClone() +{ + return false; +} + +#endif /* U_HIDE_DEPRECATED_API */ + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // BRKITER_H +//eof diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/bytestream.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/bytestream.h new file mode 100644 index 0000000000..bea41461bc --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/bytestream.h @@ -0,0 +1,307 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// Copyright (C) 2009-2012, International Business Machines +// Corporation and others. All Rights Reserved. +// +// Copyright 2007 Google Inc. All Rights Reserved. +// Author: sanjay@google.com (Sanjay Ghemawat) +// +// Abstract interface that consumes a sequence of bytes (ByteSink). +// +// Used so that we can write a single piece of code that can operate +// on a variety of output string types. +// +// Various implementations of this interface are provided: +// ByteSink: +// CheckedArrayByteSink Write to a flat array, with bounds checking +// StringByteSink Write to an STL string + +// This code is a contribution of Google code, and the style used here is +// a compromise between the original Google code and the ICU coding guidelines. +// For example, data types are ICU-ified (size_t,int->int32_t), +// and API comments doxygen-ified, but function names and behavior are +// as in the original, if possible. 
+// Assertion-style error handling, not available in ICU, was changed to +// parameter "pinning" similar to UnicodeString. +// +// In addition, this is only a partial port of the original Google code, +// limited to what was needed so far. The (nearly) complete original code +// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib +// (see ICU ticket 6765, r25517). + +#ifndef __BYTESTREAM_H__ +#define __BYTESTREAM_H__ + +/** + * \file + * \brief C++ API: Interface for writing bytes, and implementation classes. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" +#include "unicode/std_string.h" + +U_NAMESPACE_BEGIN + +/** + * A ByteSink can be filled with bytes. + * @stable ICU 4.2 + */ +class U_COMMON_API ByteSink : public UMemory { +public: + /** + * Default constructor. + * @stable ICU 4.2 + */ + ByteSink() { } + /** + * Virtual destructor. + * @stable ICU 4.2 + */ + virtual ~ByteSink(); + + /** + * Append "bytes[0,n-1]" to this. + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* bytes, int32_t n) = 0; + + /** + * Appends n bytes to this. Same as Append(). + * Call AppendU8() with u8"string literals" which are const char * in C++11 + * but const char8_t * in C++20. + * If the compiler does support char8_t as a distinct type, + * then an AppendU8() overload for that is defined and will be chosen. + * + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 67 + */ + inline void AppendU8(const char* bytes, int32_t n) { + Append(bytes, n); + } + +#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) + /** + * Appends n bytes to this. Same as Append() but for a const char8_t * pointer. + * Call AppendU8() with u8"string literals" which are const char * in C++11 + * but const char8_t * in C++20. 
+ * If the compiler does support char8_t as a distinct type, + * then this AppendU8() overload for that is defined and will be chosen. + * + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 67 + */ + inline void AppendU8(const char8_t* bytes, int32_t n) { + Append(reinterpret_cast<const char*>(bytes), n); + } +#endif + + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *result_capacity. Guarantees *result_capacity>=min_capacity. + * May return a pointer to the caller-owned scratch buffer which must have + * scratch_capacity>=min_capacity. + * The returned buffer is only valid until the next operation + * on this ByteSink. + * + * After writing at most *result_capacity bytes, call Append() with the + * pointer returned from this function and the number of bytes written. + * Many Append() implementations will avoid copying bytes if this function + * returned an internal buffer. + * + * Partial usage example: + * int32_t capacity; + * char* buffer = sink->GetAppendBuffer(..., &capacity); + * ... Write n bytes into buffer, with n <= capacity. + * sink->Append(buffer, n); + * In many implementations, that call to Append will avoid copying bytes. + * + * If the ByteSink allocates or reallocates an internal buffer, it should use + * the desired_capacity_hint if appropriate. + * If a caller cannot provide a reasonable guess at the desired capacity, + * it should pass desired_capacity_hint=0. + * + * If a non-scratch buffer is returned, the caller may only pass + * a prefix to it to Append(). + * That is, it is not correct to pass an interior pointer to Append(). + * + * The default implementation always returns the scratch buffer.
+ * + * @param min_capacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desired_capacity_hint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratch_capacity capacity of the scratch buffer + * @param result_capacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *result_capacity>=min_capacity + * @stable ICU 4.2 + */ + virtual char* GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, int32_t scratch_capacity, + int32_t* result_capacity); + + /** + * Flush internal buffers. + * Some byte sinks use internal buffers or provide buffering + * and require calling Flush() at the end of the stream. + * The ByteSink should be ready for further Append() calls after Flush(). + * The default implementation of Flush() does nothing. + * @stable ICU 4.2 + */ + virtual void Flush(); + +private: + ByteSink(const ByteSink &) = delete; + ByteSink &operator=(const ByteSink &) = delete; +}; + +// ------------------------------------------------------------- +// Some standard implementations + +/** + * Implementation of ByteSink that writes to a flat byte array, + * with bounds-checking: + * This sink will not write more than capacity bytes to outbuf. + * If more than capacity bytes are Append()ed, then excess bytes are ignored, + * and Overflowed() will return true. + * Overflow does not cause a runtime error. + * @stable ICU 4.2 + */ +class U_COMMON_API CheckedArrayByteSink : public ByteSink { +public: + /** + * Constructs a ByteSink that will write to outbuf[0..capacity-1]. + * @param outbuf buffer to write to + * @param capacity size of the buffer + * @stable ICU 4.2 + */ + CheckedArrayByteSink(char* outbuf, int32_t capacity); + /** + * Destructor. 
+ * @stable ICU 4.2 + */ + virtual ~CheckedArrayByteSink(); + /** + * Returns the sink to its original state, without modifying the buffer. + * Useful for reusing both the buffer and the sink for multiple streams. + * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0 + * and Overflowed()=false. + * @return *this + * @stable ICU 4.6 + */ + virtual CheckedArrayByteSink& Reset(); + /** + * Append "bytes[0,n-1]" to this. + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* bytes, int32_t n) override; + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *result_capacity. For details see the base class documentation. + * @param min_capacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desired_capacity_hint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratch_capacity capacity of the scratch buffer + * @param result_capacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *result_capacity>=min_capacity + * @stable ICU 4.2 + */ + virtual char* GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, int32_t scratch_capacity, + int32_t* result_capacity) override; + /** + * Returns the number of bytes actually written to the sink. + * @return number of bytes written to the buffer + * @stable ICU 4.2 + */ + int32_t NumberOfBytesWritten() const { return size_; } + /** + * Returns true if any bytes were discarded, i.e., if there was an + * attempt to write more than 'capacity' bytes. + * @return true if more than 'capacity' bytes were Append()ed + * @stable ICU 4.2 + */ + UBool Overflowed() const { return overflowed_; } + /** + * Returns the number of bytes appended to the sink. 
+ * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten() + * else they return the same number. + * @return number of bytes written to the buffer + * @stable ICU 4.6 + */ + int32_t NumberOfBytesAppended() const { return appended_; } +private: + char* outbuf_; + const int32_t capacity_; + int32_t size_; + int32_t appended_; + UBool overflowed_; + + CheckedArrayByteSink() = delete; + CheckedArrayByteSink(const CheckedArrayByteSink &) = delete; + CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete; +}; + +/** + * Implementation of ByteSink that writes to a "string". + * The StringClass is usually instantiated with a std::string. + * @stable ICU 4.2 + */ +template<typename StringClass> +class StringByteSink : public ByteSink { + public: + /** + * Constructs a ByteSink that will append bytes to the dest string. + * @param dest pointer to string object to append to + * @stable ICU 4.2 + */ + StringByteSink(StringClass* dest) : dest_(dest) { } + /** + * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string. + * + * @param dest pointer to string object to append to + * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d + * @stable ICU 60 + */ + StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) { + if (initialAppendCapacity > 0 && + static_cast<uint32_t>(initialAppendCapacity) > dest->capacity() - dest->length()) { + dest->reserve(dest->length() + initialAppendCapacity); + } + } + /** + * Append "bytes[0,n-1]" to this.
+ * @param data the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* data, int32_t n) override { dest_->append(data, n); } + private: + StringClass* dest_; + + StringByteSink() = delete; + StringByteSink(const StringByteSink &) = delete; + StringByteSink &operator=(const StringByteSink &) = delete; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __BYTESTREAM_H__ diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/bytestrie.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/bytestrie.h new file mode 100644 index 0000000000..c07dfada94 --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/bytestrie.h @@ -0,0 +1,568 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: bytestrie.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010sep25 +* created by: Markus W. Scherer +*/ + +#ifndef __BYTESTRIE_H__ +#define __BYTESTRIE_H__ + +/** + * \file + * \brief C++ API: Trie for mapping byte sequences to integer values. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" +#include "unicode/ustringtrie.h" + +class BytesTrieTest; + +U_NAMESPACE_BEGIN + +class ByteSink; +class BytesTrieBuilder; +class CharString; +class UVector32; + +/** + * Light-weight, non-const reader class for a BytesTrie. + * Traverses a byte-serialized data structure with minimal state, + * for mapping byte sequences to non-negative integer values. 
+ * + * This class owns the serialized trie data only if it was constructed by + * the builder's build() method. + * The public constructor and the copy constructor only alias the data (only copy the pointer). + * There is no assignment operator. + * + * This class is not intended for public subclassing. + * @stable ICU 4.8 + */ +class U_COMMON_API BytesTrie : public UMemory { +public: + /** + * Constructs a BytesTrie reader instance. + * + * The trieBytes must contain a copy of a byte sequence from the BytesTrieBuilder, + * starting with the first byte of that sequence. + * The BytesTrie object will not read more bytes than + * the BytesTrieBuilder generated in the corresponding build() call. + * + * The array is not copied/cloned and must not be modified while + * the BytesTrie object is in use. + * + * @param trieBytes The byte array that contains the serialized trie. + * @stable ICU 4.8 + */ + BytesTrie(const void *trieBytes) + : ownedArray_(nullptr), bytes_(static_cast<const uint8_t *>(trieBytes)), + pos_(bytes_), remainingMatchLength_(-1) {} + + /** + * Destructor. + * @stable ICU 4.8 + */ + ~BytesTrie(); + + /** + * Copy constructor, copies the other trie reader object and its state, + * but not the byte array which will be shared. (Shallow copy.) + * @param other Another BytesTrie object. + * @stable ICU 4.8 + */ + BytesTrie(const BytesTrie &other) + : ownedArray_(nullptr), bytes_(other.bytes_), + pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {} + + /** + * Resets this trie to its initial state. + * @return *this + * @stable ICU 4.8 + */ + BytesTrie &reset() { + pos_=bytes_; + remainingMatchLength_=-1; + return *this; + } + + /** + * Returns the state of this trie as a 64-bit integer. + * The state value is never 0.
+ * + * @return opaque state value + * @see resetToState64 + * @stable ICU 65 + */ + uint64_t getState64() const { + return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) | + static_cast<uint64_t>(pos_ - bytes_); + } + + /** + * Resets this trie to the saved state. + * Unlike resetToState(State), the 64-bit state value + * must be from getState64() from the same trie object or + * from one initialized the exact same way. + * Because of no validation, this method is faster. + * + * @param state The opaque trie state value from getState64(). + * @return *this + * @see getState64 + * @see resetToState + * @see reset + * @stable ICU 65 + */ + BytesTrie &resetToState64(uint64_t state) { + remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2; + pos_ = bytes_ + (state & kState64PosMask); + return *this; + } + + /** + * BytesTrie state object, for saving a trie's current state + * and resetting the trie back to this state later. + * @stable ICU 4.8 + */ + class State : public UMemory { + public: + /** + * Constructs an empty State. + * @stable ICU 4.8 + */ + State() { bytes=nullptr; } + private: + friend class BytesTrie; + + const uint8_t *bytes; + const uint8_t *pos; + int32_t remainingMatchLength; + }; + + /** + * Saves the state of this trie. + * @param state The State object to hold the trie's state. + * @return *this + * @see resetToState + * @stable ICU 4.8 + */ + const BytesTrie &saveState(State &state) const { + state.bytes=bytes_; + state.pos=pos_; + state.remainingMatchLength=remainingMatchLength_; + return *this; + } + + /** + * Resets this trie to the saved state. + * If the state object contains no state, or the state of a different trie, + * then this trie remains unchanged. + * @param state The State object which holds a saved trie state.
+ * @return *this + * @see saveState + * @see reset + * @stable ICU 4.8 + */ + BytesTrie &resetToState(const State &state) { + if(bytes_==state.bytes && bytes_!=nullptr) { + pos_=state.pos; + remainingMatchLength_=state.remainingMatchLength; + } + return *this; + } + + /** + * Determines whether the byte sequence so far matches, whether it has a value, + * and whether another input byte can continue a matching byte sequence. + * @return The match/value Result. + * @stable ICU 4.8 + */ + UStringTrieResult current() const; + + /** + * Traverses the trie from the initial state for this input byte. + * Equivalent to reset().next(inByte). + * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. + * Values below -0x100 and above 0xff will never match. + * @return The match/value Result. + * @stable ICU 4.8 + */ + inline UStringTrieResult first(int32_t inByte) { + remainingMatchLength_=-1; + if(inByte<0) { + inByte+=0x100; + } + return nextImpl(bytes_, inByte); + } + + /** + * Traverses the trie from the current state for this input byte. + * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. + * Values below -0x100 and above 0xff will never match. + * @return The match/value Result. + * @stable ICU 4.8 + */ + UStringTrieResult next(int32_t inByte); + + /** + * Traverses the trie from the current state for this byte sequence. + * Equivalent to + * \code + * Result result=current(); + * for(each c in s) + * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH; + * result=next(c); + * return result; + * \endcode + * @param s A string or byte sequence. Can be nullptr if length is 0. + * @param length The length of the byte sequence. Can be -1 if NUL-terminated. + * @return The match/value Result. 
+ * @stable ICU 4.8 + */ + UStringTrieResult next(const char *s, int32_t length); + + /** + * Returns a matching byte sequence's value if called immediately after + * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE. + * getValue() can be called multiple times. + * + * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE! + * @return The value for the byte sequence so far. + * @stable ICU 4.8 + */ + inline int32_t getValue() const { + const uint8_t *pos=pos_; + int32_t leadByte=*pos++; + // U_ASSERT(leadByte>=kMinValueLead); + return readValue(pos, leadByte>>1); + } + + /** + * Determines whether all byte sequences reachable from the current state + * map to the same value. + * @param uniqueValue Receives the unique value, if this function returns true. + * (output-only) + * @return true if all byte sequences reachable from the current state + * map to the same value. + * @stable ICU 4.8 + */ + inline UBool hasUniqueValue(int32_t &uniqueValue) const { + const uint8_t *pos=pos_; + // Skip the rest of a pending linear-match node. + return pos!=nullptr && findUniqueValue(pos+remainingMatchLength_+1, false, uniqueValue); + } + + /** + * Finds each byte which continues the byte sequence from the current state. + * That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now. + * @param out Each next byte is appended to this object. + * (Only uses the out.Append(s, length) method.) + * @return the number of bytes which continue the byte sequence from here + * @stable ICU 4.8 + */ + int32_t getNextBytes(ByteSink &out) const; + + /** + * Iterator for all of the (byte sequence, value) pairs in a BytesTrie. + * @stable ICU 4.8 + */ + class U_COMMON_API Iterator : public UMemory { + public: + /** + * Iterates from the root of a byte-serialized BytesTrie. + * @param trieBytes The trie bytes. + * @param maxStringLength If 0, the iterator returns full strings/byte sequences. 
+ * Otherwise, the iterator returns strings with this maximum length. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @stable ICU 4.8 + */ + Iterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode); + + /** + * Iterates from the current state of the specified BytesTrie. + * @param trie The trie whose state will be copied for iteration. + * @param maxStringLength If 0, the iterator returns full strings/byte sequences. + * Otherwise, the iterator returns strings with this maximum length. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @stable ICU 4.8 + */ + Iterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); + + /** + * Destructor. + * @stable ICU 4.8 + */ + ~Iterator(); + + /** + * Resets this iterator to its initial state. + * @return *this + * @stable ICU 4.8 + */ + Iterator &reset(); + + /** + * @return true if there are more elements. + * @stable ICU 4.8 + */ + UBool hasNext() const; + + /** + * Finds the next (byte sequence, value) pair if there is one. + * + * If the byte sequence is truncated to the maximum length and does not + * have a real value, then the value is set to -1. + * In this case, this "not a real value" is indistinguishable from + * a real value of -1. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return true if there is another element. 
+ * @stable ICU 4.8 + */ + UBool next(UErrorCode &errorCode); + + /** + * @return The NUL-terminated byte sequence for the last successful next(). + * @stable ICU 4.8 + */ + StringPiece getString() const; + /** + * @return The value for the last successful next(). + * @stable ICU 4.8 + */ + int32_t getValue() const { return value_; } + + private: + UBool truncateAndStop(); + + const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode); + + const uint8_t *bytes_; + const uint8_t *pos_; + const uint8_t *initialPos_; + int32_t remainingMatchLength_; + int32_t initialRemainingMatchLength_; + + CharString *str_; + int32_t maxLength_; + int32_t value_; + + // The stack stores pairs of integers for backtracking to another + // outbound edge of a branch node. + // The first integer is an offset from bytes_. + // The second integer has the str_->length() from before the node in bits 15..0, + // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.) + // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24, + // but the code looks more confusing that way.) + UVector32 *stack_; + }; + +private: + friend class BytesTrieBuilder; + friend class ::BytesTrieTest; + + /** + * Constructs a BytesTrie reader instance. + * Unlike the public constructor which just aliases an array, + * this constructor adopts the builder's array. + * This constructor is only called by the builder. + */ + BytesTrie(void *adoptBytes, const void *trieBytes) + : ownedArray_(static_cast<char *>(adoptBytes)), + bytes_(static_cast<const uint8_t *>(trieBytes)), + pos_(bytes_), remainingMatchLength_(-1) {} + + // No assignment operator. + BytesTrie &operator=(const BytesTrie &other) = delete; + + inline void stop() { + pos_=nullptr; + } + + // Reads a compact 32-bit integer. + // pos is already after the leadByte, and the lead byte is already shifted right by 1.
+    static int32_t readValue(const uint8_t *pos, int32_t leadByte);
+    static inline const uint8_t *skipValue(const uint8_t *pos, int32_t leadByte) {
+        // U_ASSERT(leadByte>=kMinValueLead);
+        if(leadByte>=(kMinTwoByteValueLead<<1)) {
+            if(leadByte<(kMinThreeByteValueLead<<1)) {
+                ++pos;
+            } else if(leadByte<(kFourByteValueLead<<1)) {
+                pos+=2;
+            } else {
+                pos+=3+((leadByte>>1)&1);
+            }
+        }
+        return pos;
+    }
+    static inline const uint8_t *skipValue(const uint8_t *pos) {
+        int32_t leadByte=*pos++;
+        return skipValue(pos, leadByte);
+    }
+
+    // Reads a jump delta and jumps.
+    static const uint8_t *jumpByDelta(const uint8_t *pos);
+
+    static inline const uint8_t *skipDelta(const uint8_t *pos) {  // NOTE(review): body restored after markup stripping truncated it; confirm against upstream ICU
+        int32_t delta=*pos++;  // lead byte of the compact delta
+        if(delta>=kMinTwoByteDeltaLead) {  // smaller leads carry the whole delta in the lead byte
+            if(delta<kMinThreeByteDeltaLead) {
+                ++pos;  // one trailing byte
+            } else if(delta<kFourByteDeltaLead) {
+                pos+=2;  // two trailing bytes
+            } else {
+                pos+=3+(delta&1);  // even kFourByteDeltaLead: 3 trailing bytes; odd kFiveByteDeltaLead: 4
+            }
+        }
+        return pos;
+    }
+
+    static inline UStringTrieResult valueResult(int32_t node) {
+        return static_cast<UStringTrieResult>(USTRINGTRIE_INTERMEDIATE_VALUE - (node & kValueIsFinal));  // a set final bit selects the enumerator one below INTERMEDIATE_VALUE
+    }
+
+    // Handles a branch node for both next(byte) and next(string).
+    UStringTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);
+
+    // Requires remainingLength_<0.
+    UStringTrieResult nextImpl(const uint8_t *pos, int32_t inByte);
+
+    // Helper functions for hasUniqueValue().
+    // Recursively finds a unique value (or whether there is not a unique one)
+    // from a branch.
+    static const uint8_t *findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
+                                                    UBool haveUniqueValue, int32_t &uniqueValue);
+    // Recursively finds a unique value (or whether there is not a unique one)
+    // starting from a position on a node lead byte.
+    static UBool findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
+
+    // Helper functions for getNextBytes().
+    // getNextBytes() when pos is on a branch node.
+    static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out);
+    static void append(ByteSink &out, int c);
+
+    // BytesTrie data structure
+    //
+    // The trie consists of a series of byte-serialized nodes for incremental
+    // string/byte sequence matching.
The root node is at the beginning of the trie data. + // + // Types of nodes are distinguished by their node lead byte ranges. + // After each node, except a final-value node, another node follows to + // encode match values or continue matching further bytes. + // + // Node types: + // - Value node: Stores a 32-bit integer in a compact, variable-length format. + // The value is for the string/byte sequence so far. + // One node bit indicates whether the value is final or whether + // matching continues with the next node. + // - Linear-match node: Matches a number of bytes. + // - Branch node: Branches to other nodes according to the current input byte. + // The node byte is the length of the branch (number of bytes to select from) + // minus 1. It is followed by a sub-node: + // - If the length is at most kMaxBranchLinearSubNodeLength, then + // there are length-1 (key, value) pairs and then one more comparison byte. + // If one of the key bytes matches, then the value is either a final value for + // the string/byte sequence so far, or a "jump" delta to the next node. + // If the last byte matches, then matching continues with the next node. + // (Values have the same encoding as value nodes.) + // - If the length is greater than kMaxBranchLinearSubNodeLength, then + // there is one byte and one "jump" delta. + // If the input byte is less than the sub-node byte, then "jump" by delta to + // the next sub-node which will have a length of length/2. + // (The delta has its own compact encoding.) + // Otherwise, skip the "jump" delta to the next sub-node + // which will have a length of length-length/2. + + // Node lead byte values. + + // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise + // the length is one more than the next byte. + + // For a branch sub-node with at most this many entries, we drop down + // to a linear search. 
+ static const int32_t kMaxBranchLinearSubNodeLength=5; + + // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node. + static const int32_t kMinLinearMatch=0x10; + static const int32_t kMaxLinearMatchLength=0x10; + + // 20..ff: Variable-length value node. + // If odd, the value is final. (Otherwise, intermediate value or jump delta.) + // Then shift-right by 1 bit. + // The remaining lead byte value indicates the number of following bytes (0..4) + // and contains the value's top bits. + static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x20 + // It is a final value if bit 0 is set. + static const int32_t kValueIsFinal=1; + + // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds. + static const int32_t kMinOneByteValueLead=kMinValueLead/2; // 0x10 + static const int32_t kMaxOneByteValue=0x40; // At least 6 bits in the first byte. + + static const int32_t kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1; // 0x51 + static const int32_t kMaxTwoByteValue=0x1aff; + + static const int32_t kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1; // 0x6c + static const int32_t kFourByteValueLead=0x7e; + + // A little more than Unicode code points. (0x11ffff) + static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1; + + static const int32_t kFiveByteValueLead=0x7f; + + // Compact delta integers. 
+ static const int32_t kMaxOneByteDelta=0xbf; + static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1; // 0xc0 + static const int32_t kMinThreeByteDeltaLead=0xf0; + static const int32_t kFourByteDeltaLead=0xfe; + static const int32_t kFiveByteDeltaLead=0xff; + + static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff + static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff + + // For getState64(): + // The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2 + // so we need at least 5 bits for that. + // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength. + static constexpr int32_t kState64RemainingShift = 59; + static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1; + + uint8_t *ownedArray_; + + // Fixed value referencing the BytesTrie bytes. + const uint8_t *bytes_; + + // Iterator variables. + + // Pointer to next trie byte to read. nullptr if no more matches. + const uint8_t *pos_; + // Remaining length of a linear-match node, minus 1. Negative if not in such a node. + int32_t remainingMatchLength_; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __BYTESTRIE_H__ diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/bytestriebuilder.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/bytestriebuilder.h new file mode 100644 index 0000000000..ec9c625473 --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/bytestriebuilder.h @@ -0,0 +1,193 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2016, International Business Machines +* Corporation and others. All Rights Reserved. 
+******************************************************************************* +* file name: bytestriebuilder.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010sep25 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C++ API: Builder for icu::BytesTrie + */ + +#ifndef __BYTESTRIEBUILDER_H__ +#define __BYTESTRIEBUILDER_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/bytestrie.h" +#include "unicode/stringpiece.h" +#include "unicode/stringtriebuilder.h" + +class BytesTrieTest; + +U_NAMESPACE_BEGIN + +class BytesTrieElement; +class CharString; +/** + * Builder class for BytesTrie. + * + * This class is not intended for public subclassing. + * @stable ICU 4.8 + */ +class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { +public: + /** + * Constructs an empty builder. + * @param errorCode Standard ICU error code. + * @stable ICU 4.8 + */ + BytesTrieBuilder(UErrorCode &errorCode); + + /** + * Destructor. + * @stable ICU 4.8 + */ + virtual ~BytesTrieBuilder(); + + /** + * Adds a (byte sequence, value) pair. + * The byte sequence must be unique. + * The bytes will be copied; the builder does not keep + * a reference to the input StringPiece or its data(). + * @param s The input byte sequence. + * @param value The value associated with this byte sequence. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @stable ICU 4.8 + */ + BytesTrieBuilder &add(StringPiece s, int32_t value, UErrorCode &errorCode); + + /** + * Builds a BytesTrie for the add()ed data. + * Once built, no further data can be add()ed until clear() is called. + * + * A BytesTrie cannot be empty. At least one (byte sequence, value) pair + * must have been add()ed. 
+ * + * This method passes ownership of the builder's internal result array to the new trie object. + * Another call to any build() variant will re-serialize the trie. + * After clear() has been called, a new array will be used as well. + * @param buildOption Build option, see UStringTrieBuildOption. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return A new BytesTrie for the add()ed data. + * @stable ICU 4.8 + */ + BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); + + /** + * Builds a BytesTrie for the add()ed data and byte-serializes it. + * Once built, no further data can be add()ed until clear() is called. + * + * A BytesTrie cannot be empty. At least one (byte sequence, value) pair + * must have been add()ed. + * + * Multiple calls to buildStringPiece() return StringPieces referring to the + * builder's same byte array, without rebuilding. + * If buildStringPiece() is called after build(), the trie will be + * re-serialized into a new array (because build() passes on ownership). + * If build() is called after buildStringPiece(), the trie object returned + * by build() will become the owner of the underlying string for the + * previously returned StringPiece. + * After clear() has been called, a new array will be used as well. + * @param buildOption Build option, see UStringTrieBuildOption. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. 
+ * @stable ICU 4.8 + */ + StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); + + /** + * Removes all (byte sequence, value) pairs. + * New data can then be add()ed and a new trie can be built. + * @return *this + * @stable ICU 4.8 + */ + BytesTrieBuilder &clear(); + +private: + friend class ::BytesTrieTest; + + BytesTrieBuilder(const BytesTrieBuilder &other) = delete; // no copy constructor + BytesTrieBuilder &operator=(const BytesTrieBuilder &other) = delete; // no assignment operator + + void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); + + virtual int32_t getElementStringLength(int32_t i) const override; + virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const override; + virtual int32_t getElementValue(int32_t i) const override; + + virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const override; + + virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const override; + virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const override; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const override; + + virtual UBool matchNodesCanHaveValues() const override { return false; } + + virtual int32_t getMaxBranchLinearSubNodeLength() const override { return BytesTrie::kMaxBranchLinearSubNodeLength; } + virtual int32_t getMinLinearMatch() const override { return BytesTrie::kMinLinearMatch; } + virtual int32_t getMaxLinearMatchLength() const override { return BytesTrie::kMaxLinearMatchLength; } + + /** + * @internal (private) + */ + class BTLinearMatchNode : public LinearMatchNode { + public: + BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); + virtual bool operator==(const Node &other) const override; + virtual void write(StringTrieBuilder &builder) override; + private: + const char *s; + }; + + virtual Node 
*createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, + Node *nextNode) const override; + + UBool ensureCapacity(int32_t length); + virtual int32_t write(int32_t byte) override; + int32_t write(const char *b, int32_t length); + virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) override; + virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) override; + virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) override; + virtual int32_t writeDeltaTo(int32_t jumpTarget) override; + static int32_t internalEncodeDelta(int32_t i, char intBytes[]); + + CharString *strings; // Pointer not object so we need not #include internal charstr.h. + BytesTrieElement *elements; + int32_t elementsCapacity; + int32_t elementsLength; + + // Byte serialization of the trie. + // Grows from the back: bytesLength measures from the end of the buffer! + char *bytes; + int32_t bytesCapacity; + int32_t bytesLength; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __BYTESTRIEBUILDER_H__ diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/calendar.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/calendar.h new file mode 100644 index 0000000000..4499e281f9 --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/calendar.h @@ -0,0 +1,2576 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File CALENDAR.H +* +* Modification History: +* +* Date Name Description +* 04/22/97 aliu Expanded and corrected comments and other header +* contents. 
+* 05/01/97 aliu Made equals(), before(), after() arguments const. +* 05/20/97 aliu Replaced fAreFieldsSet with fAreFieldsInSync and +* fAreAllFieldsSet. +* 07/27/98 stephen Sync up with JDK 1.2 +* 11/15/99 weiv added YEAR_WOY and DOW_LOCAL +* to EDateFields +* 8/19/2002 srl Removed Javaisms +* 11/07/2003 srl Update, clean up documentation. +******************************************************************************** +*/ + +#ifndef CALENDAR_H +#define CALENDAR_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C++ API: Calendar object + */ +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uobject.h" +#include "unicode/locid.h" +#include "unicode/timezone.h" +#include "unicode/ucal.h" +#include "unicode/umisc.h" + +U_NAMESPACE_BEGIN + +class ICUServiceFactory; + +// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, +// it is a return type for a virtual method (@internal) +/** + * @internal + */ +typedef int32_t UFieldResolutionTable[12][8]; + +class BasicTimeZone; +class CharString; +/** + * `Calendar` is an abstract base class for converting between + * a `UDate` object and a set of integer fields such as + * `YEAR`, `MONTH`, `DAY`, `HOUR`, and so on. + * (A `UDate` object represents a specific instant in + * time with millisecond precision. See UDate + * for information about the `UDate` class.) + * + * Subclasses of `Calendar` interpret a `UDate` + * according to the rules of a specific calendar system. + * The most commonly used subclass of `Calendar` is + * `GregorianCalendar`. Other subclasses could represent + * the various types of lunar calendars in use in many parts of the world. + * + * **NOTE**: (ICU 2.6) The subclass interface should be considered unstable - + * it WILL change. + * + * Like other locale-sensitive classes, `Calendar` provides a + * static method, `createInstance`, for getting a generally useful + * object of this type. 
`Calendar`'s `createInstance` method + * returns the appropriate `Calendar` subclass whose + * time fields have been initialized with the current date and time: + * + * Calendar *rightNow = Calendar::createInstance(errCode); + * + * A `Calendar` object can produce all the time field values + * needed to implement the date-time formatting for a particular language + * and calendar style (for example, Japanese-Gregorian, Japanese-Traditional). + * + * When computing a `UDate` from time fields, some special circumstances + * may arise: there may be insufficient information to compute the + * `UDate` (such as only year and month but no day in the month), + * there may be inconsistent information (such as "Tuesday, July 15, 1996" + * -- July 15, 1996 is actually a Monday), or the input time might be ambiguous + * because of time zone transition. + * + * **Insufficient information.** The calendar will use default + * information to specify the missing fields. This may vary by calendar; for + * the Gregorian calendar, the default for a field is the same as that of the + * start of the epoch: i.e., YEAR = 1970, MONTH = JANUARY, DATE = 1, etc. + * + * **Inconsistent information.** If fields conflict, the calendar + * will give preference to fields set more recently. For example, when + * determining the day, the calendar will look for one of the following + * combinations of fields. The most recent combination, as determined by the + * most recently set single field, will be used. + * + * MONTH + DAY_OF_MONTH + * MONTH + WEEK_OF_MONTH + DAY_OF_WEEK + * MONTH + DAY_OF_WEEK_IN_MONTH + DAY_OF_WEEK + * DAY_OF_YEAR + * DAY_OF_WEEK + WEEK_OF_YEAR + * + * For the time of day: + * + * HOUR_OF_DAY + * AM_PM + HOUR + * + * **Ambiguous Wall Clock Time.** When time offset from UTC has + * changed, it produces an ambiguous time slot around the transition. For example, + * many US locations observe daylight saving time. 
On the date switching to daylight + * saving time in US, wall clock time jumps from 12:59 AM (standard) to 2:00 AM + * (daylight). Therefore, wall clock time from 1:00 AM to 1:59 AM do not exist on + * the date. When the input wall time fall into this missing time slot, the ICU + * Calendar resolves the time using the UTC offset before the transition by default. + * In this example, 1:30 AM is interpreted as 1:30 AM standard time (non-exist), + * so the final result will be 2:30 AM daylight time. + * + * On the date switching back to standard time, wall clock time is moved back one + * hour at 2:00 AM. So wall clock time from 1:00 AM to 1:59 AM occur twice. In this + * case, the ICU Calendar resolves the time using the UTC offset after the transition + * by default. For example, 1:30 AM on the date is resolved as 1:30 AM standard time. + * + * Ambiguous wall clock time resolution behaviors can be customized by Calendar APIs + * {@link #setRepeatedWallTimeOption} and {@link #setSkippedWallTimeOption}. + * These methods are available in ICU 49 or later versions. + * + * **Note:** for some non-Gregorian calendars, different + * fields may be necessary for complete disambiguation. For example, a full + * specification of the historical Arabic astronomical calendar requires year, + * month, day-of-month *and* day-of-week in some cases. + * + * **Note:** There are certain possible ambiguities in + * interpretation of certain singular times, which are resolved in the + * following ways: + * + * 1. 24:00:00 "belongs" to the following day. That is, + * 23:59 on Dec 31, 1969 < 24:00 on Jan 1, 1970 < 24:01:00 on Jan 1, 1970 + * 2. Although historically not precise, midnight also belongs to "am", + * and noon belongs to "pm", so on the same day, + * 12:00 am (midnight) < 12:01 am, and 12:00 pm (noon) < 12:01 pm + * + * The date or time format strings are not part of the definition of a + * calendar, as those must be modifiable or overridable by the user at + * runtime. 
Use `DateFormat` to format dates. + * + * `Calendar` provides an API for field "rolling", where fields + * can be incremented or decremented, but wrap around. For example, rolling the + * month up in the date December 12, **1996** results in + * January 12, **1996**. + * + * `Calendar` also provides a date arithmetic function for + * adding the specified (signed) amount of time to a particular time field. + * For example, subtracting 5 days from the date `September 12, 1996` + * results in `September 7, 1996`. + * + * ***Supported range*** + * + * The allowable range of `Calendar` has been narrowed. `GregorianCalendar` used + * to attempt to support the range of dates with millisecond values from + * `Long.MIN_VALUE` to `Long.MAX_VALUE`. The new `Calendar` protocol specifies the + * maximum range of supportable dates as those having Julian day numbers + * of `-0x7F000000` to `+0x7F000000`. This corresponds to years from ~5,800,000 BCE + * to ~5,800,000 CE. Programmers should use the protected constants in `Calendar` to + * specify an extremely early or extremely late date. + * + *

+ * The Japanese calendar uses a combination of era name and year number. + * When an emperor of Japan abdicates and a new emperor ascends the throne, + * a new era is declared and year number is reset to 1. Even if the date of + * abdication is scheduled ahead of time, the new era name might not be + * announced until just before the date. In such case, ICU4C may include + * a start date of future era without actual era name, but not enabled + * by default. ICU4C users who want to test the behavior of the future era + * can enable the tentative era by: + *

+ *   - Environment variable `ICU_ENABLE_TENTATIVE_ERA=true`.
+ *
+ * + * @stable ICU 2.0 + */ +class U_I18N_API Calendar : public UObject { +public: +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Field IDs for date and time. Used to specify date/time fields. ERA is calendar + * specific. Example ranges given are for illustration only; see specific Calendar + * subclasses for actual ranges. + * @deprecated ICU 2.6. Use C enum UCalendarDateFields defined in ucal.h + */ + enum EDateFields { +#ifndef U_HIDE_DEPRECATED_API +/* + * ERA may be defined on other platforms. To avoid any potential problems undefined it here. + */ +#ifdef ERA +#undef ERA +#endif + ERA, // Example: 0..1 + YEAR, // Example: 1..big number + MONTH, // Example: 0..11 + WEEK_OF_YEAR, // Example: 1..53 + WEEK_OF_MONTH, // Example: 1..4 + DATE, // Example: 1..31 + DAY_OF_YEAR, // Example: 1..365 + DAY_OF_WEEK, // Example: 1..7 + DAY_OF_WEEK_IN_MONTH, // Example: 1..4, may be specified as -1 + AM_PM, // Example: 0..1 + HOUR, // Example: 0..11 + HOUR_OF_DAY, // Example: 0..23 + MINUTE, // Example: 0..59 + SECOND, // Example: 0..59 + MILLISECOND, // Example: 0..999 + ZONE_OFFSET, // Example: -12*U_MILLIS_PER_HOUR..12*U_MILLIS_PER_HOUR + DST_OFFSET, // Example: 0 or U_MILLIS_PER_HOUR + YEAR_WOY, // 'Y' Example: 1..big number - Year of Week of Year + DOW_LOCAL, // 'e' Example: 1..7 - Day of Week / Localized + + EXTENDED_YEAR, + JULIAN_DAY, + MILLISECONDS_IN_DAY, + IS_LEAP_MONTH, + + FIELD_COUNT = UCAL_FIELD_COUNT // See ucal.h for other fields. +#endif /* U_HIDE_DEPRECATED_API */ + }; +#endif // U_FORCE_HIDE_DEPRECATED_API + +#ifndef U_HIDE_DEPRECATED_API + /** + * Useful constant for days of week. Note: Calendar day-of-week is 1-based. Clients + * who create locale resources for the field of first-day-of-week should be aware of + * this. For instance, in US locale, first-day-of-week is set to 1, i.e., SUNDAY. + * @deprecated ICU 2.6. 
Use C enum UCalendarDaysOfWeek defined in ucal.h + */ + enum EDaysOfWeek { + SUNDAY = 1, + MONDAY, + TUESDAY, + WEDNESDAY, + THURSDAY, + FRIDAY, + SATURDAY + }; + + /** + * Useful constants for month. Note: Calendar month is 0-based. + * @deprecated ICU 2.6. Use C enum UCalendarMonths defined in ucal.h + */ + enum EMonths { + JANUARY, + FEBRUARY, + MARCH, + APRIL, + MAY, + JUNE, + JULY, + AUGUST, + SEPTEMBER, + OCTOBER, + NOVEMBER, + DECEMBER, + UNDECIMBER + }; + + /** + * Useful constants for hour in 12-hour clock. Used in GregorianCalendar. + * @deprecated ICU 2.6. Use C enum UCalendarAMPMs defined in ucal.h + */ + enum EAmpm { + AM, + PM + }; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * destructor + * @stable ICU 2.0 + */ + virtual ~Calendar(); + + /** + * Create and return a polymorphic copy of this calendar. + * + * @return a polymorphic copy of this calendar. + * @stable ICU 2.0 + */ + virtual Calendar* clone() const = 0; + + /** + * Creates a Calendar using the default timezone and locale. Clients are responsible + * for deleting the object returned. + * + * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. U_MISSING_RESOURCE_ERROR will be returned if the resource data + * requests a calendar type which has not been installed. + * @return A Calendar if created successfully. nullptr otherwise. + * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(UErrorCode& success); + + /** + * Creates a Calendar using the given timezone and the default locale. + * The Calendar takes ownership of zoneToAdopt; the + * client must not delete it. + * + * @param zoneToAdopt The given timezone to be adopted. + * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. + * @return A Calendar if created successfully. nullptr otherwise. 
+ * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(TimeZone* zoneToAdopt, UErrorCode& success); + + /** + * Creates a Calendar using the given timezone and the default locale. The TimeZone + * is _not_ adopted; the client is still responsible for deleting it. + * + * @param zone The timezone. + * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. + * @return A Calendar if created successfully. nullptr otherwise. + * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(const TimeZone& zone, UErrorCode& success); + + /** + * Creates a Calendar using the default timezone and the given locale. + * + * @param aLocale The given locale. + * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. + * @return A Calendar if created successfully. nullptr otherwise. + * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(const Locale& aLocale, UErrorCode& success); + + /** + * Creates a Calendar using the given timezone and given locale. + * The Calendar takes ownership of zoneToAdopt; the + * client must not delete it. + * + * @param zoneToAdopt The given timezone to be adopted. + * @param aLocale The given locale. + * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. + * @return A Calendar if created successfully. nullptr otherwise. + * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(TimeZone* zoneToAdopt, const Locale& aLocale, UErrorCode& success); + + /** + * Gets a Calendar using the given timezone and given locale. The TimeZone + * is _not_ adopted; the client is still responsible for deleting it. + * + * @param zone The given timezone. + * @param aLocale The given locale. 
+ * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. + * @return A Calendar if created successfully. nullptr otherwise. + * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(const TimeZone& zone, const Locale& aLocale, UErrorCode& success); + + /** + * Returns a list of the locales for which Calendars are installed. + * + * @param count Number of locales returned. + * @return An array of Locale objects representing the set of locales for which + * Calendars are installed. The system retains ownership of this list; + * the caller must NOT delete it. Does not include user-registered Calendars. + * @stable ICU 2.0 + */ + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); + + + /** + * Given a key and a locale, returns an array of string values in a preferred + * order that would make a difference. These are all and only those values where + * the open (creation) of the service with the locale formed from the input locale + * plus input keyword and that value has different behavior than creation with the + * input locale alone. + * @param key one of the keys supported by this service. For now, only + * "calendar" is supported. + * @param locale the locale + * @param commonlyUsed if set to true it will return only commonly used values + * with the given locale in preferred order. Otherwise, + * it will return all the available values for the locale. + * @param status ICU Error Code + * @return a string enumeration over keyword values for the given key and the locale. + * @stable ICU 4.2 + */ + static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* key, + const Locale& locale, UBool commonlyUsed, UErrorCode& status); + + /** + * Returns the current UTC (GMT) time measured in milliseconds since 0:00:00 on 1/1/70 + * (derived from the system time). + * + * @return The current UTC time in milliseconds. 
+ * @stable ICU 2.0 + */ + static UDate U_EXPORT2 getNow(); + + /** + * Gets this Calendar's time as milliseconds. May involve recalculation of time due + * to previous calls to set time field values. The time specified is non-local UTC + * (GMT) time. Although this method is const, this object may actually be changed + * (semantically const). + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @return The current time in UTC (GMT) time, or zero if the operation + * failed. + * @stable ICU 2.0 + */ + inline UDate getTime(UErrorCode& status) const { return getTimeInMillis(status); } + + /** + * Sets this Calendar's current time with the given UDate. The time specified should + * be in non-local UTC (GMT) time. + * + * @param date The given UDate in UTC (GMT) time. + * @param status Output param set to success/failure code on exit. If any value + * set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.0 + */ + inline void setTime(UDate date, UErrorCode& status) { setTimeInMillis(date, status); } + + /** + * Compares the equality of two Calendar objects. Objects of different subclasses + * are considered unequal. This comparison is very exacting; two Calendar objects + * must be in exactly the same state to be considered equal. To compare based on the + * represented time, use equals() instead. + * + * @param that The Calendar object to be compared with. + * @return true if the given Calendar is the same as this Calendar; false + * otherwise. + * @stable ICU 2.0 + */ + virtual bool operator==(const Calendar& that) const; + + /** + * Compares the inequality of two Calendar objects. + * + * @param that The Calendar object to be compared with. + * @return true if the given Calendar is not the same as this Calendar; false + * otherwise. 
+ * @stable ICU 2.0 + */ + bool operator!=(const Calendar& that) const {return !operator==(that);} + + /** + * Returns true if the given Calendar object is equivalent to this + * one. An equivalent Calendar will behave exactly as this one + * does, but it may be set to a different time. By contrast, for + * the operator==() method to return true, the other Calendar must + * be set to the same time. + * + * @param other the Calendar to be compared with this Calendar + * @stable ICU 2.4 + */ + virtual UBool isEquivalentTo(const Calendar& other) const; + + /** + * Compares the Calendar time, whereas Calendar::operator== compares the equality of + * Calendar objects. + * + * @param when The Calendar to be compared with this Calendar. Although this is a + * const parameter, the object may be modified physically + * (semantically const). + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @return True if the current time of this Calendar is equal to the time of + * Calendar when; false otherwise. + * @stable ICU 2.0 + */ + UBool equals(const Calendar& when, UErrorCode& status) const; + + /** + * Returns true if this Calendar's current time is before "when"'s current time. + * + * @param when The Calendar to be compared with this Calendar. Although this is a + * const parameter, the object may be modified physically + * (semantically const). + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @return True if the current time of this Calendar is before the time of + * Calendar when; false otherwise. + * @stable ICU 2.0 + */ + UBool before(const Calendar& when, UErrorCode& status) const; + + /** + * Returns true if this Calendar's current time is after "when"'s current time. 
+ * + * @param when The Calendar to be compared with this Calendar. Although this is a + * const parameter, the object may be modified physically + * (semantically const). + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @return True if the current time of this Calendar is after the time of + * Calendar when; false otherwise. + * @stable ICU 2.0 + */ + UBool after(const Calendar& when, UErrorCode& status) const; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * UDate Arithmetic function. Adds the specified (signed) amount of time to the given + * time field, based on the calendar's rules. For example, to subtract 5 days from + * the current time of the calendar, call add(Calendar::DATE, -5). When adding on + * the month or Calendar::MONTH field, other fields like date might conflict and + * need to be changed. For instance, adding 1 month on the date 01/31/96 will result + * in 02/29/96. + * Adding a positive value always means moving forward in time, so for the Gregorian calendar, + * starting with 100 BC and adding +1 to year results in 99 BC (even though this actually reduces + * the numeric value of the field itself). + * + * @param field Specifies which date field to modify. + * @param amount The amount of time to be added to the field, in the natural unit + * for that field (e.g., days for the day fields, hours for the hour + * field.) + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @deprecated ICU 2.6. use add(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead. + */ + virtual void add(EDateFields field, int32_t amount, UErrorCode& status); +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * UDate Arithmetic function. 
Adds the specified (signed) amount of time to the given + * time field, based on the calendar's rules. For example, to subtract 5 days from + * the current time of the calendar, call add(Calendar::DATE, -5). When adding on + * the month or Calendar::MONTH field, other fields like date might conflict and + * need to be changed. For instance, adding 1 month on the date 01/31/96 will result + * in 02/29/96. + * Adding a positive value always means moving forward in time, so for the Gregorian calendar, + * starting with 100 BC and adding +1 to year results in 99 BC (even though this actually reduces + * the numeric value of the field itself). + * + * @param field Specifies which date field to modify. + * @param amount The amount of time to be added to the field, in the natural unit + * for that field (e.g., days for the day fields, hours for the hour + * field.) + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.6. + */ + virtual void add(UCalendarDateFields field, int32_t amount, UErrorCode& status); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Time Field Rolling function. Rolls (up/down) a single unit of time on the given + * time field. For example, to roll the current date up by one day, call + * roll(Calendar::DATE, true). When rolling on the year or Calendar::YEAR field, it + * will roll the year value in the range between getMinimum(Calendar::YEAR) and the + * value returned by getMaximum(Calendar::YEAR). When rolling on the month or + * Calendar::MONTH field, other fields like date might conflict and, need to be + * changed. For instance, rolling the month up on the date 01/31/96 will result in + * 02/29/96. 
Rolling up always means rolling forward in time (unless the limit of the + * field is reached, in which case it may pin or wrap), so for Gregorian calendar, + * starting with 100 BC and rolling the year up results in 99 BC. + * When eras have a definite beginning and end (as in the Chinese calendar, or as in + * most eras in the Japanese calendar) then rolling the year past either limit of the + * era will cause the year to wrap around. When eras only have a limit at one end, + * then attempting to roll the year past that limit will result in pinning the year + * at that limit. Note that for most calendars in which era 0 years move forward in + * time (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to + * result in negative years for era 0 (that is the only way to represent years before + * the calendar epoch). + * When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the + * hour value in the range between 0 and 23, which is zero-based. + *

+ * NOTE: Do not use this method -- use roll(EDateFields, int, UErrorCode&) instead. + * + * @param field The time field. + * @param up Indicates if the value of the specified time field is to be rolled + * up or rolled down. Use true if rolling up, false otherwise. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, UBool up, UErrorCode& status) instead. + */ + inline void roll(EDateFields field, UBool up, UErrorCode& status); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Time Field Rolling function. Rolls (up/down) a single unit of time on the given + * time field. For example, to roll the current date up by one day, call + * roll(Calendar::DATE, true). When rolling on the year or Calendar::YEAR field, it + * will roll the year value in the range between getMinimum(Calendar::YEAR) and the + * value returned by getMaximum(Calendar::YEAR). When rolling on the month or + * Calendar::MONTH field, other fields like date might conflict and, need to be + * changed. For instance, rolling the month up on the date 01/31/96 will result in + * 02/29/96. Rolling up always means rolling forward in time (unless the limit of the + * field is reached, in which case it may pin or wrap), so for Gregorian calendar, + * starting with 100 BC and rolling the year up results in 99 BC. + * When eras have a definite beginning and end (as in the Chinese calendar, or as in + * most eras in the Japanese calendar) then rolling the year past either limit of the + * era will cause the year to wrap around. When eras only have a limit at one end, + * then attempting to roll the year past that limit will result in pinning the year + * at that limit. 
Note that for most calendars in which era 0 years move forward in + * time (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to + * result in negative years for era 0 (that is the only way to represent years before + * the calendar epoch). + * When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the + * hour value in the range between 0 and 23, which is zero-based. + *

+ * NOTE: Do not use this method -- use roll(UCalendarDateFields, int, UErrorCode&) instead. + * + * @param field The time field. + * @param up Indicates if the value of the specified time field is to be rolled + * up or rolled down. Use true if rolling up, false otherwise. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.6. + */ + inline void roll(UCalendarDateFields field, UBool up, UErrorCode& status); + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Time Field Rolling function. Rolls by the given amount on the given + * time field. For example, to roll the current date up by one day, call + * roll(Calendar::DATE, +1, status). When rolling on the month or + * Calendar::MONTH field, other fields like date might conflict and, need to be + * changed. For instance, rolling the month up on the date 01/31/96 will result in + * 02/29/96. Rolling by a positive value always means rolling forward in time (unless + * the limit of the field is reached, in which case it may pin or wrap), so for + * Gregorian calendar, starting with 100 BC and rolling the year by + 1 results in 99 BC. + * When eras have a definite beginning and end (as in the Chinese calendar, or as in + * most eras in the Japanese calendar) then rolling the year past either limit of the + * era will cause the year to wrap around. When eras only have a limit at one end, + * then attempting to roll the year past that limit will result in pinning the year + * at that limit. Note that for most calendars in which era 0 years move forward in + * time (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to + * result in negative years for era 0 (that is the only way to represent years before + * the calendar epoch). 
+ * When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the + * hour value in the range between 0 and 23, which is zero-based. + *

+ * The only difference between roll() and add() is that roll() does not change + * the value of more significant fields when it reaches the minimum or maximum + * of its range, whereas add() does. + * + * @param field The time field. + * @param amount Indicates amount to roll. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid, this will be set to + * an error status. + * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead. + */ + virtual void roll(EDateFields field, int32_t amount, UErrorCode& status); +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Time Field Rolling function. Rolls by the given amount on the given + * time field. For example, to roll the current date up by one day, call + * roll(Calendar::DATE, +1, status). When rolling on the month or + * Calendar::MONTH field, other fields like date might conflict and, need to be + * changed. For instance, rolling the month up on the date 01/31/96 will result in + * 02/29/96. Rolling by a positive value always means rolling forward in time (unless + * the limit of the field is reached, in which case it may pin or wrap), so for + * Gregorian calendar, starting with 100 BC and rolling the year by + 1 results in 99 BC. + * When eras have a definite beginning and end (as in the Chinese calendar, or as in + * most eras in the Japanese calendar) then rolling the year past either limit of the + * era will cause the year to wrap around. When eras only have a limit at one end, + * then attempting to roll the year past that limit will result in pinning the year + * at that limit. Note that for most calendars in which era 0 years move forward in + * time (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to + * result in negative years for era 0 (that is the only way to represent years before + * the calendar epoch). 
+ * When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the + * hour value in the range between 0 and 23, which is zero-based. + *

+ * The only difference between roll() and add() is that roll() does not change + * the value of more significant fields when it reaches the minimum or maximum + * of its range, whereas add() does. + * + * @param field The time field. + * @param amount Indicates amount to roll. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid, this will be set to + * an error status. + * @stable ICU 2.6. + */ + virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode& status); + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Return the difference between the given time and the time this + * calendar object is set to. If this calendar is set + * before the given time, the returned value will be + * positive. If this calendar is set after the given + * time, the returned value will be negative. The + * field parameter specifies the units of the return + * value. For example, if fieldDifference(when, + * Calendar::MONTH) returns 3, then this calendar is set to + * 3 months before when, and possibly some additional + * time less than one month. + * + *

As a side effect of this call, this calendar is advanced + * toward "when" by the given amount. That is, calling + * this method has the side effect of calling add(field, + * n), where n is the return value. + * + *

Usage: To use this method, call it first with the largest + * field of interest, then with progressively smaller fields. For + * example: + * + *

+     * int y = cal->fieldDifference(when, Calendar::YEAR, err);
+     * int m = cal->fieldDifference(when, Calendar::MONTH, err);
+     * int d = cal->fieldDifference(when, Calendar::DATE, err);
+ * + * computes the difference between cal and + * when in years, months, and days. + * + *

Note: fieldDifference() is + * asymmetrical. That is, in the following code: + * + *

+     * cal->setTime(date1, err);
+     * int m1 = cal->fieldDifference(date2, Calendar::MONTH, err);
+     * int d1 = cal->fieldDifference(date2, Calendar::DATE, err);
+     * cal->setTime(date2, err);
+     * int m2 = cal->fieldDifference(date1, Calendar::MONTH, err);
+     * int d2 = cal->fieldDifference(date1, Calendar::DATE, err);
+ * + * one might expect that m1 == -m2 && d1 == -d2. + * However, this is not generally the case, because of + * irregularities in the underlying calendar system (e.g., the + * Gregorian calendar has a varying number of days per month). + * + * @param when the date to compare this calendar's time to + * @param field the field in which to compute the result + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid, this will be set to + * an error status. + * @return the difference, either positive or negative, between + * this calendar's time and when, in terms of + * field. + * @deprecated ICU 2.6. Use fieldDifference(UDate when, UCalendarDateFields field, UErrorCode& status). + */ + virtual int32_t fieldDifference(UDate when, EDateFields field, UErrorCode& status); +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Return the difference between the given time and the time this + * calendar object is set to. If this calendar is set + * before the given time, the returned value will be + * positive. If this calendar is set after the given + * time, the returned value will be negative. The + * field parameter specifies the units of the return + * value. For example, if fieldDifference(when, + * Calendar::MONTH) returns 3, then this calendar is set to + * 3 months before when, and possibly some additional + * time less than one month. + * + *

As a side effect of this call, this calendar is advanced + * toward "when" by the given amount. That is, calling + * this method has the side effect of calling add(field, + * n), where n is the return value. + * + *

Usage: To use this method, call it first with the largest + * field of interest, then with progressively smaller fields. For + * example: + * + *

+     * int y = cal->fieldDifference(when, Calendar::YEAR, err);
+     * int m = cal->fieldDifference(when, Calendar::MONTH, err);
+     * int d = cal->fieldDifference(when, Calendar::DATE, err);
+ * + * computes the difference between cal and + * when in years, months, and days. + * + *

Note: fieldDifference() is + * asymmetrical. That is, in the following code: + * + *

+     * cal->setTime(date1, err);
+     * int m1 = cal->fieldDifference(date2, Calendar::MONTH, err);
+     * int d1 = cal->fieldDifference(date2, Calendar::DATE, err);
+     * cal->setTime(date2, err);
+     * int m2 = cal->fieldDifference(date1, Calendar::MONTH, err);
+     * int d2 = cal->fieldDifference(date1, Calendar::DATE, err);
+ * + * one might expect that m1 == -m2 && d1 == -d2. + * However, this is not generally the case, because of + * irregularities in the underlying calendar system (e.g., the + * Gregorian calendar has a varying number of days per month). + * + * @param when the date to compare this calendar's time to + * @param field the field in which to compute the result + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid, this will be set to + * an error status. + * @return the difference, either positive or negative, between + * this calendar's time and when, in terms of + * field. + * @stable ICU 2.6. + */ + virtual int32_t fieldDifference(UDate when, UCalendarDateFields field, UErrorCode& status); + + /** + * Sets the calendar's time zone to be the one passed in. The Calendar takes ownership + * of the TimeZone; the caller is no longer responsible for deleting it. If the + * given time zone is nullptr, this function has no effect. + * + * @param value The given time zone. + * @stable ICU 2.0 + */ + void adoptTimeZone(TimeZone* value); + + /** + * Sets the calendar's time zone to be the same as the one passed in. The TimeZone + * passed in is _not_ adopted; the client is still responsible for deleting it. + * + * @param zone The given time zone. + * @stable ICU 2.0 + */ + void setTimeZone(const TimeZone& zone); + + /** + * Returns a reference to the time zone owned by this calendar. The returned reference + * is only valid until clients make another call to adoptTimeZone or setTimeZone, + * or this Calendar is destroyed. + * + * @return The time zone object associated with this calendar. + * @stable ICU 2.0 + */ + const TimeZone& getTimeZone() const; + + /** + * Returns the time zone owned by this calendar. The caller owns the returned object + * and must delete it when done. 
After this call, the new time zone associated + * with this Calendar is the default TimeZone as returned by TimeZone::createDefault(). + * + * @return The time zone object which was associated with this calendar. + * @stable ICU 2.0 + */ + TimeZone* orphanTimeZone(); + + /** + * Queries if the current date for this Calendar is in Daylight Savings Time. + * + * @param status Fill-in parameter which receives the status of this operation. + * @return True if the current date for this Calendar is in Daylight Savings Time, + * false, otherwise. + * @stable ICU 2.0 + */ + virtual UBool inDaylightTime(UErrorCode& status) const; + + /** + * Specifies whether or not date/time interpretation is to be lenient. With lenient + * interpretation, a date such as "February 942, 1996" will be treated as being + * equivalent to the 941st day after February 1, 1996. With strict interpretation, + * such dates will cause an error when computing time from the time field values + * representing the dates. + * + * @param lenient True specifies date/time interpretation to be lenient. + * + * @see DateFormat#setLenient + * @stable ICU 2.0 + */ + void setLenient(UBool lenient); + + /** + * Tells whether date/time interpretation is to be lenient. + * + * @return True tells that date/time interpretation is to be lenient. + * @stable ICU 2.0 + */ + UBool isLenient() const; + + /** + * Sets the behavior for handling wall time repeating multiple times + * at negative time zone offset transitions. For example, 1:30 AM on + * November 6, 2011 in US Eastern time (America/New_York) occurs twice; + * 1:30 AM EDT, then 1:30 AM EST one hour later. When UCAL_WALLTIME_FIRST + * is used, the wall time 1:30AM in this example will be interpreted as 1:30 AM EDT + * (first occurrence). When UCAL_WALLTIME_LAST is used, it will be + * interpreted as 1:30 AM EST (last occurrence). The default value is + * UCAL_WALLTIME_LAST. + *

+ * Note: UCAL_WALLTIME_NEXT_VALID is not a valid + * option for this method. When the argument is neither UCAL_WALLTIME_FIRST + * nor UCAL_WALLTIME_LAST, this method has no effect and will keep + * the current setting. + * + * @param option the behavior for handling repeating wall time, either + * UCAL_WALLTIME_FIRST or UCAL_WALLTIME_LAST. + * @see #getRepeatedWallTimeOption + * @stable ICU 49 + */ + void setRepeatedWallTimeOption(UCalendarWallTimeOption option); + + /** + * Gets the behavior for handling wall time repeating multiple times + * at negative time zone offset transitions. + * + * @return the behavior for handling repeating wall time, either + * UCAL_WALLTIME_FIRST or UCAL_WALLTIME_LAST. + * @see #setRepeatedWallTimeOption + * @stable ICU 49 + */ + UCalendarWallTimeOption getRepeatedWallTimeOption() const; + + /** + * Sets the behavior for handling skipped wall time at positive time zone offset + * transitions. For example, 2:30 AM on March 13, 2011 in US Eastern time (America/New_York) + * does not exist because the wall time jumps from 1:59 AM EST to 3:00 AM EDT. When + * UCAL_WALLTIME_FIRST is used, 2:30 AM is interpreted as 30 minutes before 3:00 AM + * EDT, therefore, it will be resolved as 1:30 AM EST. When UCAL_WALLTIME_LAST + * is used, 2:30 AM is interpreted as 31 minutes after 1:59 AM EST, therefore, it will be + * resolved as 3:30 AM EDT. When UCAL_WALLTIME_NEXT_VALID is used, 2:30 AM will + * be resolved as next valid wall time, that is 3:00 AM EDT. The default value is + * UCAL_WALLTIME_LAST. + *

+ * Note:This option is effective only when this calendar is lenient. + * When the calendar is strict, such non-existing wall time will cause an error. + * + * @param option the behavior for handling skipped wall time at positive time zone + * offset transitions, one of UCAL_WALLTIME_FIRST, UCAL_WALLTIME_LAST and + * UCAL_WALLTIME_NEXT_VALID. + * @see #getSkippedWallTimeOption + * + * @stable ICU 49 + */ + void setSkippedWallTimeOption(UCalendarWallTimeOption option); + + /** + * Gets the behavior for handling skipped wall time at positive time zone offset + * transitions. + * + * @return the behavior for handling skipped wall time, one of + * UCAL_WALLTIME_FIRST, UCAL_WALLTIME_LAST + * and UCAL_WALLTIME_NEXT_VALID. + * @see #setSkippedWallTimeOption + * @stable ICU 49 + */ + UCalendarWallTimeOption getSkippedWallTimeOption() const; + + /** + * Sets what the first day of the week is; e.g., Sunday in US, Monday in France. + * + * @param value The given first day of the week. + * @stable ICU 2.6. + */ + void setFirstDayOfWeek(UCalendarDaysOfWeek value); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Gets what the first day of the week is; e.g., Sunday in US, Monday in France. + * + * @return The first day of the week. + * @deprecated ICU 2.6 use the overload with error code + */ + EDaysOfWeek getFirstDayOfWeek() const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Gets what the first day of the week is; e.g., Sunday in US, Monday in France. + * + * @param status error code + * @return The first day of the week. + * @stable ICU 2.6 + */ + UCalendarDaysOfWeek getFirstDayOfWeek(UErrorCode &status) const; + + /** + * Sets what the minimal days required in the first week of the year are; For + * example, if the first week is defined as one that contains the first day of the + * first month of a year, call the method with value 1. If it must be a full week, + * use value 7. + * + * @param value The given minimal days required in the first week of the year. 
+ * @stable ICU 2.0 + */ + void setMinimalDaysInFirstWeek(uint8_t value); + + /** + * Gets what the minimal days required in the first week of the year are; e.g., if + * the first week is defined as one that contains the first day of the first month + * of a year, getMinimalDaysInFirstWeek returns 1. If the minimal days required must + * be a full week, getMinimalDaysInFirstWeek returns 7. + * + * @return The minimal days required in the first week of the year. + * @stable ICU 2.0 + */ + uint8_t getMinimalDaysInFirstWeek() const; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Gets the minimum value for the given time field. e.g., for Gregorian + * DAY_OF_MONTH, 1. + * + * @param field The given time field. + * @return The minimum value for the given time field. + * @deprecated ICU 2.6. Use getMinimum(UCalendarDateFields field) instead. + */ + virtual int32_t getMinimum(EDateFields field) const; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Gets the minimum value for the given time field. e.g., for Gregorian + * DAY_OF_MONTH, 1. + * + * @param field The given time field. + * @return The minimum value for the given time field. + * @stable ICU 2.6. + */ + virtual int32_t getMinimum(UCalendarDateFields field) const; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Gets the maximum value for the given time field. e.g. for Gregorian DAY_OF_MONTH, + * 31. + * + * @param field The given time field. + * @return The maximum value for the given time field. + * @deprecated ICU 2.6. Use getMaximum(UCalendarDateFields field) instead. + */ + virtual int32_t getMaximum(EDateFields field) const; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Gets the maximum value for the given time field. e.g. for Gregorian DAY_OF_MONTH, + * 31. + * + * @param field The given time field. + * @return The maximum value for the given time field. + * @stable ICU 2.6. 
+ */ + virtual int32_t getMaximum(UCalendarDateFields field) const; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Gets the highest minimum value for the given field if varies. Otherwise same as + * getMinimum(). For Gregorian, no difference. + * + * @param field The given time field. + * @return The highest minimum value for the given time field. + * @deprecated ICU 2.6. Use getGreatestMinimum(UCalendarDateFields field) instead. + */ + virtual int32_t getGreatestMinimum(EDateFields field) const; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Gets the highest minimum value for the given field if varies. Otherwise same as + * getMinimum(). For Gregorian, no difference. + * + * @param field The given time field. + * @return The highest minimum value for the given time field. + * @stable ICU 2.6. + */ + virtual int32_t getGreatestMinimum(UCalendarDateFields field) const; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Gets the lowest maximum value for the given field if varies. Otherwise same as + * getMaximum(). e.g., for Gregorian DAY_OF_MONTH, 28. + * + * @param field The given time field. + * @return The lowest maximum value for the given time field. + * @deprecated ICU 2.6. Use getLeastMaximum(UCalendarDateFields field) instead. + */ + virtual int32_t getLeastMaximum(EDateFields field) const; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Gets the lowest maximum value for the given field if varies. Otherwise same as + * getMaximum(). e.g., for Gregorian DAY_OF_MONTH, 28. + * + * @param field The given time field. + * @return The lowest maximum value for the given time field. + * @stable ICU 2.6. + */ + virtual int32_t getLeastMaximum(UCalendarDateFields field) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Return the minimum value that this field could have, given the current date. + * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum(). 
+ * + * The version of this function on Calendar uses an iterative algorithm to determine the + * actual minimum value for the field. There is almost always a more efficient way to + * accomplish this (in most cases, you can simply return getMinimum()). GregorianCalendar + * overrides this function with a more efficient implementation. + * + * @param field the field to determine the minimum of + * @param status Fill-in parameter which receives the status of this operation. + * @return the minimum of the given field for the current date of this Calendar + * @deprecated ICU 2.6. Use getActualMinimum(UCalendarDateFields field, UErrorCode& status) instead. + */ + int32_t getActualMinimum(EDateFields field, UErrorCode& status) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Return the minimum value that this field could have, given the current date. + * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum(). + * + * The version of this function on Calendar uses an iterative algorithm to determine the + * actual minimum value for the field. There is almost always a more efficient way to + * accomplish this (in most cases, you can simply return getMinimum()). GregorianCalendar + * overrides this function with a more efficient implementation. + * + * @param field the field to determine the minimum of + * @param status Fill-in parameter which receives the status of this operation. + * @return the minimum of the given field for the current date of this Calendar + * @stable ICU 2.6. + */ + virtual int32_t getActualMinimum(UCalendarDateFields field, UErrorCode& status) const; + + /** + * Return the maximum value that this field could have, given the current date. + * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual + * maximum would be 28; for "Feb 3, 1996" it is 29. Similarly for a Hebrew calendar, + * for some years the actual maximum for MONTH is 12, and for others 13.
+ * + * The version of this function on Calendar uses an iterative algorithm to determine the + * actual maximum value for the field. There is almost always a more efficient way to + * accomplish this (in most cases, you can simply return getMaximum()). GregorianCalendar + * overrides this function with a more efficient implementation. + * + * @param field the field to determine the maximum of + * @param status Fill-in parameter which receives the status of this operation. + * @return the maximum of the given field for the current date of this Calendar + * @stable ICU 2.6. + */ + virtual int32_t getActualMaximum(UCalendarDateFields field, UErrorCode& status) const; + + /** + * Gets the value for a given time field. Recalculate the current time field values + * if the time value has been changed by a call to setTime(). Return zero for unset + * fields if any fields have been explicitly set by a call to set(). To force a + * recomputation of all fields regardless of the previous state, call complete(). + * This method is semantically const, but may alter the object in memory. + * + * @param field The given time field. + * @param status Fill-in parameter which receives the status of the operation. + * @return The value for the given time field, or zero if the field is unset, + * and set() has been called for any other field. + * @stable ICU 2.6. + */ + int32_t get(UCalendarDateFields field, UErrorCode& status) const; + + /** + * Determines if the given time field has a value set. This can affect in the + * resolving of time in Calendar. Unset fields have a value of zero, by definition. + * + * @param field The given time field. + * @return True if the given time field has a value set; false otherwise. + * @stable ICU 2.6. + */ + UBool isSet(UCalendarDateFields field) const; + + /** + * Sets the given time field with the given value. + * + * @param field The given time field. + * @param value The value to be set for the given time field. + * @stable ICU 2.6. 
+ */ + void set(UCalendarDateFields field, int32_t value); + + /** + * Sets the values for the fields YEAR, MONTH, and DATE. Other field values are + * retained; call clear() first if this is not desired. + * + * @param year The value used to set the YEAR time field. + * @param month The value used to set the MONTH time field. Month value is 0-based. + * e.g., 0 for January. + * @param date The value used to set the DATE time field. + * @stable ICU 2.0 + */ + void set(int32_t year, int32_t month, int32_t date); + + /** + * Sets the values for the fields YEAR, MONTH, DATE, HOUR_OF_DAY, and MINUTE. Other + * field values are retained; call clear() first if this is not desired. + * + * @param year The value used to set the YEAR time field. + * @param month The value used to set the MONTH time field. Month value is + * 0-based. E.g., 0 for January. + * @param date The value used to set the DATE time field. + * @param hour The value used to set the HOUR_OF_DAY time field. + * @param minute The value used to set the MINUTE time field. + * @stable ICU 2.0 + */ + void set(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute); + + /** + * Sets the values for the fields YEAR, MONTH, DATE, HOUR_OF_DAY, MINUTE, and SECOND. + * Other field values are retained; call clear() first if this is not desired. + * + * @param year The value used to set the YEAR time field. + * @param month The value used to set the MONTH time field. Month value is + * 0-based. E.g., 0 for January. + * @param date The value used to set the DATE time field. + * @param hour The value used to set the HOUR_OF_DAY time field. + * @param minute The value used to set the MINUTE time field. + * @param second The value used to set the SECOND time field. + * @stable ICU 2.0 + */ + void set(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute, int32_t second); + + /** + * Clears the values of all the time fields, making them both unset and assigning + * them a value of zero. 
The field values will be determined during the next + * resolving of time into time fields. + * @stable ICU 2.0 + */ + void clear(); + + /** + * Clears the value in the given time field, both making it unset and assigning it a + * value of zero. This field value will be determined during the next resolving of + * time into time fields. Clearing UCAL_ORDINAL_MONTH or UCAL_MONTH will + * clear both fields. + * + * @param field The time field to be cleared. + * @stable ICU 2.6. + */ + void clear(UCalendarDateFields field); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual method. This method is to + * implement a simple version of RTTI, since not all C++ compilers support genuine + * RTTI. Polymorphic operator==() and clone() methods call this method. + *

+ * Concrete subclasses of Calendar must implement getDynamicClassID() and also a + * static method and data member: + * + * static UClassID getStaticClassID() { return (UClassID)&fgClassID; } + * static char fgClassID; + * + * @return The class ID for this object. All objects of a given class have the + * same class ID. Objects of other classes have different class IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID() const override = 0; + + /** + * Returns the calendar type name string for this Calendar object. + * The returned string is the legacy ICU calendar attribute value, + * for example, "gregorian" or "japanese". + * + * See type="old type name" for the calendar attribute of locale IDs + * at http://www.unicode.org/reports/tr35/#Key_Type_Definitions + * + * Sample code for getting the LDML/BCP 47 calendar key value: + * \code + * const char *calType = cal->getType(); + * if (0 == strcmp(calType, "unknown")) { + * // deal with unknown calendar type + * } else { + * string localeID("root@calendar="); + * localeID.append(calType); + * char langTag[100]; + * UErrorCode errorCode = U_ZERO_ERROR; + * int32_t length = uloc_toLanguageTag(localeID.c_str(), langTag, (int32_t)sizeof(langTag), true, &errorCode); + * if (U_FAILURE(errorCode)) { + * // deal with errors & overflow + * } + * string lang(langTag, length); + * size_t caPos = lang.find("-ca-"); + * lang.erase(0, caPos + 4); + * // lang now contains the LDML calendar type + * } + * \endcode + * + * @return legacy calendar type name string + * @stable ICU 49 + */ + virtual const char * getType() const = 0; + + /** + * Returns whether the given day of the week is a weekday, a weekend day, + * or a day that transitions from one to the other, for the locale and + * calendar system associated with this Calendar (the locale's region is + * often the most determinant factor). 
If a transition occurs at midnight, + * then the days before and after the transition will have the + * type UCAL_WEEKDAY or UCAL_WEEKEND. If a transition occurs at a time + * other than midnight, then the day of the transition will have + * the type UCAL_WEEKEND_ONSET or UCAL_WEEKEND_CEASE. In this case, the + * method getWeekendTransition() will return the point of + * transition. + * @param dayOfWeek The day of the week whose type is desired (UCAL_SUNDAY..UCAL_SATURDAY). + * @param status The error code for the operation. + * @return The UCalendarWeekdayType for the day of the week. + * @stable ICU 4.4 + */ + virtual UCalendarWeekdayType getDayOfWeekType(UCalendarDaysOfWeek dayOfWeek, UErrorCode &status) const; + + /** + * Returns the time during the day at which the weekend begins or ends in + * this calendar system. If getDayOfWeekType() returns UCAL_WEEKEND_ONSET + * for the specified dayOfWeek, return the time at which the weekend begins. + * If getDayOfWeekType() returns UCAL_WEEKEND_CEASE for the specified dayOfWeek, + * return the time at which the weekend ends. If getDayOfWeekType() returns + * some other UCalendarWeekdayType for the specified dayOfWeek, it is an error condition + * (U_ILLEGAL_ARGUMENT_ERROR). + * @param dayOfWeek The day of the week for which the weekend transition time is + * desired (UCAL_SUNDAY..UCAL_SATURDAY). + * @param status The error code for the operation. + * @return The milliseconds after midnight at which the weekend begins or ends. + * @stable ICU 4.4 + */ + virtual int32_t getWeekendTransition(UCalendarDaysOfWeek dayOfWeek, UErrorCode &status) const; + + /** + * Returns true if the given UDate is in the weekend in + * this calendar system. + * @param date The UDate in question. + * @param status The error code for the operation. + * @return true if the given UDate is in the weekend in + * this calendar system, false otherwise. 
+ * @stable ICU 4.4 + */ + virtual UBool isWeekend(UDate date, UErrorCode &status) const; + + /** + * Returns true if this Calendar's current date-time is in the weekend in + * this calendar system. + * @return true if this Calendar's current date-time is in the weekend in + * this calendar system, false otherwise. + * @stable ICU 4.4 + */ + virtual UBool isWeekend() const; + + /** + * Returns true if the date is in a leap year. Recalculate the current time + * field values if the time value has been changed by a call to setTime(). + * This method is semantically const, but may alter the object in memory. + * A "leap year" is a year that contains more days than other years (for + * solar or lunar calendars) or more months than other years (for lunisolar + * calendars like Hebrew or Chinese), as defined in the ECMAScript Temporal + * proposal. + * + * @param status ICU Error Code + * @return True if the date in the fields is in a Temporal proposal + * defined leap year. False otherwise. + * @stable ICU 73 + */ + virtual bool inTemporalLeapYear(UErrorCode& status) const; + + /** + * Gets The Temporal monthCode value corresponding to the month for the date. + * The value is a string identifier that starts with the literal grapheme + * "M" followed by two graphemes representing the zero-padded month number + * of the current month in a normal (non-leap) year and suffixed by an + * optional literal grapheme "L" if this is a leap month in a lunisolar + * calendar. The 25 possible values are "M01" .. "M13" and "M01L" .. "M12L". + * For the Hebrew calendar, the values are "M01" .. "M12" for non-leap year, and + * "M01" .. "M05", "M05L", "M06" .. "M12" for leap year. + * For the Chinese calendar, the values are "M01" .. "M12" for non-leap year and + * in leap year with another monthCode in "M01L" .. "M12L". + * For Coptic and Ethiopian calendar, the Temporal monthCode values for any + * years are "M01" to "M13". 
+ * + * @param status ICU Error Code + * @return One of 25 possible strings in {"M01".."M13", "M01L".."M12L"}. + * @stable ICU 73 + */ + virtual const char* getTemporalMonthCode(UErrorCode& status) const; + + /** + * Sets The Temporal monthCode which is a string identifier that starts + * with the literal grapheme "M" followed by two graphemes representing + * the zero-padded month number of the current month in a normal + * (non-leap) year and suffixed by an optional literal grapheme "L" if this + * is a leap month in a lunisolar calendar. The 25 possible values are + * "M01" .. "M13" and "M01L" .. "M12L". For Hebrew calendar, the values are + * "M01" .. "M12" for non-leap years, and "M01" .. "M05", "M05L", "M06" + * .. "M12" for leap year. + * For the Chinese calendar, the values are "M01" .. "M12" for non-leap year and + * in leap year with another monthCode in "M01L" .. "M12L". + * For Coptic and Ethiopian calendar, the Temporal monthCode values for any + * years are "M01" to "M13". + * + * @param temporalMonth The value to be set for temporal monthCode. + * @param status ICU Error Code + * + * @stable ICU 73 + */ + virtual void setTemporalMonthCode(const char* temporalMonth, UErrorCode& status); + +protected: + + /** + * Constructs a Calendar with the default time zone as returned by + * TimeZone::createInstance(), and the default locale. + * + * @param success Indicates the status of Calendar object construction. Returns + * U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + Calendar(UErrorCode& success); + + /** + * Copy constructor + * + * @param source Calendar object to be copied from + * @stable ICU 2.0 + */ + Calendar(const Calendar& source); + + /** + * Default assignment operator + * + * @param right Calendar object to be copied + * @stable ICU 2.0 + */ + Calendar& operator=(const Calendar& right); + + /** + * Constructs a Calendar with the given time zone and locale. 
Clients are no longer + * responsible for deleting the given time zone object after it's adopted. + * + * @param zone The given time zone. + * @param aLocale The given locale. + * @param success Indicates the status of Calendar object construction. Returns + * U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + Calendar(TimeZone* zone, const Locale& aLocale, UErrorCode& success); + + /** + * Constructs a Calendar with the given time zone and locale. + * + * @param zone The given time zone. + * @param aLocale The given locale. + * @param success Indicates the status of Calendar object construction. Returns + * U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + Calendar(const TimeZone& zone, const Locale& aLocale, UErrorCode& success); + + /** + * Converts Calendar's time field values to GMT as milliseconds. + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.0 + */ + virtual void computeTime(UErrorCode& status); + + /** + * Converts GMT as milliseconds to time field values. This allows you to sync up the + * time field values with a new time that is set for the calendar. This method + * does NOT recompute the time first; to recompute the time, then the fields, use + * the method complete(). + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.0 + */ + virtual void computeFields(UErrorCode& status); + + /** + * Gets this Calendar's current time as a long. + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. 
+ * @return the current time as UTC milliseconds from the epoch. + * @stable ICU 2.0 + */ + double getTimeInMillis(UErrorCode& status) const; + + /** + * Sets this Calendar's current time from the given long value. + * @param millis the new time in UTC milliseconds from the epoch. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.0 + */ + void setTimeInMillis( double millis, UErrorCode& status ); + + /** + * Recomputes the current time from currently set fields, and then fills in any + * unset fields in the time field list. + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.0 + */ + void complete(UErrorCode& status); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Gets the value for a given time field. Subclasses can use this function to get + * field values without forcing recomputation of time. + * + * @param field The given time field. + * @return The value for the given time field. + * @deprecated ICU 2.6. Use internalGet(UCalendarDateFields field) instead. + */ + inline int32_t internalGet(EDateFields field) const {return fFields[field];} +#endif /* U_HIDE_DEPRECATED_API */ + +#ifndef U_HIDE_INTERNAL_API + /** + * Gets the value for a given time field. Subclasses can use this function to get + * field values without forcing recomputation of time. If the field's stamp is UNSET, + * the defaultValue is used. + * + * @param field The given time field. + * @param defaultValue a default value used if the field is unset. + * @return The value for the given time field. + * @internal + */ + inline int32_t internalGet(UCalendarDateFields field, int32_t defaultValue) const {return fStamp[field]>kUnset ? 
fFields[field] : defaultValue;} + + /** + * Gets the value for a given time field. Subclasses can use this function to get + * field values without forcing recomputation of time. + * + * @param field The given time field. + * @return The value for the given time field. + * @internal + */ + inline int32_t internalGet(UCalendarDateFields field) const {return fFields[field];} + + /** + * The year in this calendar is counting from 1 backward if the era is 0. + * @return The year in era 0 of this calendar is counting backward from 1. + * @internal + */ + virtual bool isEra0CountingBackward() const { return false; } +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Use this function instead of internalGet(UCAL_MONTH). The implementation + * checks the timestamp of UCAL_MONTH and UCAL_ORDINAL_MONTH and uses the + * one set later. The subclass should override it to convert the value of UCAL_ORDINAL_MONTH + * to UCAL_MONTH correctly if UCAL_ORDINAL_MONTH has higher priority. + * + * @return The value for the UCAL_MONTH. + * @internal + */ + virtual int32_t internalGetMonth(UErrorCode& status) const; + + /** + * Use this function instead of internalGet(UCAL_MONTH, defaultValue). The implementation + * checks the timestamp of UCAL_MONTH and UCAL_ORDINAL_MONTH and uses the + * one set later. The subclass should override it to convert the value of UCAL_ORDINAL_MONTH + * to UCAL_MONTH correctly if UCAL_ORDINAL_MONTH has higher priority. + * + * @param defaultValue a default value used if the UCAL_MONTH and + * UCAL_ORDINAL_MONTH are both unset. + * @param status Output param set to failure code on function return + * when this function fails. + * @return The value for the UCAL_MONTH. + * @internal + */ + virtual int32_t internalGetMonth(int32_t defaultValue, UErrorCode& status) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Sets the value for a given time field. This is a fast internal method for + * subclasses. It does not affect the areFieldsInSync, isTimeSet, or areAllFieldsSet + * flags. 
+ * + * @param field The given time field. + * @param value The value for the given time field. + * @deprecated ICU 2.6. Use internalSet(UCalendarDateFields field, int32_t value) instead. + */ + void internalSet(EDateFields field, int32_t value); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Sets the value for a given time field. This is a fast internal method for + * subclasses. It does not affect the areFieldsInSync, isTimeSet, or areAllFieldsSet + * flags. + * + * @param field The given time field. + * @param value The value for the given time field. + * @stable ICU 2.6. + */ + inline void internalSet(UCalendarDateFields field, int32_t value); + + /** + * Prepare this calendar for computing the actual minimum or maximum. + * This method modifies this calendar's fields; it is called on a + * temporary calendar. + * @internal + */ + virtual void prepareGetActual(UCalendarDateFields field, UBool isMinimum, UErrorCode &status); + + /** + * Limit enums. Not in sync with UCalendarLimitType (refers to internal fields). + * @internal + */ + enum ELimitType { +#ifndef U_HIDE_INTERNAL_API + UCAL_LIMIT_MINIMUM = 0, + UCAL_LIMIT_GREATEST_MINIMUM, + UCAL_LIMIT_LEAST_MAXIMUM, + UCAL_LIMIT_MAXIMUM, + UCAL_LIMIT_COUNT +#endif /* U_HIDE_INTERNAL_API */ + }; + + /** + * Subclass API for defining limits of different types. + * Subclasses must implement this method to return limits for the + * following fields: + * + *

UCAL_ERA
+     * UCAL_YEAR
+     * UCAL_MONTH
+     * UCAL_WEEK_OF_YEAR
+     * UCAL_WEEK_OF_MONTH
+     * UCAL_DATE (DAY_OF_MONTH on Java)
+     * UCAL_DAY_OF_YEAR
+     * UCAL_DAY_OF_WEEK_IN_MONTH
+     * UCAL_YEAR_WOY
+     * UCAL_EXTENDED_YEAR
+ * + * @param field one of the above field numbers + * @param limitType one of MINIMUM, GREATEST_MINIMUM, + * LEAST_MAXIMUM, or MAXIMUM + * @internal + */ + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const = 0; + + /** + * Return a limit for a field. + * @param field the field, from 0..UCAL_MAX_FIELD + * @param limitType the type specifier for the limit + * @see #ELimitType + * @internal + */ + virtual int32_t getLimit(UCalendarDateFields field, ELimitType limitType) const; + + /** + * Return the Julian day number of day before the first day of the + * given month in the given extended year. Subclasses should override + * this method to implement their calendar system. + * @param eyear the extended year + * @param month the zero-based month, or 0 if useMonth is false + * @param useMonth if false, compute the day before the first day of + * the given year, otherwise, compute the day before the first day of + * the given month + * @param status Output param set to failure code on function return + * when this function fails. + * @return the Julian day number of the day before the first + * day of the given month and year + * @internal + */ + virtual int64_t handleComputeMonthStart(int32_t eyear, int32_t month, + UBool useMonth, UErrorCode& status) const = 0; + + /** + * Return the number of days in the given month of the given extended + * year of this calendar system. Subclasses should override this + * method if they can provide a more correct or more efficient + * implementation than the default implementation in Calendar. + * @internal + */ + virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month, UErrorCode& status) const ; + + /** + * Return the number of days in the given extended year of this + * calendar system. Subclasses should override this method if they can + * provide a more correct or more efficient implementation than the + * default implementation in Calendar. 
+ * @internal + */ + virtual int32_t handleGetYearLength(int32_t eyear, UErrorCode& status) const; + + /** + * Return the extended year defined by the current fields. This will + * use the UCAL_EXTENDED_YEAR field or the UCAL_YEAR and supra-year fields (such + * as UCAL_ERA) specific to the calendar system, depending on which set of + * fields is newer. + * @param status ICU Error Code + * @return the extended year + * @internal + */ + virtual int32_t handleGetExtendedYear(UErrorCode& status) = 0; + + /** + * Subclasses may override this. This method calls + * handleGetMonthLength() to obtain the calendar-specific month + * length. + * @param bestField which field to use to calculate the date + * @param status ICU Error Code + * @return julian day specified by calendar fields. + * @internal + */ + virtual int32_t handleComputeJulianDay(UCalendarDateFields bestField, UErrorCode &status); + + /** + * Subclasses must override this to convert from week fields + * (YEAR_WOY and WEEK_OF_YEAR) to an extended year in the case + * where YEAR, EXTENDED_YEAR are not set. + * The Calendar implementation assumes yearWoy is in extended gregorian form + * @return the extended year, UCAL_EXTENDED_YEAR + * @internal + */ + virtual int32_t handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t woy, UErrorCode& status); + + /** + * Validate a single field of this calendar. Subclasses should + * override this method to validate any calendar-specific fields. + * Generic fields can be handled by `Calendar::validateField()`. + * @internal + */ + virtual void validateField(UCalendarDateFields field, UErrorCode &status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Compute the Julian day from fields. Will determine whether to use + * the JULIAN_DAY field directly, or other fields. + * @param status ICU Error Code + * @return the julian day + * @internal + */ + int32_t computeJulianDay(UErrorCode &status); + + /** + * Compute the milliseconds in the day from the fields. 
This is a + * value from 0 to 23:59:59.999 inclusive, unless fields are out of + * range, in which case it can be an arbitrary value. This value + * reflects local zone wall time. + * @internal + */ + double computeMillisInDay(); + + /** + * This method can assume EXTENDED_YEAR has been set. + * @param millis milliseconds of the date fields + * @param millisInDay milliseconds of the time fields; may be out + * of range. + * @param ec Output param set to failure code on function return + * when this function fails. + * @internal + */ + int32_t computeZoneOffset(double millis, double millisInDay, UErrorCode &ec); + + + /** + * Determine the best stamp in a range. + * @param start first enum to look at + * @param end last enum to look at + * @param bestSoFar stamp prior to function call + * @return the stamp value of the best stamp + * @internal + */ + int32_t newestStamp(UCalendarDateFields start, UCalendarDateFields end, int32_t bestSoFar) const; + + /** + * Marker for end of resolve set (row or group). Value for field resolution tables. + * + * @see #resolveFields + * @internal + */ + static constexpr int32_t kResolveSTOP = -1; + /** + * Value to be bitwised "ORed" against resolve table field values for remapping. + * Example: (UCAL_DATE | kResolveRemap) in 1st column will cause 'UCAL_DATE' to be returned, + * but will not examine the value of UCAL_DATE. + * Value for field resolution tables. 
+ * + * @see #resolveFields + * @internal + */ + static constexpr int32_t kResolveRemap = 32; + + /** + * Precedence table for Dates + * @see #resolveFields + * @internal + */ + static const UFieldResolutionTable kDatePrecedence[]; + + /** + * Precedence table for Year + * @see #resolveFields + * @internal + */ + static const UFieldResolutionTable kYearPrecedence[]; + + /** + * Precedence table for Day of Week + * @see #resolveFields + * @internal + */ + static const UFieldResolutionTable kDOWPrecedence[]; + + /** + * Precedence table for Months + * @see #resolveFields + * @internal + */ + static const UFieldResolutionTable kMonthPrecedence[]; + + /** + * Given a precedence table, return the newest field combination in + * the table, or UCAL_FIELD_COUNT if none is found. + * + *

The precedence table is a 3-dimensional array of integers. It + * may be thought of as an array of groups. Each group is an array of + * lines. Each line is an array of field numbers. Within a line, if + * all fields are set, then the time stamp of the line is taken to be + * the stamp of the most recently set field. If any field of a line is + * unset, then the line fails to match. Within a group, the line with + * the newest time stamp is selected. The first field of the line is + * returned to indicate which line matched. + * + *

In some cases, it may be desirable to map a line to field that + * whose stamp is NOT examined. For example, if the best field is + * DAY_OF_WEEK then the DAY_OF_WEEK_IN_MONTH algorithm may be used. In + * order to do this, insert the value kResolveRemap | F at + * the start of the line, where F is the desired return + * field value. This field will NOT be examined; it only determines + * the return value if the other fields in the line are the newest. + * + *

If all lines of a group contain at least one unset field, then no + * line will match, and the group as a whole will fail to match. In + * that case, the next group will be processed. If all groups fail to + * match, then UCAL_FIELD_COUNT is returned. + * @internal + */ + UCalendarDateFields resolveFields(const UFieldResolutionTable *precedenceTable) const; +#endif /* U_HIDE_INTERNAL_API */ + + + /** + * @internal + */ + virtual const UFieldResolutionTable* getFieldResolutionTable() const; + +#ifndef U_HIDE_INTERNAL_API + /** + * Return the field that is newer, either defaultField, or + * alternateField. If neither is newer or neither is set, return defaultField. + * @internal + */ + UCalendarDateFields newerField(UCalendarDateFields defaultField, UCalendarDateFields alternateField) const; +#endif /* U_HIDE_INTERNAL_API */ + + +private: + /** + * Helper function for calculating limits by trial and error + * @param field The field being investigated + * @param startValue starting (least max) value of field + * @param endValue ending (greatest max) value of field + * @param status return type + * @internal (private) + */ + int32_t getActualHelper(UCalendarDateFields field, int32_t startValue, int32_t endValue, UErrorCode &status) const; + +protected: + /** + * Get the current time without recomputing. + * + * @return the current time without recomputing. + * @stable ICU 2.0 + */ + UDate internalGetTime() const { return fTime; } + + /** + * Set the current time without affecting flags or fields. + * + * @param time The time to be set + * @return the current time without recomputing. + * @stable ICU 2.0 + */ + void internalSetTime(UDate time) { fTime = time; } + + /** + * The time fields containing values into which the millis is computed. 
+ * @stable ICU 2.0 + */ + int32_t fFields[UCAL_FIELD_COUNT]; + +protected: + /** Special values of stamp[] + * @stable ICU 2.0 + */ + enum { + kUnset = 0, + kInternallySet, + kMinimumUserStamp + }; + +private: + /** + * Pseudo-time-stamps which specify when each field was set. There + * are two special values, UNSET and INTERNALLY_SET. Values from + * MINIMUM_USER_SET to STAMP_MAX are legal user set values. + */ + int8_t fStamp[UCAL_FIELD_COUNT]; + +protected: + /** + * Subclasses may override this method to compute several fields + * specific to each calendar system. These are: + * + *

  • ERA + *
  • YEAR + *
  • MONTH + *
  • DAY_OF_MONTH + *
  • DAY_OF_YEAR + *
  • EXTENDED_YEAR
+ * + * Subclasses can refer to the DAY_OF_WEEK and DOW_LOCAL fields, which + * will be set when this method is called. Subclasses can also call + * the getGregorianXxx() methods to obtain Gregorian calendar + * equivalents for the given Julian day. + * + *

In addition, subclasses should compute any subclass-specific + * fields, that is, fields from BASE_FIELD_COUNT to + * getFieldCount() - 1. + * + *

The default implementation in Calendar implements + * a pure proleptic Gregorian calendar. + * @internal + */ + virtual void handleComputeFields(int32_t julianDay, UErrorCode &status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Return the extended year on the Gregorian calendar as computed by + * computeGregorianFields(). + * @internal + */ + int32_t getGregorianYear() const { + return fGregorianYear; + } + + /** + * Return the month (0-based) on the Gregorian calendar as computed by + * computeGregorianFields(). + * @internal + */ + int32_t getGregorianMonth() const { + return fGregorianMonth; + } + + /** + * Return the day of year (1-based) on the Gregorian calendar as + * computed by computeGregorianFields(). + * @internal + */ + int32_t getGregorianDayOfYear() const { + return fGregorianDayOfYear; + } + + /** + * Return the day of month (1-based) on the Gregorian calendar as + * computed by computeGregorianFields(). + * @internal + */ + int32_t getGregorianDayOfMonth() const { + return fGregorianDayOfMonth; + } +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Called by computeJulianDay. Returns the default month (0-based) for the year, + * taking year and era into account. Defaults to 0 for Gregorian, which doesn't care. + * @param eyear The extended year + * @param status Output param set to failure code on function return + * when this function fails. + * @internal + */ + virtual int32_t getDefaultMonthInYear(int32_t eyear, UErrorCode& status); + + + /** + * Called by computeJulianDay. Returns the default day (1-based) for the month, + * taking currently-set year and era into account. Defaults to 1 for Gregorian. + * @param eyear the extended year + * @param month the month in the year + * @param status Output param set to failure code on function return + * when this function fails. 
+ * @internal + */ + virtual int32_t getDefaultDayInMonth(int32_t eyear, int32_t month, UErrorCode& status); + + //------------------------------------------------------------------------- + // Protected utility methods for use by subclasses. These are very handy + // for implementing add, roll, and computeFields. + //------------------------------------------------------------------------- + + /** + * Adjust the specified field so that it is within + * the allowable range for the date to which this calendar is set. + * For example, in a Gregorian calendar pinning the {@link #UCalendarDateFields DAY_OF_MONTH} + * field for a calendar set to April 31 would cause it to be set + * to April 30. + *

+ * Subclassing: + *
+ * This utility method is intended for use by subclasses that need to implement + * their own overrides of {@link #roll roll} and {@link #add add}. + *

+ * Note: + * pinField is implemented in terms of + * {@link #getActualMinimum getActualMinimum} + * and {@link #getActualMaximum getActualMaximum}. If either of those methods uses + * a slow, iterative algorithm for a particular field, it would be + * unwise to attempt to call pinField for that field. If you + * really do need to do so, you should override this method to do + * something more efficient for that field. + *

+ * @param field The calendar field whose value should be pinned. + * @param status Output param set to failure code on function return + * when this function fails. + * + * @see #getActualMinimum + * @see #getActualMaximum + * @stable ICU 2.0 + */ + virtual void pinField(UCalendarDateFields field, UErrorCode& status); + + /** + * Return the week number of a day, within a period. This may be the week number in + * a year or the week number in a month. Usually this will be a value >= 1, but if + * some initial days of the period are excluded from week 1, because + * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} is > 1, then + * the week number will be zero for those + * initial days. This method requires the day number and day of week for some + * known date in the period in order to determine the day of week + * on the desired day. + *

+ * Subclassing: + *
+ * This method is intended for use by subclasses in implementing their + * {@link #computeTime computeTime} and/or {@link #computeFields computeFields} methods. + * It is often useful in {@link #getActualMinimum getActualMinimum} and + * {@link #getActualMaximum getActualMaximum} as well. + *

+ * This variant is handy for computing the week number of some other + * day of a period (often the first or last day of the period) when its day + * of the week is not known but the day number and day of week for some other + * day in the period (e.g. the current date) is known. + *

+ * @param desiredDay The {@link #UCalendarDateFields DAY_OF_YEAR} or + * {@link #UCalendarDateFields DAY_OF_MONTH} whose week number is desired. + * Should be 1 for the first day of the period. + * + * @param dayOfPeriod The {@link #UCalendarDateFields DAY_OF_YEAR} + * or {@link #UCalendarDateFields DAY_OF_MONTH} for a day in the period whose + * {@link #UCalendarDateFields DAY_OF_WEEK} is specified by the + * dayOfWeek parameter. + * Should be 1 for first day of period. + * + * @param dayOfWeek The {@link #UCalendarDateFields DAY_OF_WEEK} for the day + * corresponding to the dayOfPeriod parameter. + * 1-based with 1=Sunday. + * + * @return The week number (one-based), or zero if the day falls before + * the first week because + * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} + * is more than one. + * + * @stable ICU 2.8 + */ + int32_t weekNumber(int32_t desiredDay, int32_t dayOfPeriod, int32_t dayOfWeek); + + +#ifndef U_HIDE_INTERNAL_API + /** + * Return the week number of a day, within a period. This may be the week number in + * a year, or the week number in a month. Usually this will be a value >= 1, but if + * some initial days of the period are excluded from week 1, because + * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} is > 1, + * then the week number will be zero for those + * initial days. This method requires the day of week for the given date in order to + * determine the result. + *

+ * Subclassing: + *
+ * This method is intended for use by subclasses in implementing their + * {@link #computeTime computeTime} and/or {@link #computeFields computeFields} methods. + * It is often useful in {@link #getActualMinimum getActualMinimum} and + * {@link #getActualMaximum getActualMaximum} as well. + *

+ * @param dayOfPeriod The {@link #UCalendarDateFields DAY_OF_YEAR} or + * {@link #UCalendarDateFields DAY_OF_MONTH} whose week number is desired. + * Should be 1 for the first day of the period. + * + * @param dayOfWeek The {@link #UCalendarDateFields DAY_OF_WEEK} for the day + * corresponding to the dayOfPeriod parameter. + * 1-based with 1=Sunday. + * + * @return The week number (one-based), or zero if the day falls before + * the first week because + * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} + * is more than one. + * @internal + */ + inline int32_t weekNumber(int32_t dayOfPeriod, int32_t dayOfWeek); + + /** + * returns the local DOW, valid range 0..6 + * @internal + */ + int32_t getLocalDOW(UErrorCode& status); +#endif /* U_HIDE_INTERNAL_API */ + +private: + + /** + * The next available value for fStamp[] + */ + int8_t fNextStamp = kMinimumUserStamp; + + /** + * Recalculates the time stamp array (fStamp). + * Resets fNextStamp to lowest next stamp value. + */ + void recalculateStamp(); + + /** + * The current time set for the calendar. + */ + UDate fTime = 0; + + /** + * Time zone affects the time calculation done by Calendar. Calendar subclasses use + * the time zone data to produce the local time. Always set; never nullptr. + */ + TimeZone* fZone = nullptr; + + /** + * The flag which indicates if the current time is set in the calendar. + */ + bool fIsTimeSet:1; + + /** + * True if the fields are in sync with the currently set time of this Calendar. + * If false, then the next attempt to get the value of a field will + * force a recomputation of all fields from the current value of the time + * field. + *

+ * This should really be named areFieldsInSync, but the old name is retained + * for backward compatibility. + */ + bool fAreFieldsSet:1; + + /** + * True if all of the fields have been set. This is initially false, and set to + * true by computeFields(). + */ + bool fAreAllFieldsSet:1; + + /** + * True if all fields have been virtually set, but have not yet been + * computed. This occurs only in setTimeInMillis(). A calendar set + * to this state will compute all fields from the time if it becomes + * necessary, but otherwise will delay such computation. + */ + bool fAreFieldsVirtuallySet:1; + + /** + * @see #setLenient + */ + bool fLenient:1; + + /** + * Option for repeated wall time + * @see #setRepeatedWallTimeOption + */ + UCalendarWallTimeOption fRepeatedWallTime:3; // Somehow MSVC need 3 bits for UCalendarWallTimeOption + + /** + * Option for skipped wall time + * @see #setSkippedWallTimeOption + */ + UCalendarWallTimeOption fSkippedWallTime:3; // Somehow MSVC need 3 bits for UCalendarWallTimeOption + + /** + * Both firstDayOfWeek and minimalDaysInFirstWeek are locale-dependent. They are + * used to figure out the week count for a specific date for a given locale. These + * must be set when a Calendar is constructed. For example, in US locale, + * firstDayOfWeek is SUNDAY; minimalDaysInFirstWeek is 1. They are used to figure + * out the week count for a specific date for a given locale. These must be set when + * a Calendar is constructed. + */ + UCalendarDaysOfWeek fFirstDayOfWeek:4; // Somehow MSVC need 4 bits for + // UCalendarDaysOfWeek + UCalendarDaysOfWeek fWeekendOnset:4; // Somehow MSVC need 4 bits for + // UCalendarDaysOfWeek + UCalendarDaysOfWeek fWeekendCease:4; // Somehow MSVC need 4 bits for + // UCalendarDaysOfWeek + uint8_t fMinimalDaysInFirstWeek; + int32_t fWeekendOnsetMillis; + int32_t fWeekendCeaseMillis; + + /** + * Sets firstDayOfWeek and minimalDaysInFirstWeek. Called at Calendar construction + * time. 
+ * + * @param desiredLocale The given locale. + * @param type The calendar type identifier, e.g: gregorian, buddhist, etc. + * @param success Indicates the status of setting the week count data from + * the resource for the given locale. Returns U_ZERO_ERROR if + * constructed successfully. + */ + void setWeekData(const Locale& desiredLocale, const char *type, UErrorCode& success); + + /** + * Recompute the time and update the status fields isTimeSet + * and areFieldsSet. Callers should check isTimeSet and only + * call this method if isTimeSet is false. + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + */ + void updateTime(UErrorCode& status); + + /** + * The Gregorian year, as computed by computeGregorianFields() and + * returned by getGregorianYear(). + * @see #computeGregorianFields + */ + int32_t fGregorianYear; + + /** + * The Gregorian month, as computed by computeGregorianFields() and + * returned by getGregorianMonth(). + * @see #computeGregorianFields + */ + int8_t fGregorianMonth; + + /** + * The Gregorian day of the month, as computed by + * computeGregorianFields() and returned by getGregorianDayOfMonth(). + * @see #computeGregorianFields + */ + int8_t fGregorianDayOfMonth; + + /** + * The Gregorian day of the year, as computed by + * computeGregorianFields() and returned by getGregorianDayOfYear(). + * @see #computeGregorianFields + */ + int16_t fGregorianDayOfYear; + + /* calculations */ + +protected: + + /** + * Compute the Gregorian calendar year, month, and day of month from the + * Julian day. These values are not stored in fields, but in member + * variables gregorianXxx. They are used for time zone computations and by + * subclasses that are Gregorian derivatives. Subclasses may call this + * method to perform a Gregorian calendar millis->fields computation. 
+ */ + void computeGregorianFields(int32_t julianDay, UErrorCode &ec); + +private: + + /** + * Compute the fields WEEK_OF_YEAR, YEAR_WOY, WEEK_OF_MONTH, + * DAY_OF_WEEK_IN_MONTH, and DOW_LOCAL from EXTENDED_YEAR, YEAR, + * DAY_OF_WEEK, and DAY_OF_YEAR. The latter fields are computed by the + * subclass based on the calendar system. + * + *

The YEAR_WOY field is computed simplistically. It is equal to YEAR + * most of the time, but at the year boundary it may be adjusted to YEAR-1 + * or YEAR+1 to reflect the overlap of a week into an adjacent year. In + * this case, a simple increment or decrement is performed on YEAR, even + * though this may yield an invalid YEAR value. For instance, if the YEAR + * is part of a calendar system with an N-year cycle field CYCLE, then + * incrementing the YEAR may involve incrementing CYCLE and setting YEAR + * back to 0 or 1. This is not handled by this code, and in fact cannot be + * simply handled without having subclasses define an entire parallel set of + * fields for fields larger than or equal to a year. This additional + * complexity is not warranted, since the intention of the YEAR_WOY field is + * to support ISO 8601 notation, so it will typically be used with a + * proleptic Gregorian calendar, which has no field larger than a year. + */ + void computeWeekFields(UErrorCode &ec); + + + /** + * Ensure that each field is within its valid range by calling {@link + * #validateField(int, int&)} on each field that has been set. This method + * should only be called if this calendar is not lenient. + * @see #isLenient + * @see #validateField(int, int&) + */ + void validateFields(UErrorCode &status); + + /** + * Validate a single field of this calendar given its minimum and + * maximum allowed value. If the field is out of range, + * U_ILLEGAL_ARGUMENT_ERROR will be set. Subclasses may + * use this method in their implementation of {@link + * #validateField(int, int&)}. + */ + void validateField(UCalendarDateFields field, int32_t min, int32_t max, UErrorCode& status); + + protected: +#ifndef U_HIDE_INTERNAL_API + /** + * Convert a quasi Julian date to the day of the week. The Julian date used here is + * not a true Julian date, since it is measured from midnight, not noon. Return + * value is one-based. + * + * @param julian The given Julian date number. 
+ * @return Day number from 1..7 (SUN..SAT). + * @internal + */ + static uint8_t julianDayToDayOfWeek(int32_t julian); +#endif /* U_HIDE_INTERNAL_API */ + + private: + CharString* validLocale = nullptr; + CharString* actualLocale = nullptr; + + public: +#if !UCONFIG_NO_SERVICE + /** + * INTERNAL FOR 2.6 -- Registration. + */ + +#ifndef U_HIDE_INTERNAL_API + /** + * Return a StringEnumeration over the locales available at the time of the call, + * including registered locales. + * @return a StringEnumeration over the locales available at the time of the call + * @internal + */ + static StringEnumeration* getAvailableLocales(); + + /** + * Register a new Calendar factory. The factory will be adopted. + * INTERNAL in 2.6 + * + * Because ICU may choose to cache Calendars internally, this must + * be called at application startup, prior to any calls to + * Calendar::createInstance to avoid undefined behavior. + * + * @param toAdopt the factory instance to be adopted + * @param status the in/out status code, no special meanings are assigned + * @return a registry key that can be used to unregister this factory + * @internal + */ + static URegistryKey registerFactory(ICUServiceFactory* toAdopt, UErrorCode& status); + + /** + * Unregister a previously-registered CalendarFactory using the key returned from the + * register call. Key becomes invalid after a successful call and should not be used again. + * The CalendarFactory corresponding to the key will be deleted. + * INTERNAL in 2.6 + * + * Because ICU may choose to cache Calendars internally, this should + * be called during application shutdown, after all calls to + * Calendar::createInstance to avoid undefined behavior. 
+ * + * @param key the registry key returned by a previous call to registerFactory + * @param status the in/out status code, no special meanings are assigned + * @return true if the factory for the key was successfully unregistered + * @internal + */ + static UBool unregister(URegistryKey key, UErrorCode& status); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Multiple Calendar Implementation + * @internal + */ + friend class CalendarFactory; + + /** + * Multiple Calendar Implementation + * @internal + */ + friend class CalendarService; + + /** + * Multiple Calendar Implementation + * @internal + */ + friend class DefaultCalendarFactory; +#endif /* !UCONFIG_NO_SERVICE */ + + /** + * @return true if this calendar has a default century (i.e. 03 -> 2003) + * @internal + */ + virtual UBool haveDefaultCentury() const = 0; + + /** + * @return the start of the default century, as a UDate + * @internal + */ + virtual UDate defaultCenturyStart() const = 0; + /** + * @return the beginning year of the default century, as a year + * @internal + */ + virtual int32_t defaultCenturyStartYear() const = 0; + + /** Get the locale for this calendar object. You can choose between valid and actual locale. + * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale + * @stable ICU 2.8 + */ + Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const; + + /** + * @return The related Gregorian year; will be obtained by modifying the value + * obtained by get from UCAL_EXTENDED_YEAR field + * @internal + */ + virtual int32_t getRelatedYear(UErrorCode &status) const; + + /** + * @param year The related Gregorian year to set; will be modified as necessary then + * set in UCAL_EXTENDED_YEAR field + * @internal + */ + virtual void setRelatedYear(int32_t year); + +#ifndef U_HIDE_INTERNAL_API + /** Get the locale for this calendar object. You can choose between valid and actual locale. 
+ * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale + * @internal + */ + const char* getLocaleID(ULocDataLocaleType type, UErrorCode &status) const; +#endif /* U_HIDE_INTERNAL_API */ + +private: + /** + * Cast TimeZone used by this object to BasicTimeZone, or nullptr if the TimeZone + * is not an instance of BasicTimeZone. + */ + BasicTimeZone* getBasicTimeZone() const; + + /** + * Find the previous zone transition near the given time. + * @param base The base time, inclusive + * @param transitionTime Receives the result time + * @param status The error status + * @return true if a transition is found. + */ + UBool getImmediatePreviousZoneTransition(UDate base, UDate *transitionTime, UErrorCode& status) const; + +public: +#ifndef U_HIDE_INTERNAL_API + /** + * Creates a new Calendar from a Locale for the cache. + * This method does not set the time or timezone in returned calendar. + * @param locale the locale. + * @param status any error returned here. + * @return the new Calendar object with no time or timezone set. + * @internal For ICU use only. + */ + static Calendar * U_EXPORT2 makeInstance( + const Locale &locale, UErrorCode &status); + + /** + * Get the calendar type for given locale. + * @param locale the locale + * @param typeBuffer calendar type returned here + * @param typeBufferSize The size of typeBuffer in bytes. If the type + * can't fit in the buffer, this method sets status to + * U_BUFFER_OVERFLOW_ERROR + * @param status error, if any, returned here. + * @internal For ICU use only. 
+ */ + static void U_EXPORT2 getCalendarTypeFromLocale( + const Locale &locale, + char *typeBuffer, + int32_t typeBufferSize, + UErrorCode &status); +#endif /* U_HIDE_INTERNAL_API */ +}; + +// ------------------------------------- + +inline Calendar* +Calendar::createInstance(TimeZone* zone, UErrorCode& errorCode) +{ + // since the Locale isn't specified, use the default locale + return createInstance(zone, Locale::getDefault(), errorCode); +} + +// ------------------------------------- + +inline void +Calendar::roll(UCalendarDateFields field, UBool up, UErrorCode& status) +{ + roll(field, static_cast(up ? +1 : -1), status); +} + +#ifndef U_HIDE_DEPRECATED_API +inline void +Calendar::roll(EDateFields field, UBool up, UErrorCode& status) +{ + roll(static_cast(field), up, status); +} +#endif /* U_HIDE_DEPRECATED_API */ + + +// ------------------------------------- + +/** + * Fast method for subclasses. The caller must maintain fUserSetDSTOffset and + * fUserSetZoneOffset, as well as the isSet[] array. + */ + +inline void +Calendar::internalSet(UCalendarDateFields field, int32_t value) +{ + fFields[field] = value; + fStamp[field] = kInternallySet; +} + +/** + * Macro for the class to declare it override + * haveDefaultCentury, defaultCenturyStart, and + * defaultCenturyStartYear functions in this class. 
+ * @internal + */ +#define DECLARE_OVERRIDE_SYSTEM_DEFAULT_CENTURY \ + virtual UBool haveDefaultCentury() const override; \ + virtual UDate defaultCenturyStart() const override; \ + virtual int32_t defaultCenturyStartYear() const override; + +#ifndef U_HIDE_INTERNAL_API +inline int32_t Calendar::weekNumber(int32_t dayOfPeriod, int32_t dayOfWeek) +{ + return weekNumber(dayOfPeriod, dayOfPeriod, dayOfWeek); +} +#endif /* U_HIDE_INTERNAL_API */ + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // _CALENDAR diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/caniter.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/caniter.h new file mode 100644 index 0000000000..b904ef2ff8 --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/caniter.h @@ -0,0 +1,215 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ + +#ifndef CANITER_H +#define CANITER_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/uobject.h" +#include "unicode/unistr.h" + +/** + * \file + * \brief C++ API: Canonical Iterator + */ + +/** Should permutation skip characters with combining class zero + * Should be either true or false. 
This is a compile time option + * @stable ICU 2.4 + */ +#ifndef CANITER_SKIP_ZEROES +#define CANITER_SKIP_ZEROES true +#endif + +U_NAMESPACE_BEGIN + +class Hashtable; +class Normalizer2; +class Normalizer2Impl; + +/** + * This class allows one to iterate through all the strings that are canonically equivalent to a given + * string. For example, here are some sample results: +Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +1: \\u0041\\u030A\\u0064\\u0307\\u0327 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +2: \\u0041\\u030A\\u0064\\u0327\\u0307 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +3: \\u0041\\u030A\\u1E0B\\u0327 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +4: \\u0041\\u030A\\u1E11\\u0307 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} +5: \\u00C5\\u0064\\u0307\\u0327 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +6: \\u00C5\\u0064\\u0327\\u0307 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +7: \\u00C5\\u1E0B\\u0327 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +8: \\u00C5\\u1E11\\u0307 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} +9: \\u212B\\u0064\\u0307\\u0327 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +10: \\u212B\\u0064\\u0327\\u0307 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +11: \\u212B\\u1E0B\\u0327 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +12: \\u212B\\u1E11\\u0307 + = {ANGSTROM 
SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} + *
Note: the code is intended for use with small strings, and is not suitable for larger ones, + * since it has not been optimized for that situation. + * Note, CanonicalIterator is not intended to be subclassed. + * @author M. Davis + * @author C++ port by V. Weinstein + * @stable ICU 2.4 + */ +class U_COMMON_API CanonicalIterator final : public UObject { +public: + /** + * Construct a CanonicalIterator object + * @param source string to get results for + * @param status Fill-in parameter which receives the status of this operation. + * @stable ICU 2.4 + */ + CanonicalIterator(const UnicodeString &source, UErrorCode &status); + + /** Destructor + * Cleans pieces + * @stable ICU 2.4 + */ + virtual ~CanonicalIterator(); + + /** + * Gets the NFD form of the current source we are iterating over. + * @return gets the source: NOTE: it is the NFD form of source + * @stable ICU 2.4 + */ + UnicodeString getSource(); + + /** + * Resets the iterator so that one can start again from the beginning. + * @stable ICU 2.4 + */ + void reset(); + + /** + * Get the next canonically equivalent string. + *
Warning: The strings are not guaranteed to be in any particular order. + * @return the next string that is canonically equivalent. A bogus string is returned when + * the iteration is done. + * @stable ICU 2.4 + */ + UnicodeString next(); + + /** + * Set a new source for this iterator. Allows object reuse. + * @param newSource the source string to iterate against. This allows the same iterator to be used + * while changing the source string, saving object creation. + * @param status Fill-in parameter which receives the status of this operation. + * @stable ICU 2.4 + */ + void setSource(const UnicodeString &newSource, UErrorCode &status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Dumb recursive implementation of permutation. + * TODO: optimize + * @param source the string to find permutations for + * @param skipZeros determine if skip zeros + * @param result the results in a set. + * @param status Fill-in parameter which receives the status of this operation. + * @param depth depth of the call. + * @internal + */ + static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth=0); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const override; + +private: + // ===================== PRIVATES ============================== + // private default constructor + CanonicalIterator() = delete; + + + /** + * Copy constructor. Private for now. + * @internal (private) + */ + CanonicalIterator(const CanonicalIterator& other) = delete; + + /** + * Assignment operator. Private for now. 
+ * @internal (private) + */ + CanonicalIterator& operator=(const CanonicalIterator& other) = delete; + + // fields + UnicodeString source; + UBool done; + + // 2 dimensional array holds the pieces of the string with + // their different canonically equivalent representations + UnicodeString **pieces; + int32_t pieces_length; + int32_t *pieces_lengths; + + // current is used in iterating to combine pieces + int32_t *current; + int32_t current_length; + + // transient fields + UnicodeString buffer; + + const Normalizer2 *nfd; + const Normalizer2Impl *nfcImpl; + + // we have a segment, in NFD. Find all the strings that are canonically equivalent to it. + UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment) + + //Set getEquivalents2(String segment); + Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status); + //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status); + + /** + * See if the decomposition of cp2 is at segment starting at segmentPos + * (with canonical rearrangement!) 
+ * If so, take the remainder, and return the equivalents + */ + //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer); + Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); + //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); + + void cleanPieces(); + +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/casemap.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/casemap.h new file mode 100644 index 0000000000..eca7cbf80a --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/casemap.h @@ -0,0 +1,497 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// casemap.h +// created: 2017jan12 Markus W. Scherer + +#ifndef __CASEMAP_H__ +#define __CASEMAP_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Low-level C++ case mapping functions. + */ + +U_NAMESPACE_BEGIN + +class BreakIterator; +class ByteSink; +class Edits; + +/** + * Low-level C++ case mapping functions. + * + * @stable ICU 59 + */ +class U_COMMON_API CaseMap final : public UMemory { +public: + /** + * Lowercases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, nullptr = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. 
+ * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be nullptr and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see u_strToLower + * @stable ICU 59 + */ + static int32_t toLower( + const char *locale, uint32_t options, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + + /** + * Uppercases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, nullptr = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. 
The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be nullptr and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see u_strToUpper + * @stable ICU 59 + */ + static int32_t toUpper( + const char *locale, uint32_t options, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options bits.) + * + * @param locale The locale ID. ("" = root locale, nullptr = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. 
+ * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string (setText()) + * and used one or more times for iteration (first() and next()). + * If nullptr, then a word break iterator for the locale is used + * (or something equivalent). + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be nullptr and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see u_strToTitle + * @see ucasemap_toTitle + * @stable ICU 59 + */ + static int32_t toTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + + /** + * Case-folds a UTF-16 string and optionally records edits. 
+ * + * Case folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be nullptr and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see u_strFoldCase + * @stable ICU 59 + */ + static int32_t fold( + uint32_t options, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + + /** + * Lowercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. 
+ * The result may be longer or shorter than the original. + * + * @param locale The locale ID. ("" = root locale, nullptr = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * + * @see ucasemap_utf8ToLower + * @stable ICU 60 + */ + static void utf8ToLower( + const char *locale, uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + + /** + * Uppercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * + * @param locale The locale ID. ("" = root locale, nullptr = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. 
+ * + * @see ucasemap_utf8ToUpper + * @stable ICU 60 + */ + static void utf8ToUpper( + const char *locale, uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options bits.) + * + * @param locale The locale ID. ("" = root locale, nullptr = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. + * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string (setUText()) + * and used one or more times for iteration (first() and next()). + * If nullptr, then a word break iterator for the locale is used + * (or something equivalent). + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. 
+ * + * @see ucasemap_utf8ToTitle + * @stable ICU 60 + */ + static void utf8ToTitle( + const char *locale, uint32_t options, BreakIterator *iter, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + + /** + * Case-folds a UTF-8 string and optionally records edits. + * + * Case folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * + * @see ucasemap_utf8FoldCase + * @stable ICU 60 + */ + static void utf8Fold( + uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + + /** + * Lowercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, nullptr = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. 
+ * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be nullptr and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucasemap_utf8ToLower + * @stable ICU 59 + */ + static int32_t utf8ToLower( + const char *locale, uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + + /** + * Uppercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, nullptr = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. 
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be nullptr and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucasemap_utf8ToUpper + * @stable ICU 59 + */ + static int32_t utf8ToUpper( + const char *locale, uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options bits.) + * + * @param locale The locale ID. ("" = root locale, nullptr = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. + * @param iter A break iterator to find the first characters of words that are to be titlecased. 
+ * It is set to the source string (setUText()) + * and used one or more times for iteration (first() and next()). + * If nullptr, then a word break iterator for the locale is used + * (or something equivalent). + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be nullptr and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucasemap_utf8ToTitle + * @stable ICU 59 + */ + static int32_t utf8ToTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + + /** + * Case-folds a UTF-8 string and optionally records edits. + * + * Case folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. 
+ * The source string and the destination buffer must not overlap. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be nullptr and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. 
+ * + * @see ucasemap_utf8FoldCase + * @stable ICU 59 + */ + static int32_t utf8Fold( + uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +private: + CaseMap() = delete; + CaseMap(const CaseMap &other) = delete; + CaseMap &operator=(const CaseMap &other) = delete; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __CASEMAP_H__ diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/char16ptr.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/char16ptr.h new file mode 100644 index 0000000000..049de9efee --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/char16ptr.h @@ -0,0 +1,453 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// char16ptr.h +// created: 2017feb28 Markus W. Scherer + +#ifndef __CHAR16PTR_H__ +#define __CHAR16PTR_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API + +#include +#include +#include + +#endif + +/** + * \file + * \brief C++ API: char16_t pointer wrappers with + * implicit conversion from bit-compatible raw pointer types. + * Also conversion functions from char16_t * to UChar * and OldUChar *. + */ + +/** + * \def U_ALIASING_BARRIER + * Barrier for pointer anti-aliasing optimizations even across function boundaries. + * @internal + */ +#ifdef U_ALIASING_BARRIER + // Use the predefined value. +#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT +# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") +#elif defined(U_IN_DOXYGEN) +# define U_ALIASING_BARRIER(ptr) +#endif + +// ICU DLL-exported +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. 
+ * @stable ICU 59 + */ +class U_COMMON_API Char16Ptr final { +public: + /** + * Copies the pointer. + * @param p pointer + * @stable ICU 59 + */ + inline Char16Ptr(char16_t *p); +#if !U_CHAR16_IS_TYPEDEF + /** + * Converts the pointer to char16_t *. + * @param p pointer to be converted + * @stable ICU 59 + */ + inline Char16Ptr(uint16_t *p); +#endif +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted + * @stable ICU 59 + */ + inline Char16Ptr(wchar_t *p); +#endif + /** + * nullptr constructor. + * @param p nullptr + * @stable ICU 59 + */ + inline Char16Ptr(std::nullptr_t p); + /** + * Destructor. + * @stable ICU 59 + */ + inline ~Char16Ptr(); + + /** + * Pointer access. + * @return the wrapped pointer + * @stable ICU 59 + */ + inline char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). + * @return the wrapped pointer + * @stable ICU 59 + */ + inline operator char16_t *() const { return get(); } + +private: + Char16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template static char16_t *cast(T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast(t); + } + + char16_t *p_; +#else + union { + char16_t *cp; + uint16_t *up; + wchar_t *wp; + } u_; +#endif +}; + +/// \cond +#ifdef U_ALIASING_BARRIER + +Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {} +#if !U_CHAR16_IS_TYPEDEF +Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {} +#endif +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {} +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {} +Char16Ptr::~Char16Ptr() { + U_ALIASING_BARRIER(p_); +} + +char16_t *Char16Ptr::get() const { return p_; } + +#else + +Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; } +#if !U_CHAR16_IS_TYPEDEF +Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; } +#endif +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; } +#endif 
+Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; } +Char16Ptr::~Char16Ptr() {} + +char16_t *Char16Ptr::get() const { return u_.cp; } + +#endif +/// \endcond + +/** + * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. + * @stable ICU 59 + */ +class U_COMMON_API ConstChar16Ptr final { +public: + /** + * Copies the pointer. + * @param p pointer + * @stable ICU 59 + */ + inline ConstChar16Ptr(const char16_t *p); +#if !U_CHAR16_IS_TYPEDEF + /** + * Converts the pointer to char16_t *. + * @param p pointer to be converted + * @stable ICU 59 + */ + inline ConstChar16Ptr(const uint16_t *p); +#endif +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted + * @stable ICU 59 + */ + inline ConstChar16Ptr(const wchar_t *p); +#endif + /** + * nullptr constructor. + * @param p nullptr + * @stable ICU 59 + */ + inline ConstChar16Ptr(const std::nullptr_t p); + + /** + * Destructor. + * @stable ICU 59 + */ + inline ~ConstChar16Ptr(); + + /** + * Pointer access. + * @return the wrapped pointer + * @stable ICU 59 + */ + inline const char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). 
+ * @return the wrapped pointer + * @stable ICU 59 + */ + inline operator const char16_t *() const { return get(); } + +private: + ConstChar16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template static const char16_t *cast(const T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast(t); + } + + const char16_t *p_; +#else + union { + const char16_t *cp; + const uint16_t *up; + const wchar_t *wp; + } u_; +#endif +}; + +/// \cond +#ifdef U_ALIASING_BARRIER + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {} +#if !U_CHAR16_IS_TYPEDEF +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {} +#endif +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {} +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {} +ConstChar16Ptr::~ConstChar16Ptr() { + U_ALIASING_BARRIER(p_); +} + +const char16_t *ConstChar16Ptr::get() const { return p_; } + +#else + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; } +#if !U_CHAR16_IS_TYPEDEF +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; } +#endif +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; } +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; } +ConstChar16Ptr::~ConstChar16Ptr() {} + +const char16_t *ConstChar16Ptr::get() const { return u_.cp; } + +#endif +/// \endcond + +U_NAMESPACE_END + +#endif // U_SHOW_CPLUSPLUS_API + +// Usable in header-only definitions +#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API + +namespace U_ICU_NAMESPACE_OR_INTERNAL { + +#ifndef U_FORCE_HIDE_INTERNAL_API +/** @internal */ +template>> +inline const char16_t *uprv_char16PtrFromUChar(const T *p) { + if constexpr (std::is_same_v) { + return p; + } else { +#if U_SHOW_CPLUSPLUS_API + return ConstChar16Ptr(p).get(); +#else +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +#endif + } +} +#if !U_CHAR16_IS_TYPEDEF && 
(!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000) +/** @internal */ +inline const char16_t *uprv_char16PtrFromUint16(const uint16_t *p) { +#if U_SHOW_CPLUSPLUS_API + return ConstChar16Ptr(p).get(); +#else +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +#endif +} +#endif +#if U_SIZEOF_WCHAR_T==2 +/** @internal */ +inline const char16_t *uprv_char16PtrFromWchar(const wchar_t *p) { +#if U_SHOW_CPLUSPLUS_API + return ConstChar16Ptr(p).get(); +#else +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +#endif +} +#endif +#endif + +/** + * Converts from const char16_t * to const UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const UChar * + * @stable ICU 59 + */ +inline const UChar *toUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +/** + * Converts from char16_t * to UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as UChar * + * @stable ICU 59 + */ +inline UChar *toUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +/** + * Converts from const char16_t * to const OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const OldUChar * + * @stable ICU 59 + */ +inline const OldUChar *toOldUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +/** + * Converts from char16_t * to OldUChar *. + * Includes an aliasing barrier if available. 
+ * @param p pointer + * @return p as OldUChar * + * @stable ICU 59 + */ +inline OldUChar *toOldUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +} // U_ICU_NAMESPACE_OR_INTERNAL + +#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API + +// ICU DLL-exported +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +#ifndef U_FORCE_HIDE_INTERNAL_API +/** + * Is T convertible to a std::u16string_view or some other 16-bit string view? + * @internal + */ +template +constexpr bool ConvertibleToU16StringView = + std::is_convertible_v +#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000) + || std::is_convertible_v> +#endif +#if U_SIZEOF_WCHAR_T==2 + || std::is_convertible_v +#endif + ; + +namespace internal { +/** + * Pass-through overload. + * @internal + */ +inline std::u16string_view toU16StringView(std::u16string_view sv) { return sv; } + +#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000) +/** + * Basically undefined behavior but sometimes necessary conversion + * from std::basic_string_view to std::u16string_view. + * @internal + */ +inline std::u16string_view toU16StringView(std::basic_string_view sv) { + return { ConstChar16Ptr(sv.data()), sv.length() }; +} +#endif + +#if U_SIZEOF_WCHAR_T==2 +/** + * Basically undefined behavior but sometimes necessary conversion + * from std::wstring_view to std::u16string_view. + * @internal + */ +inline std::u16string_view toU16StringView(std::wstring_view sv) { + return { ConstChar16Ptr(sv.data()), sv.length() }; +} +#endif + +/** + * Pass-through overload. + * @internal + */ +template >>> +inline std::u16string_view toU16StringViewNullable(const T& text) { + return toU16StringView(text); +} + +/** + * In case of nullptr, return an empty view. 
+ * @internal + */ +template >>, + typename = void> +inline std::u16string_view toU16StringViewNullable(const T& text) { + if (text == nullptr) return {}; // For backward compatibility. + return toU16StringView(text); +} + +} // internal +#endif // U_FORCE_HIDE_INTERNAL_API + +U_NAMESPACE_END + +#endif // U_SHOW_CPLUSPLUS_API + +#endif // __CHAR16PTR_H__ diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/chariter.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/chariter.h new file mode 100644 index 0000000000..411825677f --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/chariter.h @@ -0,0 +1,734 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************** +* +* Copyright (C) 1997-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************** +*/ + +#ifndef CHARITER_H +#define CHARITER_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +/** + * \file + * \brief C++ API: Character Iterator + */ + +U_NAMESPACE_BEGIN +/** + * Abstract class that defines an API for forward-only iteration + * on text objects. + * This is a minimal interface for iteration without random access + * or backwards iteration. It is especially useful for wrapping + * streams with converters into an object for collation or + * normalization. + * + *

Characters can be accessed in two ways: as code units or as + * code points. + * Unicode code points are 21-bit integers and are the scalar values + * of Unicode characters. ICU uses the type UChar32 for them. + * Unicode code units are the storage units of a given + * Unicode/UCS Transformation Format (a character encoding scheme). + * With UTF-16, all code points can be represented with either one + * or two code units ("surrogates"). + * String storage is typically based on code units, while properties + * of characters are typically determined using code point values. + * Some processes may be designed to work with sequences of code units, + * or it may be known that all characters that are important to an + * algorithm can be represented with single code units. + * Other processes will need to use the code point access functions.

+ * + *

ForwardCharacterIterator provides nextPostInc() to access + * a code unit and advance an internal position into the text object, + * similar to the statement <code>return text[position++]</code>.
+ * It provides next32PostInc() to access a code point and advance an internal + * position.

+ * + *

next32PostInc() assumes that the current position is that of + * the beginning of a code point, i.e., of its first code unit. + * After next32PostInc(), this will be true again. + * In general, access to code units and code points in the same + * iteration loop should not be mixed. In UTF-16, if the current position + * is on a second code unit (Low Surrogate), then only that code unit + * is returned even by next32PostInc().

+ * + *

For iteration with either function, there are two ways to + * check for the end of the iteration. When there are no more + * characters in the text object: + *

    + *
  • The hasNext() function returns false.
  • + *
  • nextPostInc() and next32PostInc() return DONE + * when one attempts to read beyond the end of the text object.
  • + *
+ * + * Example: + * \code + * void function1(ForwardCharacterIterator &it) { + * UChar32 c; + * while(it.hasNext()) { + * c=it.next32PostInc(); + * // use c + * } + * } + * + * void function2(ForwardCharacterIterator &it) { + * char16_t c; + * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) { + * // use c + * } + * } + * \endcode + *

+ * + * @stable ICU 2.0 + */ +class U_COMMON_API ForwardCharacterIterator : public UObject { +public: + /** + * Value returned by most of ForwardCharacterIterator's functions + * when the iterator has reached the limits of its iteration. + * @stable ICU 2.0 + */ + enum { DONE = 0xffff }; + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~ForwardCharacterIterator(); + + /** + * Returns true when both iterators refer to the same + * character in the same character-storage object. + * @param that The ForwardCharacterIterator to be compared for equality + * @return true when both iterators refer to the same + * character in the same character-storage object + * @stable ICU 2.0 + */ + virtual bool operator==(const ForwardCharacterIterator& that) const = 0; + + /** + * Returns true when the iterators refer to different + * text-storage objects, or to different characters in the + * same text-storage object. + * @param that The ForwardCharacterIterator to be compared for inequality + * @return true when the iterators refer to different + * text-storage objects, or to different characters in the + * same text-storage object + * @stable ICU 2.0 + */ + inline bool operator!=(const ForwardCharacterIterator& that) const; + + /** + * Generates a hash code for this iterator. + * @return the hash code. + * @stable ICU 2.0 + */ + virtual int32_t hashCode() const = 0; + + /** + * Returns a UClassID for this ForwardCharacterIterator ("poor man's + * RTTI").

Despite the fact that this function is public, + * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! + * @return a UClassID for this ForwardCharacterIterator + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID() const override = 0; + + /** + * Gets the current code unit for returning and advances to the next code unit + * in the iteration range + * (toward endIndex()). If there are + * no more code units to return, returns DONE. + * @return the current code unit. + * @stable ICU 2.0 + */ + virtual char16_t nextPostInc() = 0; + + /** + * Gets the current code point for returning and advances to the next code point + * in the iteration range + * (toward endIndex()). If there are + * no more code points to return, returns DONE. + * @return the current code point. + * @stable ICU 2.0 + */ + virtual UChar32 next32PostInc() = 0; + + /** + * Returns false if there are no more code units or code points + * at or after the current position in the iteration range. + * This is used with nextPostInc() or next32PostInc() in forward + * iteration. + * @returns false if there are no more code units or code points + * at or after the current position in the iteration range. + * @stable ICU 2.0 + */ + virtual UBool hasNext() = 0; + +protected: + /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/ + ForwardCharacterIterator(); + + /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/ + ForwardCharacterIterator(const ForwardCharacterIterator &other); + + /** + * Assignment operator to be overridden in the implementing class. + * @stable ICU 2.0 + */ + ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; } +}; + +/** + * Abstract class that defines an API for iteration + * on text objects. + * This is an interface for forward and backward iteration + * and random access into a text object. + * + *

The API provides backward compatibility to the Java and older ICU + * CharacterIterator classes but extends them significantly: + *

    + *
  1. CharacterIterator is now a subclass of ForwardCharacterIterator. + *
  2. While the old API functions provided forward iteration with + * "pre-increment" semantics, the new one also provides functions + * with "post-increment" semantics. They are more efficient and should + * be the preferred iterator functions for new implementations. + * The backward iteration always had "pre-decrement" semantics, which + * are efficient. + *
  3. Just like ForwardCharacterIterator, it provides access to + * both code units and code points. Code point access versions are available + * for the old and the new iteration semantics. + *
  4. There are new functions for setting and moving the current position + * without returning a character, for efficiency. + *
+ * + * See ForwardCharacterIterator for examples for using the new forward iteration + * functions. For backward iteration, there is also a hasPrevious() function + * that can be used analogously to hasNext(). + * The old functions work as before and are shown below.

+ * + *

Examples for some of the new functions:

+ * + * Forward iteration with hasNext(): + * \code + * void forward1(CharacterIterator &it) { + * UChar32 c; + * for(it.setToStart(); it.hasNext();) { + * c=it.next32PostInc(); + * // use c + * } + * } + * \endcode + * Forward iteration more similar to loops with the old forward iteration, + * showing a way to convert simple for() loops: + * \code + * void forward2(CharacterIterator &it) { + * char16_t c; + * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) { + * // use c + * } + * } + * \endcode + * Backward iteration with setToEnd() and hasPrevious(): + * \code + * void backward1(CharacterIterator &it) { + * UChar32 c; + * for(it.setToEnd(); it.hasPrevious();) { + * c=it.previous32(); + * // use c + * } + * } + * \endcode + * Backward iteration with a more traditional for() loop: + * \code + * void backward2(CharacterIterator &it) { + * char16_t c; + * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) { + * // use c + * } + * } + * \endcode + * + * Example for random access: + * \code + * void random(CharacterIterator &it) { + * // set to the third code point from the beginning + * it.move32(3, CharacterIterator::kStart); + * // get a code point from here without moving the position + * UChar32 c=it.current32(); + * // get the position + * int32_t pos=it.getIndex(); + * // get the previous code unit + * char16_t u=it.previous(); + * // move back one more code unit + * it.move(-1, CharacterIterator::kCurrent); + * // set the position back to where it was + * // and read the same code point c and move beyond it + * it.setIndex(pos); + * if(c!=it.next32PostInc()) { + * exit(1); // CharacterIterator inconsistent + * } + * } + * \endcode + * + *

Examples, especially for the old API:

+ * + * Function processing characters, in this example simple output + *
+ * \code
+ *  void processChar( char16_t c )
+ *  {
+ *      cout << " " << c;
+ *  }
+ * \endcode
+ * 
+ * Traverse the text from start to finish + *
 
+ * \code
+ *  void traverseForward(CharacterIterator& iter)
+ *  {
+ *      for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
+ *          processChar(c);
+ *      }
+ *  }
+ * \endcode
+ * 
+ * Traverse the text backwards, from end to start + *
+ * \code
+ *  void traverseBackward(CharacterIterator& iter)
+ *  {
+ *      for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
+ *          processChar(c);
+ *      }
+ *  }
+ * \endcode
+ * 
+ * Traverse both forward and backward from a given position in the text. + * The Unicode::isLetter() and Unicode::isDigit() tests in this example stand in for some additional stopping criteria. + *
+ * \code
+ * void traverseOut(CharacterIterator& iter, int32_t pos)
+ * {
+ *      char16_t c;
+ *      for (c = iter.setIndex(pos);
+ *      c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ *          c = iter.next()) {}
+ *      int32_t end = iter.getIndex();
+ *      for (c = iter.setIndex(pos);
+ *          c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ *          c = iter.previous()) {}
+ *      int32_t start = iter.getIndex() + 1;
+ *  
+ *      cout << "start: " << start << " end: " << end << endl;
+ *      for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
+ *          processChar(c);
+ *     }
+ *  }
+ * \endcode
+ * 
+ * Creating a StringCharacterIterator and calling the test functions + *
+ * \code
+ *  void CharacterIterator_Example( void )
+ *   {
+ *       cout << endl << "===== CharacterIterator_Example: =====" << endl;
+ *       UnicodeString text("Ein kleiner Satz.");
+ *       StringCharacterIterator iterator(text);
+ *       cout << "----- traverseForward: -----------" << endl;
+ *       traverseForward( iterator );
+ *       cout << endl << endl << "----- traverseBackward: ----------" << endl;
+ *       traverseBackward( iterator );
+ *       cout << endl << endl << "----- traverseOut: ---------------" << endl;
+ *       traverseOut( iterator, 7 );
+ *       cout << endl << endl << "-----" << endl;
+ *   }
+ * \endcode
+ * 
+ * + * @stable ICU 2.0 + */ +class U_COMMON_API CharacterIterator : public ForwardCharacterIterator { +public: + /** + * Origin enumeration for the move() and move32() functions. + * @stable ICU 2.0 + */ + enum EOrigin { kStart, kCurrent, kEnd }; + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~CharacterIterator(); + + /** + * Returns a pointer to a new CharacterIterator of the same + * concrete class as this one, and referring to the same + * character in the same text-storage object as this one. The + * caller is responsible for deleting the new clone. + * @return a pointer to a new CharacterIterator + * @stable ICU 2.0 + */ + virtual CharacterIterator* clone() const = 0; + + /** + * Sets the iterator to refer to the first code unit in its + * iteration range, and returns that code unit. + * This can be used to begin an iteration with next(). + * @return the first code unit in its iteration range. + * @stable ICU 2.0 + */ + virtual char16_t first() = 0; + + /** + * Sets the iterator to refer to the first code unit in its + * iteration range, returns that code unit, and moves the position + * to the second code unit. This is an alternative to setToStart() + * for forward iteration with nextPostInc(). + * @return the first code unit in its iteration range. + * @stable ICU 2.0 + */ + virtual char16_t firstPostInc(); + + /** + * Sets the iterator to refer to the first code point in its + * iteration range, and returns that code point. + * This can be used to begin an iteration with next32(). + * Note that an iteration with next32PostInc(), beginning with, + * e.g., setToStart() or firstPostInc(), is more efficient. + * @return the first code point in its iteration range. + * @stable ICU 2.0 + */ + virtual UChar32 first32() = 0; + + /** + * Sets the iterator to refer to the first code point in its + * iteration range, returns that code point, and moves the position + * to the second code point.
This is an alternative to setToStart() + * for forward iteration with next32PostInc(). + * @return the first code point in its iteration range. + * @stable ICU 2.0 + */ + virtual UChar32 first32PostInc(); + + /** + * Sets the iterator to refer to the first code unit or code point in its + * iteration range. This can be used to begin a forward + * iteration with nextPostInc() or next32PostInc(). + * Implemented inline as move(0, kStart). + * @return the start position of the iteration range + * @stable ICU 2.0 + */ + inline int32_t setToStart(); + + /** + * Sets the iterator to refer to the last code unit in its + * iteration range, and returns that code unit. + * This can be used to begin an iteration with previous(). + * @return the last code unit. + * @stable ICU 2.0 + */ + virtual char16_t last() = 0; + + /** + * Sets the iterator to refer to the last code point in its + * iteration range, and returns that code point. + * This can be used to begin an iteration with previous32(). + * @return the last code point. + * @stable ICU 2.0 + */ + virtual UChar32 last32() = 0; + + /** + * Sets the iterator to the end of its iteration range, just behind + * the last code unit or code point. This can be used to begin a backward + * iteration with previous() or previous32(). + * Implemented inline as move(0, kEnd). + * @return the end position of the iteration range + * @stable ICU 2.0 + */ + inline int32_t setToEnd(); + + /** + * Sets the iterator to refer to the "position"-th code unit + * in the text-storage object the iterator refers to, and + * returns that code unit. + * @param position the "position"-th code unit in the text-storage object + * @return the "position"-th code unit. + * @stable ICU 2.0 + */ + virtual char16_t setIndex(int32_t position) = 0; + + /** + * Sets the iterator to refer to the beginning of the code point + * that contains the "position"-th code unit + * in the text-storage object the iterator refers to, and + * returns that code point.
+ * The current position is adjusted to the beginning of the code point + * (its first code unit). + * @param position the "position"-th code unit in the text-storage object + * @return the code point that contains the "position"-th code unit. + * @stable ICU 2.0 + */ + virtual UChar32 setIndex32(int32_t position) = 0; + + /** + * Returns the code unit the iterator currently refers to. + * @return the current code unit. + * @stable ICU 2.0 + */ + virtual char16_t current() const = 0; + + /** + * Returns the code point the iterator currently refers to. + * @return the current code point. + * @stable ICU 2.0 + */ + virtual UChar32 current32() const = 0; + + /** + * Advances to the next code unit in the iteration range + * (toward endIndex()), and returns that code unit. If there are + * no more code units to return, returns DONE. + * @return the next code unit. + * @stable ICU 2.0 + */ + virtual char16_t next() = 0; + + /** + * Advances to the next code point in the iteration range + * (toward endIndex()), and returns that code point. If there are + * no more code points to return, returns DONE. + * Note that iteration with "pre-increment" semantics is less + * efficient than iteration with "post-increment" semantics + * that is provided by next32PostInc(). + * @return the next code point. + * @stable ICU 2.0 + */ + virtual UChar32 next32() = 0; + + /** + * Advances to the previous code unit in the iteration range + * (toward startIndex()), and returns that code unit. If there are + * no more code units to return, returns DONE. + * @return the previous code unit. + * @stable ICU 2.0 + */ + virtual char16_t previous() = 0; + + /** + * Advances to the previous code point in the iteration range + * (toward startIndex()), and returns that code point. If there are + * no more code points to return, returns DONE. + * @return the previous code point.
+ * @stable ICU 2.0 + */ + virtual UChar32 previous32() = 0; + + /** + * Returns false if there are no more code units or code points + * before the current position in the iteration range. + * This is used with previous() or previous32() in backward + * iteration. + * @return false if there are no more code units or code points + * before the current position in the iteration range, true otherwise. + * @stable ICU 2.0 + */ + virtual UBool hasPrevious() = 0; + + /** + * Returns the numeric index in the underlying text-storage + * object of the character returned by first(). Since it's + * possible to create an iterator that iterates across only + * part of a text-storage object, this number isn't + * necessarily 0. + * Implemented inline; returns the protected field "begin". + * @return the numeric index in the underlying text-storage + * object of the character returned by first(). + * @stable ICU 2.0 + */ + inline int32_t startIndex() const; + + /** + * Returns the numeric index in the underlying text-storage + * object of the position immediately BEYOND the character + * returned by last(). + * Implemented inline; returns the protected field "end". + * @return the numeric index in the underlying text-storage + * object of the position immediately BEYOND the character + * returned by last(). + * @stable ICU 2.0 + */ + inline int32_t endIndex() const; + + /** + * Returns the numeric index in the underlying text-storage + * object of the character the iterator currently refers to + * (i.e., the character returned by current()). + * Implemented inline; returns the protected field "pos". + * @return the numeric index in the text-storage object of + * the character the iterator currently refers to + * @stable ICU 2.0 + */ + inline int32_t getIndex() const; + + /** + * Returns the length of the entire text in the underlying + * text-storage object. + * Implemented inline; returns the protected field "textLength". 
+ * @return the length of the entire text in the text-storage object + * @stable ICU 2.0 + */ + inline int32_t getLength() const; + + /** + * Moves the current position relative to the start or end of the + * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * @param delta the position relative to origin. A positive delta means forward; + * a negative delta means backward. + * @param origin Origin enumeration {kStart, kCurrent, kEnd} + * @return the new position + * @stable ICU 2.0 + */ + virtual int32_t move(int32_t delta, EOrigin origin) = 0; + + /** + * Moves the current position relative to the start or end of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code points forward + * or backward by specifying a positive or negative delta. + * @param delta the position relative to origin. A positive delta means forward; + * a negative delta means backward. + * @param origin Origin enumeration {kStart, kCurrent, kEnd} + * @return the new position + * @stable ICU 2.0 + */ +#ifdef move32 + // One of the system headers right now is sometimes defining a conflicting macro we don't use +#undef move32 +#endif + virtual int32_t move32(int32_t delta, EOrigin origin) = 0; + + /** + * Copies the text under iteration into the UnicodeString + * referred to by "result". + * @param result Receives a copy of the text under iteration. + * @stable ICU 2.0 + */ + virtual void getText(UnicodeString& result) = 0; + +protected: + /** + * Empty constructor. + * @stable ICU 2.0 + */ + CharacterIterator(); + + /** + * Constructor, just setting the length field in this base class. + * @stable ICU 2.0 + */ + CharacterIterator(int32_t length); + + /** + * Constructor, just setting the length and position fields in this base class. + * @stable ICU 2.0 + */ + CharacterIterator(int32_t length, int32_t position); + + /** + * Constructor, just setting the length, start, end, and position fields in this base class. + * @stable ICU 2.0 + */ + CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position); + + /** + * Copy constructor.
+ * + * @param that The CharacterIterator to be copied + * @stable ICU 2.0 + */ + CharacterIterator(const CharacterIterator &that); + + /** + * Assignment operator. Sets this CharacterIterator to have the same behavior, + * as the one passed in. + * @param that The CharacterIterator passed in. + * @return the newly set CharacterIterator. + * @stable ICU 2.0 + */ + CharacterIterator &operator=(const CharacterIterator &that); + + /** + * Base class text length field. + * Necessary for correct getText() and hashCode(). + * @stable ICU 2.0 + */ + int32_t textLength; + + /** + * Base class field for the current position. + * @stable ICU 2.0 + */ + int32_t pos; + + /** + * Base class field for the start of the iteration range. + * @stable ICU 2.0 + */ + int32_t begin; + + /** + * Base class field for the end of the iteration range. + * @stable ICU 2.0 + */ + int32_t end; +}; + +inline bool +ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const { + return !operator==(that); +} + +inline int32_t +CharacterIterator::setToStart() { + return move(0, kStart); +} + +inline int32_t +CharacterIterator::setToEnd() { + return move(0, kEnd); +} + +inline int32_t +CharacterIterator::startIndex() const { + return begin; +} + +inline int32_t +CharacterIterator::endIndex() const { + return end; +} + +inline int32_t +CharacterIterator::getIndex() const { + return pos; +} + +inline int32_t +CharacterIterator::getLength() const { + return textLength; +} + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/choicfmt.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/choicfmt.h new file mode 100644 index 0000000000..2b6fb626ac --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/choicfmt.h @@ -0,0 +1,601 @@ +// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2013, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File CHOICFMT.H +* +* Modification History: +* +* Date Name Description +* 02/19/97 aliu Converted from java. +* 03/20/97 helena Finished first cut of implementation and got rid +* of nextDouble/previousDouble and replaced with +* boolean array. +* 4/10/97 aliu Clean up. Modified to work on AIX. +* 8/6/97 nos Removed overloaded constructor, member var 'buffer'. +* 07/22/98 stephen Removed operator!= (implemented in Format) +******************************************************************************** +*/ + +#ifndef CHOICFMT_H +#define CHOICFMT_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C++ API: Choice Format. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/fieldpos.h" +#include "unicode/format.h" +#include "unicode/messagepattern.h" +#include "unicode/numfmt.h" +#include "unicode/unistr.h" + +#ifndef U_HIDE_DEPRECATED_API + +U_NAMESPACE_BEGIN + +class MessageFormat; + +/** + * ChoiceFormat converts between ranges of numeric values and strings for those ranges. + * The strings must conform to the MessageFormat pattern syntax. + * + *

ChoiceFormat is probably not what you need. + * Please use MessageFormat + * with plural arguments for proper plural selection, + * and select arguments for simple selection among a fixed set of choices!

+ * + *

A ChoiceFormat splits + * the real number line \htmlonly-∞ to + * +∞\endhtmlonly into two + * or more contiguous ranges. Each range is mapped to a + * string.

+ * + *

ChoiceFormat was originally intended + * for displaying grammatically correct + * plurals such as "There is one file." vs. "There are 2 files." + * However, plural rules for many languages + * are too complex for the capabilities of ChoiceFormat, + * and its requirement of specifying the precise rules for each message + * is unmanageable for translators.

+ * + *

There are two methods of defining a ChoiceFormat; both + * are equivalent. The first is by using a string pattern. This is the + * preferred method in most cases. The second method is through direct + * specification of the arrays that logically make up the + * ChoiceFormat.

+ * + *

Note: Typically, choice formatting is done (if done at all) via MessageFormat + * with a choice argument type, + * rather than using a stand-alone ChoiceFormat.

+ * + *
Patterns and Their Interpretation
+ * + *

The pattern string defines the range boundaries and the strings for each number range. + * Syntax: + *

+ * choiceStyle = number separator message ('|' number separator message)*
+ * number = normal_number | ['-'] \htmlonly∞\endhtmlonly (U+221E, infinity)
+ * normal_number = double value (unlocalized ASCII string)
+ * separator = less_than | less_than_or_equal
+ * less_than = '<'
+ * less_than_or_equal = '#' | \htmlonly≤\endhtmlonly (U+2264)
+ * message: see {@link MessageFormat}
+ * 
+ * Pattern_White_Space between syntax elements is ignored, except + * around each range's sub-message.

+ * + *

Each numeric sub-range extends from the current range's number + * to the next range's number. + * The number itself is included in its range if a less_than_or_equal sign is used, + * and excluded from its range (and instead included in the previous range) + * if a less_than sign is used.

+ * + *

When a ChoiceFormat is constructed from + * arrays of numbers, closure flags and strings, + * they are interpreted just like + * the sequence of (number separator string) in an equivalent pattern string. + * closure[i]==true corresponds to a less_than separator sign. + * The equivalent pattern string will be constructed automatically.

+ * + *

During formatting, a number is mapped to the first range + * where the number is not greater than the range's upper limit. + * That range's message string is returned. A NaN maps to the very first range.

+ * + *

During parsing, a range is selected for the longest match of + * any range's message. That range's number is returned, ignoring the separator/closure. + * Only a simple string match is performed, without parsing of arguments that + * might be specified in the message strings.

+ * + *

Note that the first range's number is ignored in formatting + * but may be returned from parsing.

+ * + *
Examples
+ * + *

Here is an example of two arrays that map the number + * 1..7 to the English day of the week abbreviations + * Sun..Sat. No closures array is given; this is the same as + * specifying all closures to be false.

+ * + *
    {1,2,3,4,5,6,7},
+ *     {"Sun","Mon","Tue","Wed","Thur","Fri","Sat"}
+ * + *

Here is an example that maps the ranges [-Inf, 1), [1, 1], and (1, + * +Inf] to three strings. That is, the number line is split into three + * ranges: x < 1.0, x = 1.0, and x > 1.0. + * (The round parentheses in the notation above indicate an exclusive boundary, + * like the turned bracket in European notation: [-Inf, 1) == [-Inf, 1[ )

+ * + *
    {0, 1, 1},
+ *     {false, false, true},
+ *     {"no files", "one file", "many files"}
+ * + *

Here is an example that shows formatting and parsing:

+ * + * \code + * #include <unicode/choicfmt.h> + * #include <unicode/unistr.h> + * #include <iostream.h> + * + * int main(int argc, char *argv[]) { + * double limits[] = {1,2,3,4,5,6,7}; + * UnicodeString monthNames[] = { + * "Sun","Mon","Tue","Wed","Thu","Fri","Sat"}; + * ChoiceFormat fmt(limits, monthNames, 7); + * UnicodeString str; + * char buf[256]; + * for (double x = 1.0; x <= 8.0; x += 1.0) { + * fmt.format(x, str); + * str.extract(0, str.length(), buf, 256, ""); + * str.truncate(0); + * cout << x << " -> " + * << buf << endl; + * } + * cout << endl; + * return 0; + * } + * \endcode + * + *

User subclasses are not supported. While clients may write + * subclasses, such code will not necessarily work and will not be + * guaranteed to work stably from release to release. + * + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ +class U_I18N_API ChoiceFormat: public NumberFormat { +public: + /** + * Constructs a new ChoiceFormat from the pattern string. + * + * @param pattern Pattern used to construct object. + * @param status Output param to receive success code. If the + * pattern cannot be parsed, set to failure code. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + ChoiceFormat(const UnicodeString& pattern, + UErrorCode& status); + + + /** + * Constructs a new ChoiceFormat with the given limits and message strings. + * All closure flags default to false, + * equivalent to less_than_or_equal separators. + * + * Copies the limits and formats instead of adopting them. + * + * @param limits Array of limit values. + * @param formats Array of formats. + * @param count Size of 'limits' and 'formats' arrays. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + ChoiceFormat(const double* limits, + const UnicodeString* formats, + int32_t count ); + + /** + * Constructs a new ChoiceFormat with the given limits, closure flags and message strings. + * + * Copies the limits and formats instead of adopting them. + * + * @param limits Array of limit values + * @param closures Array of booleans specifying whether each + * element of 'limits' is open or closed. If false, then the + * corresponding limit number is a member of its range. + * If true, then the limit number belongs to the previous range it. + * @param formats Array of formats + * @param count Size of 'limits', 'closures', and 'formats' arrays + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. 
+ */ + ChoiceFormat(const double* limits, + const UBool* closures, + const UnicodeString* formats, + int32_t count); + + /** + * Copy constructor. + * + * @param that ChoiceFormat object to be copied from + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + ChoiceFormat(const ChoiceFormat& that); + + /** + * Assignment operator. + * + * @param that ChoiceFormat object to be copied + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + const ChoiceFormat& operator=(const ChoiceFormat& that); + + /** + * Destructor. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual ~ChoiceFormat(); + + /** + * Clones this Format object. The caller owns the + * result and must delete it when done. + * + * @return a copy of this object + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual ChoiceFormat* clone() const override; + + /** + * Returns true if the given Format objects are semantically equal. + * Objects of different subclasses are considered unequal. + * + * @param other ChoiceFormat object to be compared + * @return true if other is the same as this. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual bool operator==(const Format& other) const override; + + /** + * Sets the pattern. + * @param pattern The pattern to be applied. + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual void applyPattern(const UnicodeString& pattern, + UErrorCode& status); + + /** + * Sets the pattern. + * @param pattern The pattern to be applied. 
+ * @param parseError Struct to receive information on position + * of error if an error is encountered + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual void applyPattern(const UnicodeString& pattern, + UParseError& parseError, + UErrorCode& status); + /** + * Gets the pattern. + * + * @param pattern Output param which will receive the pattern + * Previous contents are deleted. + * @return A reference to 'pattern' + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UnicodeString& toPattern(UnicodeString &pattern) const; + + /** + * Sets the choices to be used in formatting. + * For details see the constructor with the same parameter list. + * + * @param limitsToCopy Contains the top value that you want + * parsed with that format,and should be in + * ascending sorted order. When formatting X, + * the choice will be the i, where limit[i] + * <= X < limit[i+1]. + * @param formatsToCopy The format strings you want to use for each limit. + * @param count The size of the above arrays. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual void setChoices(const double* limitsToCopy, + const UnicodeString* formatsToCopy, + int32_t count ); + + /** + * Sets the choices to be used in formatting. + * For details see the constructor with the same parameter list. + * + * @param limits Array of limits + * @param closures Array of limit booleans + * @param formats Array of format string + * @param count The size of the above arrays + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual void setChoices(const double* limits, + const UBool* closures, + const UnicodeString* formats, + int32_t count); + + /** + * Returns nullptr and 0. 
+ * Before ICU 4.8, this used to return the choice limits array. + * + * @param count Will be set to 0. + * @return nullptr + * @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern. + */ + virtual const double* getLimits(int32_t& count) const; + + /** + * Returns nullptr and 0. + * Before ICU 4.8, this used to return the limit booleans array. + * + * @param count Will be set to 0. + * @return nullptr + * @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern. + */ + virtual const UBool* getClosures(int32_t& count) const; + + /** + * Returns nullptr and 0. + * Before ICU 4.8, this used to return the array of choice strings. + * + * @param count Will be set to 0. + * @return nullptr + * @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern. + */ + virtual const UnicodeString* getFormats(int32_t& count) const; + + + using NumberFormat::format; + + /** + * Formats a double number using this object's choices. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPosition& pos) const override; + /** + * Formats an int32_t number using this object's choices. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. 
+ */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos) const override; + + /** + * Formats an int64_t number using this object's choices. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPosition& pos) const override; + + /** + * Formats an array of objects using this object's choices. + * + * @param objs The array of objects to be formatted. + * @param cnt The size of objs. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param success Output param set to success/failure code on + * exit. + * @return Reference to 'appendTo' parameter. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UnicodeString& format(const Formattable* objs, + int32_t cnt, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& success) const; + + using NumberFormat::parse; + + /** + * Looks for the longest match of any message string on the input text and, + * if there is a match, sets the result object to the corresponding range's number. + * + * If no string matches, then the parsePosition is unchanged. + * + * @param text The text to be parsed. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parsePosition The position to start parsing at on input. + * On output, moved to after the last successfully + * parse character. 
On parse failure, does not change. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual void parse(const UnicodeString& text, + Formattable& result, + ParsePosition& parsePosition) const override; + + /** + * Returns a unique class ID POLYMORPHICALLY. Part of ICU's "poor man's RTTI". + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UClassID getDynamicClassID() const override; + + /** + * Returns the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID(). For example: + *

+     * .       Base* polymorphic_pointer = createPolymorphicObject();
+     * .       if (polymorphic_pointer->getDynamicClassID() ==
+     * .           Derived::getStaticClassID()) ...
+     * 
+ * @return The class ID for all objects of this class. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + static UClassID U_EXPORT2 getStaticClassID(); + +private: + /** + * Converts a double value to a string. + * @param value the double number to be converted. + * @param string the result string. + * @return the converted string. + */ + static UnicodeString& dtos(double value, UnicodeString& string); + + ChoiceFormat() = delete; // default constructor not implemented + + /** + * Construct a new ChoiceFormat with the limits and the corresponding formats + * based on the pattern. + * + * @param newPattern Pattern used to construct object. + * @param parseError Struct to receive information on position + * of error if an error is encountered. + * @param status Output param to receive success code. If the + * pattern cannot be parsed, set to failure code. + */ + ChoiceFormat(const UnicodeString& newPattern, + UParseError& parseError, + UErrorCode& status); + + friend class MessageFormat; + + virtual void setChoices(const double* limits, + const UBool* closures, + const UnicodeString* formats, + int32_t count, + UErrorCode &errorCode); + + /** + * Finds the ChoiceFormat sub-message for the given number. + * @param pattern A MessagePattern. + * @param partIndex the index of the first ChoiceFormat argument style part. + * @param number a number to be mapped to one of the ChoiceFormat argument's intervals + * @return the sub-message start part index. + */ + static int32_t findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number); + + static double parseArgument( + const MessagePattern &pattern, int32_t partIndex, + const UnicodeString &source, ParsePosition &pos); + + /** + * Matches the pattern string from the end of the partIndex to + * the beginning of the limitPartIndex, + * including all syntax except SKIP_SYNTAX, + * against the source string starting at sourceOffset. 
+ * If they match, returns the length of the source string match. + * Otherwise returns -1. + */ + static int32_t matchStringUntilLimitPart( + const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex, + const UnicodeString &source, int32_t sourceOffset); + + /** + * Some of the ChoiceFormat constructors do not have a UErrorCode parameter. + * We need _some_ way to provide one for the MessagePattern constructor. + * Alternatively, the MessagePattern could be a pointer field, but that is + * not nice either. + */ + UErrorCode constructorErrorCode; + + /** + * The MessagePattern which contains the parsed structure of the pattern string. + * + * Starting with ICU 4.8, the MessagePattern contains a sequence of + * numeric/selector/message parts corresponding to the parsed pattern. + * For details see the MessagePattern class API docs. + */ + MessagePattern msgPattern; + + /** + * Docs & fields from before ICU 4.8, before MessagePattern was used. + * Commented out, and left only for explanation of semantics. + * -------- + * Each ChoiceFormat divides the range -Inf..+Inf into fCount + * intervals. The intervals are: + * + * 0: fChoiceLimits[0]..fChoiceLimits[1] + * 1: fChoiceLimits[1]..fChoiceLimits[2] + * ... + * fCount-2: fChoiceLimits[fCount-2]..fChoiceLimits[fCount-1] + * fCount-1: fChoiceLimits[fCount-1]..+Inf + * + * Interval 0 is special; during formatting (mapping numbers to + * strings), it also contains all numbers less than + * fChoiceLimits[0], as well as NaN values. + * + * Interval i maps to and from string fChoiceFormats[i]. When + * parsing (mapping strings to numbers), then intervals map to + * their lower limit, that is, interval i maps to fChoiceLimit[i]. + * + * The intervals may be closed, half open, or open. This affects + * formatting but does not affect parsing. Interval i is affected + * by fClosures[i] and fClosures[i+1]. If fClosures[i] + * is false, then the value fChoiceLimits[i] is in interval i. 
+ * That is, intervals i and i are: + * + * i-1: ... x < fChoiceLimits[i] + * i: fChoiceLimits[i] <= x ... + * + * If fClosures[i] is true, then the value fChoiceLimits[i] is + * in interval i-1. That is, intervals i-1 and i are: + * + * i-1: ... x <= fChoiceLimits[i] + * i: fChoiceLimits[i] < x ... + * + * Because of the nature of interval 0, fClosures[0] has no + * effect. + */ + // double* fChoiceLimits; + // UBool* fClosures; + // UnicodeString* fChoiceFormats; + // int32_t fCount; +}; + + +U_NAMESPACE_END + +#endif // U_HIDE_DEPRECATED_API +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // CHOICFMT_H +//eof diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/coleitr.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/coleitr.h new file mode 100644 index 0000000000..a147d6cfba --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/coleitr.h @@ -0,0 +1,411 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ****************************************************************************** + * Copyright (C) 1997-2014, International Business Machines + * Corporation and others. All Rights Reserved. + ****************************************************************************** + */ + +/** + * \file + * \brief C++ API: Collation Element Iterator. + */ + +/** +* File coleitr.h +* +* Created by: Helena Shih +* +* Modification History: +* +* Date Name Description +* +* 8/18/97 helena Added internal API documentation. +* 08/03/98 erm Synched with 1.2 version CollationElementIterator.java +* 12/10/99 aliu Ported Thai collation support from Java. 
+* 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h) +* 02/19/01 swquek Removed CollationElementsIterator() since it is +* private constructor and no calls are made to it +* 2012-2014 markus Rewritten in C++ again. +*/ + +#ifndef COLEITR_H +#define COLEITR_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_COLLATION + +#include "unicode/unistr.h" +#include "unicode/uobject.h" + +struct UCollationElements; +struct UHashtable; + +U_NAMESPACE_BEGIN + +struct CollationData; + +class CharacterIterator; +class CollationIterator; +class RuleBasedCollator; +class UCollationPCE; +class UVector32; + +/** +* The CollationElementIterator class is used as an iterator to walk through +* each character of an international string. Use the iterator to return the +* ordering priority of the positioned character. The ordering priority of a +* character, which we refer to as a key, defines how a character is collated in +* the given collation object. +* For example, consider the following in Slovak and in traditional Spanish collation: +*
+*        "ca" -> the first key is key('c') and second key is key('a').
+*        "cha" -> the first key is key('ch') and second key is key('a').
+* And in German phonebook collation, +*
 \htmlonly       "æb"-> the first key is key('a'), the second key is key('e'), and
+*        the third key is key('b'). \endhtmlonly 
+* The key of a character is an integer composed of primary order(short), +* secondary order(char), and tertiary order(char). Java strictly defines the +* size and signedness of its primitive data types. Therefore, the static +* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return +* int32_t to ensure the correctness of the key value. +*

Example of the iterator usage: (without error checking) +*

+* \code
+*   void CollationElementIterator_Example()
+*   {
+*       UnicodeString str = "This is a test";
+*       UErrorCode success = U_ZERO_ERROR;
+*       RuleBasedCollator* rbc =
+*           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
+*       CollationElementIterator* c =
+*           rbc->createCollationElementIterator( str );
+*       int32_t order = c->next(success);
+*       c->reset();
+*       order = c->previous(success);
+*       delete c;
+*       delete rbc;
+*   }
+* \endcode
+* 
+*

+* The method next() returns the collation order of the next character based on +* the comparison level of the collator. The method previous() returns the +* collation order of the previous character based on the comparison level of +* the collator. The Collation Element Iterator moves only in one direction +* between calls to reset(), setOffset(), or setText(). That is, calls to next() +* and previous() cannot be interleaved. Whenever previous() is to be called after +* next() or vice versa, reset(), setOffset() or setText() has to be called first +* to reset the status, shifting pointers to either the end or the start of +* the string (reset() or setText()), or the specified position (setOffset()). +* Hence at the next call of next() or previous(), the first or last collation order, +* or collation order at the specified position will be returned. If a change of +* direction is done without one of these calls, the result is undefined. +*

+* The result of a forward iterate (next()) and reversed result of the backward +* iterate (previous()) on the same string are equivalent, if collation orders +* with the value 0 are ignored. +* Character based on the comparison level of the collator. A collation order +* consists of primary order, secondary order and tertiary order. The data +* type of the collation order is int32_t. +* +* Note, CollationElementIterator should not be subclassed. +* @see Collator +* @see RuleBasedCollator +* @version 1.8 Jan 16 2001 +*/ +class U_I18N_API CollationElementIterator final : public UObject { +public: + + // CollationElementIterator public data member ------------------------------ + + enum { + /** + * NULLORDER indicates that an error has occurred while processing + * @stable ICU 2.0 + */ + NULLORDER = static_cast(0xffffffff) + }; + + // CollationElementIterator public constructor/destructor ------------------- + + /** + * Copy constructor. + * + * @param other the object to be copied from + * @stable ICU 2.0 + */ + CollationElementIterator(const CollationElementIterator& other); + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~CollationElementIterator(); + + // CollationElementIterator public methods ---------------------------------- + + /** + * Returns true if "other" is the same as "this" + * + * @param other the object to be compared + * @return true if "other" is the same as "this" + * @stable ICU 2.0 + */ + bool operator==(const CollationElementIterator& other) const; + + /** + * Returns true if "other" is not the same as "this". + * + * @param other the object to be compared + * @return true if "other" is not the same as "this" + * @stable ICU 2.0 + */ + bool operator!=(const CollationElementIterator& other) const; + + /** + * Resets the cursor to the beginning of the string. + * @stable ICU 2.0 + */ + void reset(); + + /** + * Gets the ordering priority of the next character in the string. + * @param status the error code status. 
+ * @return the next character's ordering. otherwise returns NULLORDER if an + * error has occurred or if the end of string has been reached + * @stable ICU 2.0 + */ + int32_t next(UErrorCode& status); + + /** + * Get the ordering priority of the previous collation element in the string. + * @param status the error code status. + * @return the previous element's ordering. otherwise returns NULLORDER if an + * error has occurred or if the start of string has been reached + * @stable ICU 2.0 + */ + int32_t previous(UErrorCode& status); + + /** + * Gets the primary order of a collation order. + * @param order the collation order + * @return the primary order of a collation order. + * @stable ICU 2.0 + */ + static inline int32_t primaryOrder(int32_t order); + + /** + * Gets the secondary order of a collation order. + * @param order the collation order + * @return the secondary order of a collation order. + * @stable ICU 2.0 + */ + static inline int32_t secondaryOrder(int32_t order); + + /** + * Gets the tertiary order of a collation order. + * @param order the collation order + * @return the tertiary order of a collation order. + * @stable ICU 2.0 + */ + static inline int32_t tertiaryOrder(int32_t order); + + /** + * Return the maximum length of any expansion sequences that end with the + * specified comparison order. + * @param order a collation order returned by previous or next. + * @return maximum size of the expansion sequences ending with the collation + * element or 1 if collation element does not occur at the end of any + * expansion sequence + * @stable ICU 2.0 + */ + int32_t getMaxExpansion(int32_t order) const; + + /** + * Gets the comparison order in the desired strength. Ignore the other + * differences. + * @param order The order value + * @stable ICU 2.0 + */ + int32_t strengthOrder(int32_t order) const; + + /** + * Sets the source string. + * @param str the source string. + * @param status the error code status. 
+ * @stable ICU 2.0 + */ + void setText(const UnicodeString& str, UErrorCode& status); + + /** + * Sets the source string. + * @param str the source character iterator. + * @param status the error code status. + * @stable ICU 2.0 + */ + void setText(CharacterIterator& str, UErrorCode& status); + + /** + * Checks if a comparison order is ignorable. + * @param order the collation order. + * @return true if a character is ignorable, false otherwise. + * @stable ICU 2.0 + */ + static inline UBool isIgnorable(int32_t order); + + /** + * Gets the offset of the currently processed character in the source string. + * @return the offset of the character. + * @stable ICU 2.0 + */ + int32_t getOffset() const; + + /** + * Sets the offset of the currently processed character in the source string. + * @param newOffset the new offset. + * @param status the error code status. + * @return the offset of the character. + * @stable ICU 2.0 + */ + void setOffset(int32_t newOffset, UErrorCode& status); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const override; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. 
+ * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) { + return reinterpret_cast(uc); + } + /** @internal */ + static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) { + return reinterpret_cast(uc); + } + /** @internal */ + inline UCollationElements *toUCollationElements() { + return reinterpret_cast(this); + } + /** @internal */ + inline const UCollationElements *toUCollationElements() const { + return reinterpret_cast(this); + } +#endif // U_HIDE_INTERNAL_API + +private: + friend class RuleBasedCollator; + friend class UCollationPCE; + + /** + * CollationElementIterator constructor. This takes the source string and the + * collation object. The cursor will walk thru the source string based on the + * predefined collation rules. If the source string is empty, NULLORDER will + * be returned on the calls to next(). + * @param sourceText the source string. + * @param order the collation object. + * @param status the error code status. + */ + CollationElementIterator(const UnicodeString& sourceText, + const RuleBasedCollator* order, UErrorCode& status); + // Note: The constructors should take settings & tailoring, not a collator, + // to avoid circular dependencies. + // However, for operator==() we would need to be able to compare tailoring data for equality + // without making CollationData or CollationTailoring depend on TailoredSet. + // (See the implementation of RuleBasedCollator::operator==().) + // That might require creating an intermediate class that would be used + // by both CollationElementIterator and RuleBasedCollator + // but only contain the part of RBC== related to data and rules. + + /** + * CollationElementIterator constructor. This takes the source string and the + * collation object. 
The cursor will walk thru the source string based on the + * predefined collation rules. If the source string is empty, NULLORDER will + * be returned on the calls to next(). + * @param sourceText the source string. + * @param order the collation object. + * @param status the error code status. + */ + CollationElementIterator(const CharacterIterator& sourceText, + const RuleBasedCollator* order, UErrorCode& status); + + /** + * Assignment operator + * + * @param other the object to be copied + */ + const CollationElementIterator& + operator=(const CollationElementIterator& other); + + CollationElementIterator() = delete; // default constructor not implemented + + /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */ + inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; } + + static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode); + + static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order); + + // CollationElementIterator private data members ---------------------------- + + CollationIterator *iter_; // owned + const RuleBasedCollator *rbc_; // aliased + uint32_t otherHalf_; + /** + * <0: backwards; 0: just after reset() (previous() begins from end); + * 1: just after setOffset(); >1: forward + */ + int8_t dir_; + /** + * Stores offsets from expansions and from unsafe-backwards iteration, + * so that getOffset() returns intermediate offsets for the CEs + * that are consistent with forward iteration. 
+ */ + UVector32 *offsets_; + + UnicodeString string_; +}; + +// CollationElementIterator inline method definitions -------------------------- + +inline int32_t CollationElementIterator::primaryOrder(int32_t order) +{ + return (order >> 16) & 0xffff; +} + +inline int32_t CollationElementIterator::secondaryOrder(int32_t order) +{ + return (order >> 8) & 0xff; +} + +inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) +{ + return order & 0xff; +} + +inline UBool CollationElementIterator::isIgnorable(int32_t order) +{ + return (order & 0xffff0000) == 0; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_COLLATION */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/coll.h b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/coll.h new file mode 100644 index 0000000000..2de467c4f0 --- /dev/null +++ b/packages/php-wasm/compile/libintl/asyncify/dist/root/lib/include/unicode/coll.h @@ -0,0 +1,1387 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1996-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +*/ + +/** + * \file + * \brief C++ API: Collation Service. + */ + +/** +* File coll.h +* +* Created by: Helena Shih +* +* Modification History: +* +* Date Name Description +* 02/5/97 aliu Modified createDefault to load collation data from +* binary files when possible. Added related methods +* createCollationFromFile, chopLocale, createPathName. +* 02/11/97 aliu Added members addToCache, findInCache, and fgCache. +* 02/12/97 aliu Modified to create objects from RuleBasedCollator cache. +* Moved cache out of Collation class. 
+* 02/13/97 aliu Moved several methods out of this class and into +* RuleBasedCollator, with modifications. Modified +* createDefault() to call new RuleBasedCollator(Locale&) +* constructor. General clean up and documentation. +* 02/20/97 helena Added clone, operator==, operator!=, operator=, copy +* constructor and getDynamicClassID. +* 03/25/97 helena Updated with platform independent data types. +* 05/06/97 helena Added memory allocation error detection. +* 06/20/97 helena Java class name change. +* 09/03/97 helena Added createCollationKeyValues(). +* 02/10/98 damiba Added compare() with length as parameter. +* 04/23/99 stephen Removed EDecompositionMode, merged with +* Normalizer::EMode. +* 11/02/99 helena Collator performance enhancements. Eliminates the +* UnicodeString construction and special case for NO_OP. +* 11/23/99 srl More performance enhancements. Inlining of +* critical accessors. +* 05/15/00 helena Added version information API. +* 01/29/01 synwee Modified into a C++ wrapper which calls C apis +* (ucol.h). +* 2012-2014 markus Rewritten in C++ again. +*/ + +#ifndef COLL_H +#define COLL_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_COLLATION + +#include +#include +#include + +#include "unicode/char16ptr.h" +#include "unicode/uobject.h" +#include "unicode/ucol.h" +#include "unicode/unorm.h" +#include "unicode/locid.h" +#include "unicode/uniset.h" +#include "unicode/umisc.h" +#include "unicode/unistr.h" +#include "unicode/uiter.h" +#include "unicode/stringpiece.h" + +U_NAMESPACE_BEGIN + +class StringEnumeration; + +#if !UCONFIG_NO_SERVICE +/** + * @stable ICU 2.6 + */ +class CollatorFactory; +#endif + +/** +* @stable ICU 2.0 +*/ +class CollationKey; + +/** +* The Collator class performs locale-sensitive string +* comparison.
+* You use this class to build searching and sorting routines for natural +* language text. +*

+* Collator is an abstract base class. Subclasses implement +* specific collation strategies. One subclass, +* RuleBasedCollator, is currently provided and is applicable +* to a wide set of languages. Other subclasses may be created to handle more +* specialized needs. +*

+* Like other locale-sensitive classes, you can use the static factory method, +* createInstance, to obtain the appropriate +* Collator object for a given locale. You will only need to +* look at the subclasses of Collator if you need to +* understand the details of a particular collation strategy or if you need to +* modify that strategy. +*

+* The following example shows how to compare two strings using the +* Collator for the default locale. +* \htmlonly

\endhtmlonly +*
+* \code
+* // Compare two strings in the default locale
+* UErrorCode success = U_ZERO_ERROR;
+* Collator* myCollator = Collator::createInstance(success);
+* if (myCollator->compare("abc", "ABC") < 0)
+*   cout << "abc is less than ABC" << endl;
+* else
+*   cout << "abc is greater than or equal to ABC" << endl;
+* \endcode
+* 
+* \htmlonly
\endhtmlonly +*

+* You can set a Collator's strength attribute to +* determine the level of difference considered significant in comparisons. +* Five strengths are provided: PRIMARY, SECONDARY, +* TERTIARY, QUATERNARY and IDENTICAL. +* The exact assignment of strengths to language features is locale dependent. +* For example, in Czech, "e" and "f" are considered primary differences, +* while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary +* differences and "e" and "e" are identical. The following shows how both case +* and accents could be ignored for US English. +* \htmlonly

\endhtmlonly +*
+* \code
+* //Get the Collator for US English and set its strength to PRIMARY
+* UErrorCode success = U_ZERO_ERROR;
+* Collator* usCollator = Collator::createInstance(Locale::getUS(), success);
+* usCollator->setStrength(Collator::PRIMARY);
+* if (usCollator->compare("abc", "ABC") == 0)
+*     cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl;
+* \endcode
+* 
+* \htmlonly
\endhtmlonly +* +* The getSortKey methods +* convert a string to a series of bytes that can be compared bitwise against +* other sort keys using strcmp(). Sort keys are written as +* zero-terminated byte strings. +* +* Another set of APIs returns a CollationKey object that wraps +* the sort key bytes instead of returning the bytes themselves. +*

+*

+* Note: Collators with different Locale +* and CollationStrength settings will return different sort +* orders for the same set of strings. Locales have specific collation rules, +* and the way in which secondary and tertiary differences are taken into +* account, for example, will result in a different sorting order for the same +* strings. +*

+* @see RuleBasedCollator +* @see CollationKey +* @see CollationElementIterator +* @see Locale +* @see Normalizer2 +* @version 2.0 11/15/01 +*/ + +class U_I18N_API Collator : public UObject { +public: + + // Collator public enums ----------------------------------------------- + + /** + * Base letter represents a primary difference. Set comparison level to + * PRIMARY to ignore secondary and tertiary differences.
+ * Use this to set the strength of a Collator object.
+ * Example of primary difference, "abc" < "abd" + * + * Diacritical differences on the same base letter represent a secondary + * difference. Set comparison level to SECONDARY to ignore tertiary + * differences. Use this to set the strength of a Collator object.
+ * Example of secondary difference, "ä" >> "a". + * + * Uppercase and lowercase versions of the same character represent a + * tertiary difference. Set comparison level to TERTIARY to include all + * comparison differences. Use this to set the strength of a Collator + * object.
+ * Example of tertiary difference, "abc" <<< "ABC". + * + * Two characters are considered "identical" when they have the same Unicode + * spellings.
+ * For example, "ä" == "ä". + * + * UCollationStrength is also used to determine the strength of sort keys + * generated from Collator objects. + * @stable ICU 2.0 + */ + enum ECollationStrength + { + PRIMARY = UCOL_PRIMARY, // 0 + SECONDARY = UCOL_SECONDARY, // 1 + TERTIARY = UCOL_TERTIARY, // 2 + QUATERNARY = UCOL_QUATERNARY, // 3 + IDENTICAL = UCOL_IDENTICAL // 15 + }; + + + // Cannot use #ifndef U_HIDE_DEPRECATED_API for the following, it is + // used by virtual methods that cannot have that conditional. +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * LESS is returned if source string is compared to be less than target + * string in the compare() method. + * EQUAL is returned if source string is compared to be equal to target + * string in the compare() method. + * GREATER is returned if source string is compared to be greater than + * target string in the compare() method. + * @see Collator#compare + * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h + */ + enum EComparisonResult + { + LESS = UCOL_LESS, // -1 + EQUAL = UCOL_EQUAL, // 0 + GREATER = UCOL_GREATER // 1 + }; +#endif // U_FORCE_HIDE_DEPRECATED_API + + // Collator public destructor ----------------------------------------- + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~Collator(); + + // Collator public methods -------------------------------------------- + + /** + * Returns true if "other" is the same as "this". + * + * The base class implementation returns true if "other" has the same type/class as "this": + * `typeid(*this) == typeid(other)`. + * + * Subclass implementations should do something like the following: + * + * if (this == &other) { return true; } + * if (!Collator::operator==(other)) { return false; } // not the same class + * + * const MyCollator &o = (const MyCollator&)other; + * (compare this vs. o's subclass fields) + * + * @param other Collator object to be compared + * @return true if other is the same as this. 
+ * @stable ICU 2.0 + */ + virtual bool operator==(const Collator& other) const; + + /** + * Returns true if "other" is not the same as "this". + * Calls ! operator==(const Collator&) const which works for all subclasses. + * @param other Collator object to be compared + * @return true if other is not the same as this. + * @stable ICU 2.0 + */ + virtual bool operator!=(const Collator& other) const; + + /** + * Makes a copy of this object. + * @return a copy of this object, owned by the caller + * @stable ICU 2.0 + */ + virtual Collator* clone() const = 0; + + /** + * Creates the Collator object for the current default locale. + * The default locale is determined by Locale::getDefault. + * The UErrorCode& err parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check the + * value of U_SUCCESS(err). If you wish more detailed information, you can + * check for informational error results which still indicate success. + * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_ERROR indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * + * @param err the error code status. + * @return the collation object of the default locale.(for example, en_US) + * @see Locale#getDefault + * @stable ICU 2.0 + */ + static Collator* U_EXPORT2 createInstance(UErrorCode& err); + + /** + * Gets the collation object for the desired locale. The + * resource of the desired locale will be loaded. + * + * Locale::getRoot() is the base collation table and all other languages are + * built on top of it with additional language-specific modifications. 
+ * + * For some languages, multiple collation types are available; + * for example, "de@collation=phonebook". + * Starting with ICU 54, collation attributes can be specified via locale keywords as well, + * in the old locale extension syntax ("el@colCaseFirst=upper") + * or in language tag syntax ("el-u-kf-upper"). + * See User Guide: Collation API. + * + * The UErrorCode& err parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_ERROR indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * + * The caller owns the returned object and is responsible for deleting it. + * @param loc The locale ID for which to open a collator. + * @param err the error code status. + * @return the created table-based collation object based on the desired + * locale. + * @see Locale + * @see ResourceLoader + * @stable ICU 2.2 + */ + static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err); + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * The comparison function compares the character data stored in two + * different strings. Returns information about whether a string is less + * than, greater than or equal to another string. + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @return Returns a byte value. 
GREATER if source is greater + * than target; EQUAL if source is equal to target; LESS if source is less + * than target + * @deprecated ICU 2.6 use the overload with UErrorCode & + */ + virtual EComparisonResult compare(const UnicodeString& source, + const UnicodeString& target) const; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * The comparison function compares the character data stored in two + * different strings. Returns information about whether a string is less + * than, greater than or equal to another string. + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @param status possible error code + * @return Returns an enum value. UCOL_GREATER if source is greater + * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less + * than target + * @stable ICU 2.6 + */ + virtual UCollationResult compare(const UnicodeString& source, + const UnicodeString& target, + UErrorCode &status) const = 0; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Does the same thing as compare but limits the comparison to a specified + * length + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @param length the length the comparison is limited to + * @return Returns a byte value. GREATER if source (up to the specified + * length) is greater than target; EQUAL if source (up to specified + * length) is equal to target; LESS if source (up to the specified + * length) is less than target. + * @deprecated ICU 2.6 use the overload with UErrorCode & + */ + virtual EComparisonResult compare(const UnicodeString& source, + const UnicodeString& target, + int32_t length) const; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Does the same thing as compare but limits the comparison to a specified + * length + * @param source the source string to be compared with. 
+ * @param target the string that is to be compared with the source string. + * @param length the length the comparison is limited to + * @param status possible error code + * @return Returns an enum value. UCOL_GREATER if source (up to the specified + * length) is greater than target; UCOL_EQUAL if source (up to specified + * length) is equal to target; UCOL_LESS if source (up to the specified + * length) is less than target. + * @stable ICU 2.6 + */ + virtual UCollationResult compare(const UnicodeString& source, + const UnicodeString& target, + int32_t length, + UErrorCode &status) const = 0; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * The comparison function compares the character data stored in two + * different string arrays. Returns information about whether a string array + * is less than, greater than or equal to another string array. + *

Example of use: + *

+     * .       char16_t ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
+     * .       char16_t abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
+     * .       UErrorCode status = U_ZERO_ERROR;
+     * .       Collator *myCollation =
+     * .                         Collator::createInstance(Locale::getUS(), status);
+     * .       if (U_FAILURE(status)) return;
+     * .       myCollation->setStrength(Collator::PRIMARY);
+     * .       // result would be Collator::EQUAL ("abc" == "ABC")
+     * .       // (no primary difference between "abc" and "ABC")
+     * .       Collator::EComparisonResult result =
+     * .                             myCollation->compare(abc, 3, ABC, 3);
+     * .       myCollation->setStrength(Collator::TERTIARY);
+     * .       // result would be Collator::LESS ("abc" <<< "ABC")
+     * .       // (with tertiary difference between "abc" and "ABC")
+     * .       result = myCollation->compare(abc, 3, ABC, 3);
+     * 
+ * @param source the source string array to be compared with. + * @param sourceLength the length of the source string array. If this value + * is equal to -1, the string array is null-terminated. + * @param target the string that is to be compared with the source string. + * @param targetLength the length of the target string array. If this value + * is equal to -1, the string array is null-terminated. + * @return Returns a byte value. GREATER if source is greater than target; + * EQUAL if source is equal to target; LESS if source is less than + * target + * @deprecated ICU 2.6 use the overload with UErrorCode & + */ + virtual EComparisonResult compare(const char16_t* source, int32_t sourceLength, + const char16_t* target, int32_t targetLength) + const; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * The comparison function compares the character data stored in two + * different string arrays. Returns information about whether a string array + * is less than, greater than or equal to another string array. + * @param source the source string array to be compared with. + * @param sourceLength the length of the source string array. If this value + * is equal to -1, the string array is null-terminated. + * @param target the string that is to be compared with the source string. + * @param targetLength the length of the target string array. If this value + * is equal to -1, the string array is null-terminated. + * @param status possible error code + * @return Returns an enum value. UCOL_GREATER if source is greater + * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less + * than target + * @stable ICU 2.6 + */ + virtual UCollationResult compare(const char16_t* source, int32_t sourceLength, + const char16_t* target, int32_t targetLength, + UErrorCode &status) const = 0; + + /** + * Compares two strings using the Collator. + * Returns whether the first one compares less than/equal to/greater than + * the second one. 
+ * This version takes UCharIterator input. + * @param sIter the first ("source") string iterator + * @param tIter the second ("target") string iterator + * @param status ICU status + * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER + * @stable ICU 4.2 + */ + virtual UCollationResult compare(UCharIterator &sIter, + UCharIterator &tIter, + UErrorCode &status) const; + + /** + * Compares two UTF-8 strings using the Collator. + * Returns whether the first one compares less than/equal to/greater than + * the second one. + * This version takes UTF-8 input. + * Note that a StringPiece can be implicitly constructed + * from a std::string or a NUL-terminated const char * string. + * @param source the first UTF-8 string + * @param target the second UTF-8 string + * @param status ICU status + * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER + * @stable ICU 4.2 + */ + virtual UCollationResult compareUTF8(const StringPiece &source, + const StringPiece &target, + UErrorCode &status) const; + + /** + * Transforms the string into a series of characters that can be compared + * with CollationKey::compareTo. It is not possible to restore the original + * string from the chars in the sort key. + *

Use CollationKey::equals or CollationKey::compare to compare the + * generated sort keys. + * If the source string is null, a null collation key will be returned. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source the source string to be transformed into a sort key. + * @param key the collation key to be filled in + * @param status the error code status. + * @return the collation key of the string based on the collation rules. + * @see CollationKey#compare + * @stable ICU 2.0 + */ + virtual CollationKey& getCollationKey(const UnicodeString& source, + CollationKey& key, + UErrorCode& status) const = 0; + + /** + * Transforms the string into a series of characters that can be compared + * with CollationKey::compareTo. It is not possible to restore the original + * string from the chars in the sort key. + *

Use CollationKey::equals or CollationKey::compare to compare the + * generated sort keys. + *

If the source string is null, a null collation key will be returned. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source the source string to be transformed into a sort key. + * @param sourceLength length of the collation key + * @param key the collation key to be filled in + * @param status the error code status. + * @return the collation key of the string based on the collation rules. + * @see CollationKey#compare + * @stable ICU 2.0 + */ + virtual CollationKey& getCollationKey(const char16_t*source, + int32_t sourceLength, + CollationKey& key, + UErrorCode& status) const = 0; + /** + * Generates the hash code for the collation object + * @stable ICU 2.0 + */ + virtual int32_t hashCode() const = 0; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Gets the locale of the Collator + * + * @param type can be either requested, valid or actual locale. For more + * information see the definition of ULocDataLocaleType in + * uloc.h + * @param status the error code status. + * @return locale where the collation data lives. If the collator + * was instantiated from rules, locale is empty. + * @deprecated ICU 2.8 This API is under consideration for revision + * in ICU 3.0. + */ + virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Convenience method for comparing two strings based on the collation rules. + * @param source the source string to be compared with. + * @param target the target string to be compared with. + * @return true if the first string is greater than the second one, + * according to the collation rules. false, otherwise. + * @see Collator#compare + * @stable ICU 2.0 + */ + UBool greater(const UnicodeString& source, const UnicodeString& target) + const; + + /** + * Convenience method for comparing two strings based on the collation rules. 
+ * @param source the source string to be compared with. + * @param target the target string to be compared with. + * @return true if the first string is greater than or equal to the second + * one, according to the collation rules. false, otherwise. + * @see Collator#compare + * @stable ICU 2.0 + */ + UBool greaterOrEqual(const UnicodeString& source, + const UnicodeString& target) const; + + /** + * Convenience method for comparing two strings based on the collation rules. + * @param source the source string to be compared with. + * @param target the target string to be compared with. + * @return true if the strings are equal according to the collation rules. + * false, otherwise. + * @see Collator#compare + * @stable ICU 2.0 + */ + UBool equals(const UnicodeString& source, const UnicodeString& target) const; + +#ifndef U_HIDE_DRAFT_API + + /** + * Creates a comparison function object that uses this collator. + * Like std::equal_to but uses the collator instead of operator==. + * @draft ICU 76 + */ + inline auto equal_to() const { return Predicate(*this); } + + /** + * Creates a comparison function object that uses this collator. + * Like std::greater but uses the collator instead of operator>. + * @draft ICU 76 + */ + inline auto greater() const { return Predicate(*this); } + + /** + * Creates a comparison function object that uses this collator. + * Like std::less but uses the collator instead of operator<. + * @draft ICU 76 + */ + inline auto less() const { return Predicate(*this); } + + /** + * Creates a comparison function object that uses this collator. + * Like std::not_equal_to but uses the collator instead of operator!=. + * @draft ICU 76 + */ + inline auto not_equal_to() const { return Predicate(*this); } + + /** + * Creates a comparison function object that uses this collator. + * Like std::greater_equal but uses the collator instead of operator>=. 
+ * @draft ICU 76 + */ + inline auto greater_equal() const { return Predicate(*this); } + + /** + * Creates a comparison function object that uses this collator. + * Like std::less_equal but uses the collator instead of operator<=. + * @draft ICU 76 + */ + inline auto less_equal() const { return Predicate(*this); } + +#endif // U_HIDE_DRAFT_API + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Determines the minimum strength that will be used in comparison or + * transformation. + *

E.g. with strength == SECONDARY, the tertiary difference is ignored + *

E.g. with strength == PRIMARY, the secondary and tertiary difference + * are ignored. + * @return the current comparison level. + * @see Collator#setStrength + * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead + */ + virtual ECollationStrength getStrength() const; + + /** + * Sets the minimum strength to be used in comparison or transformation. + *

Example of use: + *

+     *  \code
+     *  UErrorCode status = U_ZERO_ERROR;
+     *  Collator*myCollation = Collator::createInstance(Locale::getUS(), status);
+     *  if (U_FAILURE(status)) return;
+     *  myCollation->setStrength(Collator::PRIMARY);
+     *  // result will be "abc" == "ABC"
+     *  // tertiary differences will be ignored
+     *  Collator::EComparisonResult result = myCollation->compare("abc", "ABC");
+     * \endcode
+     * 
+ * @see Collator#getStrength + * @param newStrength the new comparison level. + * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead + */ + virtual void setStrength(ECollationStrength newStrength); +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Retrieves the reordering codes for this collator. + * @param dest The array to fill with the script ordering. + * @param destCapacity The length of dest. If it is 0, then dest may be nullptr and the function + * will only return the length of the result without writing any codes (pre-flighting). + * @param status A reference to an error code value, which must not indicate + * a failure before the function call. + * @return The length of the script ordering array. + * @see ucol_setReorderCodes + * @see Collator#getEquivalentReorderCodes + * @see Collator#setReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ + virtual int32_t getReorderCodes(int32_t *dest, + int32_t destCapacity, + UErrorCode& status) const; + + /** + * Sets the ordering of scripts for this collator. + * + *

The reordering codes are a combination of script codes and reorder codes. + * @param reorderCodes An array of script codes in the new order. This can be nullptr if the + * length is also set to 0. An empty array will clear any reordering codes on the collator. + * @param reorderCodesLength The length of reorderCodes. + * @param status error code + * @see ucol_setReorderCodes + * @see Collator#getReorderCodes + * @see Collator#getEquivalentReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ + virtual void setReorderCodes(const int32_t* reorderCodes, + int32_t reorderCodesLength, + UErrorCode& status) ; + + /** + * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder + * codes will be grouped and must reorder together. + * Beginning with ICU 55, scripts only reorder together if they are primary-equal, + * for example Hiragana and Katakana. + * + * @param reorderCode The reorder code to determine equivalence for. + * @param dest The array to fill with the script equivalence reordering codes. + * @param destCapacity The length of dest. If it is 0, then dest may be nullptr and the + * function will only return the length of the result without writing any codes (pre-flighting). + * @param status A reference to an error code value, which must not indicate + * a failure before the function call. + * @return The length of the reordering code equivalence array. 
+ * @see ucol_setReorderCodes + * @see Collator#getReorderCodes + * @see Collator#setReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ + static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode, + int32_t* dest, + int32_t destCapacity, + UErrorCode& status); + + /** + * Get name of the object for the desired Locale, in the desired language + * @param objectLocale must be from getAvailableLocales + * @param displayLocale specifies the desired locale for output + * @param name the fill-in parameter of the return value + * @return display-able name of the object for the object locale in the + * desired language + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, + const Locale& displayLocale, + UnicodeString& name); + + /** + * Get name of the object for the desired Locale, in the language of the + * default locale. + * @param objectLocale must be from getAvailableLocales + * @param name the fill-in parameter of the return value + * @return name of the object for the desired locale in the default language + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, + UnicodeString& name); + + /** + * Get the set of Locales for which Collations are installed. + * + *

Note this does not include locales supported by registered collators. + * If collators might have been registered, use the overload of getAvailableLocales + * that returns a StringEnumeration.

+ * + * @param count the output parameter of number of elements in the locale list + * @return the list of available locales for which collations are installed + * @stable ICU 2.0 + */ + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); + + /** + * Return a StringEnumeration over the locales available at the time of the call, + * including registered locales. If a severe error occurs (such as out of memory + * condition) this will return null. If there is no locale data, an empty enumeration + * will be returned. + * @return a StringEnumeration over the locales available at the time of the call + * @stable ICU 2.6 + */ + static StringEnumeration* U_EXPORT2 getAvailableLocales(); + + /** + * Create a string enumerator of all possible keywords that are relevant to + * collation. At this point, the only recognized keyword for this + * service is "collation". + * @param status input-output error code + * @return a string enumeration over locale strings. The caller is + * responsible for closing the result. + * @stable ICU 3.0 + */ + static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status); + + /** + * Given a keyword, create a string enumeration of all values + * for that keyword that are currently in use. + * @param keyword a particular keyword as enumerated by + * ucol_getKeywords. If any other keyword is passed in, status is set + * to U_ILLEGAL_ARGUMENT_ERROR. + * @param status input-output error code + * @return a string enumeration over collation keyword values, or nullptr + * upon error. The caller is responsible for deleting the result. + * @stable ICU 3.0 + */ + static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status); + + /** + * Given a key and a locale, returns an array of string values in a preferred + * order that would make a difference. 
These are all and only those values where + * the open (creation) of the service with the locale formed from the input locale + * plus input keyword and that value has different behavior than creation with the + * input locale alone. + * @param keyword one of the keys supported by this service. For now, only + * "collation" is supported. + * @param locale the locale + * @param commonlyUsed if set to true it will return only commonly used values + * with the given locale in preferred order. Otherwise, + * it will return all the available values for the locale. + * @param status ICU status + * @return a string enumeration over keyword values for the given key and the locale. + * @stable ICU 4.2 + */ + static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale, + UBool commonlyUsed, UErrorCode& status); + + /** + * Return the functionally equivalent locale for the given + * requested locale, with respect to given keyword, for the + * collation service. If two locales return the same result, then + * collators instantiated for these locales will behave + * equivalently. The converse is not always true; two collators + * may in fact be equivalent, but return different results, due to + * internal details. The return result has no other meaning than + * that stated above, and implies nothing as to the relationship + * between the two locales. This is intended for use by + * applications who wish to cache collators, or otherwise reuse + * collators when possible. The functional equivalent may change + * over time. For more information, please see the + * Locales and Services section of the ICU User Guide. + * @param keyword a particular keyword as enumerated by + * ucol_getKeywords. + * @param locale the requested locale + * @param isAvailable reference to a fillin parameter that + * indicates whether the requested locale was 'available' to the + * collation service. 
A locale is defined as 'available' if it + * physically exists within the collation locale data. + * @param status reference to input-output error code + * @return the functionally equivalent collation locale, or the root + * locale upon error. + * @stable ICU 3.0 + */ + static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale, + UBool& isAvailable, UErrorCode& status); + +#if !UCONFIG_NO_SERVICE + /** + * Register a new Collator. The collator will be adopted. + * Because ICU may choose to cache collators internally, this must be + * called at application startup, prior to any calls to + * Collator::createInstance to avoid undefined behavior. + * @param toAdopt the Collator instance to be adopted + * @param locale the locale with which the collator will be associated + * @param status the in/out status code, no special meanings are assigned + * @return a registry key that can be used to unregister this collator + * @stable ICU 2.6 + */ + static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status); + + /** + * Register a new CollatorFactory. The factory will be adopted. + * Because ICU may choose to cache collators internally, this must be + * called at application startup, prior to any calls to + * Collator::createInstance to avoid undefined behavior. + * @param toAdopt the CollatorFactory instance to be adopted + * @param status the in/out status code, no special meanings are assigned + * @return a registry key that can be used to unregister this collator + * @stable ICU 2.6 + */ + static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status); + + /** + * Unregister a previously-registered Collator or CollatorFactory + * using the key returned from the register call. Key becomes + * invalid after a successful call and should not be used again. + * The object corresponding to the key will be deleted. 
+ * Because ICU may choose to cache collators internally, this should + * be called during application shutdown, after all calls to + * Collator::createInstance to avoid undefined behavior. + * @param key the registry key returned by a previous call to registerInstance + * @param status the in/out status code, no special meanings are assigned + * @return true if the collator for the key was successfully unregistered + * @stable ICU 2.6 + */ + static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); +#endif /* UCONFIG_NO_SERVICE */ + + /** + * Gets the version information for a Collator. + * @param info the version # information, the result will be filled in + * @stable ICU 2.0 + */ + virtual void getVersion(UVersionInfo info) const = 0; + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual method. + * This method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * @return The class ID for this object. All objects of a given class have + * the same class ID. Objects of other classes have different class + * IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID() const override = 0; + + /** + * Universal attribute setter + * @param attr attribute type + * @param value attribute value + * @param status to indicate whether the operation went on smoothly or + * there were errors + * @stable ICU 2.2 + */ + virtual void setAttribute(UColAttribute attr, UColAttributeValue value, + UErrorCode &status) = 0; + + /** + * Universal attribute getter + * @param attr attribute type + * @param status to indicate whether the operation went on smoothly or + * there were errors + * @return attribute value + * @stable ICU 2.2 + */ + virtual UColAttributeValue getAttribute(UColAttribute attr, + UErrorCode &status) const = 0; + + /** + * Sets the variable top to the top of the specified reordering group. 
+ * The variable top determines the highest-sorting character + * which is affected by UCOL_ALTERNATE_HANDLING. + * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. + * + * The base class implementation sets U_UNSUPPORTED_ERROR. + * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, + * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; + * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @see getMaxVariable + * @stable ICU 53 + */ + virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode); + + /** + * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. + * + * The base class implementation returns UCOL_REORDER_CODE_PUNCTUATION. + * @return the maximum variable reordering group. + * @see setMaxVariable + * @stable ICU 53 + */ + virtual UColReorderCode getMaxVariable() const; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Sets the variable top to the primary weight of the specified string. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop one or more (if contraction) char16_ts to which the variable top should be set + * @param len length of variable top string. If -1 it is considered to be zero terminated. + * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
+ * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
+ * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond + * the last reordering group supported by setMaxVariable() + * @return variable top primary weight + * @deprecated ICU 53 Call setMaxVariable() instead. + */ + virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) = 0; + + /** + * Sets the variable top to the primary weight of the specified string. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set + * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
+ * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
+ * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond + * the last reordering group supported by setMaxVariable() + * @return variable top primary weight + * @deprecated ICU 53 Call setMaxVariable() instead. + */ + virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) = 0; + + /** + * Sets the variable top to the specified primary weight. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop + * @param status error code + * @deprecated ICU 53 Call setMaxVariable() instead. + */ + virtual void setVariableTop(uint32_t varTop, UErrorCode &status) = 0; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Gets the variable top value of a Collator. + * @param status error code (not changed by function). If error code is set, the return value is undefined. + * @return the variable top primary weight + * @see getMaxVariable + * @stable ICU 2.0 + */ + virtual uint32_t getVariableTop(UErrorCode &status) const = 0; + + /** + * Get a UnicodeSet that contains all the characters and sequences + * tailored in this collator. + * @param status error code of the operation + * @return a pointer to a UnicodeSet object containing all the + * code points and sequences that may sort differently than + * in the root collator. The object must be disposed of by using delete + * @stable ICU 2.4 + */ + virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Same as clone(). + * The base class implementation simply calls clone(). 
+ * @return a copy of this object, owned by the caller + * @see clone() + * @deprecated ICU 50 no need to have two methods for cloning + */ + virtual Collator* safeClone() const; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Get the sort key as an array of bytes from a UnicodeString. + * Sort key byte arrays are zero-terminated and can be compared using + * strcmp(). + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source string to be processed. + * @param result buffer to store result in. If nullptr, number of bytes needed + * will be returned. + * @param resultLength length of the result buffer. If it is not large enough, the + * buffer will be filled to capacity. + * @return Number of bytes needed for storing the sort key + * @stable ICU 2.2 + */ + virtual int32_t getSortKey(const UnicodeString& source, + uint8_t* result, + int32_t resultLength) const = 0; + + /** + * Get the sort key as an array of bytes from a char16_t buffer. + * Sort key byte arrays are zero-terminated and can be compared using + * strcmp(). + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source string to be processed. + * @param sourceLength length of string to be processed. + * If -1, the string is 0 terminated and length will be decided by the + * function. + * @param result buffer to store result in. If nullptr, number of bytes needed + * will be returned. + * @param resultLength length of the result buffer. If it is not large enough, the + * buffer will be filled to capacity. + * @return Number of bytes needed for storing the sort key + * @stable ICU 2.2 + */ + virtual int32_t getSortKey(const char16_t*source, int32_t sourceLength, + uint8_t*result, int32_t resultLength) const = 0; + + /** + * Produce a bound for a given sortkey and a number of levels. 
+ * Return value is always the number of bytes needed, regardless of + * whether the result buffer was big enough or even valid.
+ * Resulting bounds can be used to produce a range of strings that are + * between upper and lower bounds. For example, if bounds are produced + * for a sortkey of string "smith", strings between upper and lower + * bounds with one level would include "Smith", "SMITH", "sMiTh".
+ * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER + * is produced, strings matched would be as above. However, if bound + * produced using UCOL_BOUND_UPPER_LONG is used, the above example will + * also match "Smithsonian" and similar.
+ * For more on usage, see example in cintltst/capitst.c in procedure + * TestBounds. + * Sort keys may be compared using strcmp. + * @param source The source sortkey. + * @param sourceLength The length of source, or -1 if null-terminated. + * (If an unmodified sortkey is passed, it is always null + * terminated). + * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which + * produces a lower inclusive bound, UCOL_BOUND_UPPER, that + * produces upper bound that matches strings of the same length + * or UCOL_BOUND_UPPER_LONG that matches strings that have the + * same starting substring as the source string. + * @param noOfLevels Number of levels required in the resulting bound (for most + * uses, the recommended value is 1). See users guide for + * explanation on number of levels a sortkey can have. + * @param result A pointer to a buffer to receive the resulting sortkey. + * @param resultLength The maximum size of result. + * @param status Used for returning error code if something went wrong. If the + * number of levels requested is higher than the number of levels + * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is + * issued. + * @return The size needed to fully store the bound. + * @see ucol_keyHashCode + * @stable ICU 2.1 + */ + static int32_t U_EXPORT2 getBound(const uint8_t *source, + int32_t sourceLength, + UColBoundMode boundType, + uint32_t noOfLevels, + uint8_t *result, + int32_t resultLength, + UErrorCode &status); + + +protected: + + // Collator protected constructors ------------------------------------- + + /** + * Default constructor. + * Constructor is different from the old default Collator constructor. + * The task for determining the default collation strength and normalization + * mode is left to the child class. + * @stable ICU 2.0 + */ + Collator(); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Constructor. + * Empty constructor, does not handle the arguments. 
+ * This constructor is done for backward compatibility with 1.7 and 1.8. + * The task for handling the argument collation strength and normalization + * mode is left to the child class. + * @param collationStrength collation strength + * @param decompositionMode + * @deprecated ICU 2.4. Subclasses should use the default constructor + * instead and handle the strength and normalization mode themselves. + */ + Collator(UCollationStrength collationStrength, + UNormalizationMode decompositionMode); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Copy constructor. + * @param other Collator object to be copied from + * @stable ICU 2.0 + */ + Collator(const Collator& other); + +public: + /** + * Used internally by registration to define the requested and valid locales. + * @param requestedLocale the requested locale + * @param validLocale the valid locale + * @param actualLocale the actual locale + * @internal + */ + virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); + + /** Get the short definition string for a collator. This internal API harvests the collator's + * locale and the attribute set and produces a string that can be used for opening + * a collator with the same attributes using the ucol_openFromShortString API. + * This string will be normalized. + * The structure and the syntax of the string is defined in the "Naming collators" + * section of the users guide: + * https://unicode-org.github.io/icu/userguide/collation/concepts#collator-naming-scheme + * This function supports preflighting. + * + * This is internal, and intended to be used with delegate converters. + * + * @param locale a locale that will appear as a collators locale in the resulting + * short string definition. If nullptr, the locale will be harvested + * from the collator. + * @param buffer space to hold the resulting string + * @param capacity capacity of the buffer + * @param status for returning errors. 
All the preflighting errors are featured + * @return length of the resulting string + * @see ucol_openFromShortString + * @see ucol_normalizeShortDefinitionString + * @see ucol_getShortDefinitionString + * @internal + */ + virtual int32_t internalGetShortDefinitionString(const char *locale, + char *buffer, + int32_t capacity, + UErrorCode &status) const; + + /** + * Implements ucol_strcollUTF8(). + * @internal + */ + virtual UCollationResult internalCompareUTF8( + const char *left, int32_t leftLength, + const char *right, int32_t rightLength, + UErrorCode &errorCode) const; + + /** + * Implements ucol_nextSortKeyPart(). + * @internal + */ + virtual int32_t + internalNextSortKeyPart( + UCharIterator *iter, uint32_t state[2], + uint8_t *dest, int32_t count, UErrorCode &errorCode) const; + +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + static inline Collator *fromUCollator(UCollator *uc) { + return reinterpret_cast(uc); + } + /** @internal */ + static inline const Collator *fromUCollator(const UCollator *uc) { + return reinterpret_cast(uc); + } + /** @internal */ + inline UCollator *toUCollator() { + return reinterpret_cast(this); + } + /** @internal */ + inline const UCollator *toUCollator() const { + return reinterpret_cast(this); + } +#endif // U_HIDE_INTERNAL_API + +private: + /** + * Assignment operator. Private for now. + */ + Collator& operator=(const Collator& other) = delete; + + friend class CFactory; + friend class SimpleCFactory; + friend class ICUCollatorFactory; + friend class ICUCollatorService; + static Collator* makeInstance(const Locale& desiredLocale, + UErrorCode& status); + +#ifndef U_HIDE_DRAFT_API + /** + * Function object for performing comparisons using a Collator. + * @internal + */ + template