replicant-frameworks_native/include/utils/AndroidUnicode.h

/*
 * Copyright (C) 2006 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//

#ifndef ANDROID_UNICODE_H
#define ANDROID_UNICODE_H

#include <stdint.h>
#include <sys/types.h>

#define REPLACEMENT_CHAR (0xFFFD)

// this part of code is copied from umachine.h under ICU
/**
 * Define UChar32 as a type for single Unicode code points.
 * UChar32 is a signed 32-bit integer (same as int32_t).
 *
 * The Unicode code point range is 0..0x10ffff.
 * All other values (negative or >=0x110000) are illegal as Unicode code points.
 * They may be used as sentinel values to indicate "done", "error"
 * or similar non-code point conditions.
 *
 * @stable ICU 2.4
 */
typedef int32_t UChar32;

namespace android {

    class Encoding;
    /**
     * \class Unicode
     *
     * Helper class for getting properties of Unicode characters. Characters
     * can have one of the types listed in CharType and each character can have the
     * directionality of Direction.
     */
    class Unicode
    {
    public:
        /**
         * Directions specified in the Unicode standard. These directions map directly
         * to java.lang.Character.
         */
        enum Direction {
            DIRECTIONALITY_UNDEFINED = -1,
            DIRECTIONALITY_LEFT_TO_RIGHT,
            DIRECTIONALITY_RIGHT_TO_LEFT,
            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
            DIRECTIONALITY_EUROPEAN_NUMBER,
            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
            DIRECTIONALITY_ARABIC_NUMBER,
            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
            DIRECTIONALITY_NONSPACING_MARK,
            DIRECTIONALITY_BOUNDARY_NEUTRAL,
            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
            DIRECTIONALITY_SEGMENT_SEPARATOR,
            DIRECTIONALITY_WHITESPACE,
            DIRECTIONALITY_OTHER_NEUTRALS,
            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
        };

        /**
         * Character types as specified in the Unicode standard. These map directly to
         * java.lang.Character.
         */
        enum CharType {
            CHARTYPE_UNASSIGNED = 0,
            CHARTYPE_UPPERCASE_LETTER,
            CHARTYPE_LOWERCASE_LETTER,
            CHARTYPE_TITLECASE_LETTER,
            CHARTYPE_MODIFIER_LETTER,
            CHARTYPE_OTHER_LETTER,
            CHARTYPE_NON_SPACING_MARK,
            CHARTYPE_ENCLOSING_MARK,
            CHARTYPE_COMBINING_SPACING_MARK,
            CHARTYPE_DECIMAL_DIGIT_NUMBER,
            CHARTYPE_LETTER_NUMBER,
            CHARTYPE_OTHER_NUMBER,
            CHARTYPE_SPACE_SEPARATOR,
            CHARTYPE_LINE_SEPARATOR,
            CHARTYPE_PARAGRAPH_SEPARATOR,
            CHARTYPE_CONTROL,
            CHARTYPE_FORMAT,
            CHARTYPE_MISSING_VALUE_FOR_JAVA,    /* This is the mysterious missing 17 value from the java constants */
            CHARTYPE_PRIVATE_USE,
            CHARTYPE_SURROGATE,
            CHARTYPE_DASH_PUNCTUATION,
            CHARTYPE_START_PUNCTUATION,
            CHARTYPE_END_PUNCTUATION,
            CHARTYPE_CONNECTOR_PUNCTUATION,
            CHARTYPE_OTHER_PUNCTUATION,
            CHARTYPE_MATH_SYMBOL,
            CHARTYPE_CURRENCY_SYMBOL,
            CHARTYPE_MODIFIER_SYMBOL,
            CHARTYPE_OTHER_SYMBOL,
            CHARTYPE_INITIAL_QUOTE_PUNCTUATION,
            CHARTYPE_FINAL_QUOTE_PUNCTUATION
        };

        /**
         * Decomposition types as described by the unicode standard. These values map to
         * the same values in uchar.h in ICU.
         */
        enum DecompositionType {
            DECOMPOSITION_NONE = 0,
            DECOMPOSITION_CANONICAL,
            DECOMPOSITION_COMPAT,
            DECOMPOSITION_CIRCLE,
            DECOMPOSITION_FINAL,
            DECOMPOSITION_FONT,
            DECOMPOSITION_FRACTION,
            DECOMPOSITION_INITIAL,
            DECOMPOSITION_ISOLATED,
            DECOMPOSITION_MEDIAL,
            DECOMPOSITION_NARROW,
            DECOMPOSITION_NOBREAK,
            DECOMPOSITION_SMALL,
            DECOMPOSITION_SQUARE,
            DECOMPOSITION_SUB,
            DECOMPOSITION_SUPER,
            DECOMPOSITION_VERTICAL,
            DECOMPOSITION_WIDE
        };

        /**
         * Returns the packed data for java calls
         * @param c The unicode character.
         * @return The packed data for the character.
         *
         * Copied from java.lang.Character implementation:
         * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
         * F E D C B A 9 8 7 6 5 4 3 2 1 0 F E D C B A 9 8 7 6 5 4 3 2 1 0
         * 
         *                              31 types                 ---------
         *                   18 directionalities       ---------
         *                   2 mirroreds             -
         *                               -----------      56  toupper diffs
         *                   -----------                  48  tolower diffs
         *               ---                              4 totitlecase diffs
         * -------------                                 84 numeric values
         *     ---------                                 24 mirror char diffs
         */
        static uint32_t getPackedData(UChar32 c);
        
        /**
         * Get the Character type.
         * @param c The unicode character.
         * @return The character's type or CHARTYPE_UNASSIGNED if the character is invalid
         *         or has an unassigned class.
         */
        static CharType getType(UChar32 c);    

        /**
         * Get the Character's decomposition type.
         * @param c The unicode character.
         * @return The character's decomposition type or DECOMPOSITION_NONE is there 
         *         is no decomposition.
         */
        static DecompositionType getDecompositionType(UChar32 c);
        
        /**
         * Returns the digit value of a character or -1 if the character
         * is not within the specified radix.
         *
         * The digit value is computed for integer characters and letters
         * within the given radix. This function does not handle Roman Numerals,
         * fractions, or any other characters that may represent numbers.
         * 
         * @param c The unicode character
         * @param radix The intended radix.
         * @return The digit value or -1 if there is no digit value or if the value is outside the radix.
         */
        static int getDigitValue(UChar32 c, int radix = 10);

        /**
         * Return the numeric value of a character
         *
         * @param c The unicode character.
         * @return The numeric value of the character. -1 if the character has no numeric value, 
         *         -2 if the character has a numeric value that is not representable by an integer.
         */
        static int getNumericValue(UChar32 c);

        /**
         * Convert the character to lowercase
         * @param c The unicode character.
         * @return The lowercase character equivalent of c. If c does not have a lowercase equivalent,
         *         the original character is returned.
         */
        static UChar32 toLower(UChar32 c);
            
        /**
         * Convert the character to uppercase
         * @param c The unicode character.
         * @return The uppercase character equivalent of c. If c does not have an uppercase equivalent,
         *         the original character is returned.
         */
        static UChar32 toUpper(UChar32 c);
    
        /**
         * Get the directionality of the character.
         * @param c The unicode character.
         * @return The direction of the character or DIRECTIONALITY_UNDEFINED.
         */
        static Direction getDirectionality(UChar32 c);
            
        /**
         * Check if the character is a mirrored character. This means that the character
         * has an equivalent character that is the mirror image of itself.
         * @param c The unicode character.
         * @return True iff c has a mirror equivalent.
         */
        static bool isMirrored(UChar32 c);
         
        /**
         * Return the mirror of the given character.
         * @param c The unicode character.
         * @return The mirror equivalent of c. If c does not have a mirror equivalent,
         *         the original character is returned.
         * @see isMirrored
         */
        static UChar32 toMirror(UChar32 c);
        
        /**
         * Convert the character to title case.
         * @param c The unicode character.
         * @return The titlecase equivalent of c. If c does not have a titlecase equivalent,
         *         the original character is returned.
         */
        static UChar32 toTitle(UChar32 c);

   };

}

#endif
Initial Contribution 2008-10-21 14:00:00 +00:00			`/*`
			`* Copyright (C) 2006 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`//`

			`#ifndef ANDROID_UNICODE_H`
			`#define ANDROID_UNICODE_H`

			`#include <stdint.h>`
			`#include <sys/types.h>`

			`#define REPLACEMENT_CHAR (0xFFFD)`

			`// this part of code is copied from umachine.h under ICU`
			`/**`
			`* Define UChar32 as a type for single Unicode code points.`
			`* UChar32 is a signed 32-bit integer (same as int32_t).`
			`*`
			`* The Unicode code point range is 0..0x10ffff.`
			`* All other values (negative or >=0x110000) are illegal as Unicode code points.`
			`* They may be used as sentinel values to indicate "done", "error"`
			`* or similar non-code point conditions.`
			`*`
			`* @stable ICU 2.4`
			`*/`
			`typedef int32_t UChar32;`

			`namespace android {`

			`class Encoding;`
			`/**`
			`* \class Unicode`
			`*`
			`* Helper class for getting properties of Unicode characters. Characters`
			`* can have one of the types listed in CharType and each character can have the`
			`* directionality of Direction.`
			`*/`
			`class Unicode`
			`{`
			`public:`
			`/**`
			`* Directions specified in the Unicode standard. These directions map directly`
			`* to java.lang.Character.`
			`*/`
			`enum Direction {`
			`DIRECTIONALITY_UNDEFINED = -1,`
			`DIRECTIONALITY_LEFT_TO_RIGHT,`
			`DIRECTIONALITY_RIGHT_TO_LEFT,`
			`DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,`
			`DIRECTIONALITY_EUROPEAN_NUMBER,`
			`DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,`
			`DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,`
			`DIRECTIONALITY_ARABIC_NUMBER,`
			`DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,`
			`DIRECTIONALITY_NONSPACING_MARK,`
			`DIRECTIONALITY_BOUNDARY_NEUTRAL,`
			`DIRECTIONALITY_PARAGRAPH_SEPARATOR,`
			`DIRECTIONALITY_SEGMENT_SEPARATOR,`
			`DIRECTIONALITY_WHITESPACE,`
			`DIRECTIONALITY_OTHER_NEUTRALS,`
			`DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,`
			`DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,`
			`DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,`
			`DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,`
			`DIRECTIONALITY_POP_DIRECTIONAL_FORMAT`
			`};`

			`/**`
			`* Character types as specified in the Unicode standard. These map directly to`
			`* java.lang.Character.`
			`*/`
			`enum CharType {`
			`CHARTYPE_UNASSIGNED = 0,`
			`CHARTYPE_UPPERCASE_LETTER,`
			`CHARTYPE_LOWERCASE_LETTER,`
			`CHARTYPE_TITLECASE_LETTER,`
			`CHARTYPE_MODIFIER_LETTER,`
			`CHARTYPE_OTHER_LETTER,`
			`CHARTYPE_NON_SPACING_MARK,`
			`CHARTYPE_ENCLOSING_MARK,`
			`CHARTYPE_COMBINING_SPACING_MARK,`
			`CHARTYPE_DECIMAL_DIGIT_NUMBER,`
			`CHARTYPE_LETTER_NUMBER,`
			`CHARTYPE_OTHER_NUMBER,`
			`CHARTYPE_SPACE_SEPARATOR,`
			`CHARTYPE_LINE_SEPARATOR,`
			`CHARTYPE_PARAGRAPH_SEPARATOR,`
			`CHARTYPE_CONTROL,`
			`CHARTYPE_FORMAT,`
			`CHARTYPE_MISSING_VALUE_FOR_JAVA, /* This is the mysterious missing 17 value from the java constants */`
			`CHARTYPE_PRIVATE_USE,`
			`CHARTYPE_SURROGATE,`
			`CHARTYPE_DASH_PUNCTUATION,`
			`CHARTYPE_START_PUNCTUATION,`
			`CHARTYPE_END_PUNCTUATION,`
			`CHARTYPE_CONNECTOR_PUNCTUATION,`
			`CHARTYPE_OTHER_PUNCTUATION,`
			`CHARTYPE_MATH_SYMBOL,`
			`CHARTYPE_CURRENCY_SYMBOL,`
			`CHARTYPE_MODIFIER_SYMBOL,`
			`CHARTYPE_OTHER_SYMBOL,`
			`CHARTYPE_INITIAL_QUOTE_PUNCTUATION,`
			`CHARTYPE_FINAL_QUOTE_PUNCTUATION`
			`};`

			`/**`
			`* Decomposition types as described by the unicode standard. These values map to`
			`* the same values in uchar.h in ICU.`
			`*/`
			`enum DecompositionType {`
			`DECOMPOSITION_NONE = 0,`
			`DECOMPOSITION_CANONICAL,`
			`DECOMPOSITION_COMPAT,`
			`DECOMPOSITION_CIRCLE,`
			`DECOMPOSITION_FINAL,`
			`DECOMPOSITION_FONT,`
			`DECOMPOSITION_FRACTION,`
			`DECOMPOSITION_INITIAL,`
			`DECOMPOSITION_ISOLATED,`
			`DECOMPOSITION_MEDIAL,`
			`DECOMPOSITION_NARROW,`
			`DECOMPOSITION_NOBREAK,`
			`DECOMPOSITION_SMALL,`
			`DECOMPOSITION_SQUARE,`
			`DECOMPOSITION_SUB,`
			`DECOMPOSITION_SUPER,`
			`DECOMPOSITION_VERTICAL,`
			`DECOMPOSITION_WIDE`
			`};`

			`/**`
			`* Returns the packed data for java calls`
			`* @param c The unicode character.`
			`* @return The packed data for the character.`
			`*`
			`* Copied from java.lang.Character implementation:`
			`* 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1`
			`* F E D C B A 9 8 7 6 5 4 3 2 1 0 F E D C B A 9 8 7 6 5 4 3 2 1 0`
			`*`
			`* 31 types ---------`
			`* 18 directionalities ---------`
			`* 2 mirroreds -`
			`* ----------- 56 toupper diffs`
			`* ----------- 48 tolower diffs`
			`* --- 4 totitlecase diffs`
			`* ------------- 84 numeric values`
			`* --------- 24 mirror char diffs`
			`*/`
			`static uint32_t getPackedData(UChar32 c);`

			`/**`
			`* Get the Character type.`
			`* @param c The unicode character.`
			`* @return The character's type or CHARTYPE_UNASSIGNED if the character is invalid`
			`* or has an unassigned class.`
			`*/`
			`static CharType getType(UChar32 c);`

			`/**`
			`* Get the Character's decomposition type.`
			`* @param c The unicode character.`
			`* @return The character's decomposition type or DECOMPOSITION_NONE is there`
			`* is no decomposition.`
			`*/`
			`static DecompositionType getDecompositionType(UChar32 c);`

			`/**`
			`* Returns the digit value of a character or -1 if the character`
			`* is not within the specified radix.`
			`*`
			`* The digit value is computed for integer characters and letters`
			`* within the given radix. This function does not handle Roman Numerals,`
			`* fractions, or any other characters that may represent numbers.`
			`*`
			`* @param c The unicode character`
			`* @param radix The intended radix.`
			`* @return The digit value or -1 if there is no digit value or if the value is outside the radix.`
			`*/`
			`static int getDigitValue(UChar32 c, int radix = 10);`

			`/**`
			`* Return the numeric value of a character`
			`*`
			`* @param c The unicode character.`
			`* @return The numeric value of the character. -1 if the character has no numeric value,`
			`* -2 if the character has a numeric value that is not representable by an integer.`
			`*/`
			`static int getNumericValue(UChar32 c);`

			`/**`
			`* Convert the character to lowercase`
			`* @param c The unicode character.`
			`* @return The lowercase character equivalent of c. If c does not have a lowercase equivalent,`
			`* the original character is returned.`
			`*/`
			`static UChar32 toLower(UChar32 c);`

			`/**`
			`* Convert the character to uppercase`
			`* @param c The unicode character.`
			`* @return The uppercase character equivalent of c. If c does not have an uppercase equivalent,`
			`* the original character is returned.`
			`*/`
			`static UChar32 toUpper(UChar32 c);`

			`/**`
			`* Get the directionality of the character.`
			`* @param c The unicode character.`
			`* @return The direction of the character or DIRECTIONALITY_UNDEFINED.`
			`*/`
			`static Direction getDirectionality(UChar32 c);`

			`/**`
			`* Check if the character is a mirrored character. This means that the character`
			`* has an equivalent character that is the mirror image of itself.`
			`* @param c The unicode character.`
			`* @return True iff c has a mirror equivalent.`
			`*/`
			`static bool isMirrored(UChar32 c);`

			`/**`
			`* Return the mirror of the given character.`
			`* @param c The unicode character.`
			`* @return The mirror equivalent of c. If c does not have a mirror equivalent,`
			`* the original character is returned.`
			`* @see isMirrored`
			`*/`
			`static UChar32 toMirror(UChar32 c);`

			`/**`
			`* Convert the character to title case.`
			`* @param c The unicode character.`
			`* @return The titlecase equivalent of c. If c does not have a titlecase equivalent,`
			`* the original character is returned.`
			`*/`
			`static UChar32 toTitle(UChar32 c);`

			`};`

			`}`

			`#endif`