GNU Classpath (0.17) | ||
Frames | No Frames |
1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets 2: Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. 
*/ 37: 38: 39: package java.lang; 40: 41: import gnu.java.lang.CharData; 42: 43: import java.io.Serializable; 44: 45: /** 46: * Wrapper class for the primitive char data type. In addition, this class 47: * allows one to retrieve property information and perform transformations 48: * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0. 49: * java.lang.Character is designed to be very dynamic, and as such, it 50: * retrieves information on the Unicode character set from a separate 51: * database, gnu.java.lang.CharData, which can be easily upgraded. 52: * 53: * <p>For predicates, boundaries are used to describe 54: * the set of characters for which the method will return true. 55: * This syntax uses fairly normal regular expression notation. 56: * See 5.13 of the Unicode Standard, Version 3.0, for the 57: * boundary specification. 58: * 59: * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a> 60: * for more information on the Unicode Standard. 61: * 62: * @author Tom Tromey (tromey@cygnus.com) 63: * @author Paul N. Fisher 64: * @author Jochen Hoenicke 65: * @author Eric Blake (ebb9@email.byu.edu) 66: * @see CharData 67: * @since 1.0 68: * @status updated to 1.4 69: */ 70: public final class Character implements Serializable, Comparable 71: { 72: /** 73: * A subset of Unicode blocks. 74: * 75: * @author Paul N. Fisher 76: * @author Eric Blake (ebb9@email.byu.edu) 77: * @since 1.2 78: */ 79: public static class Subset 80: { 81: /** The name of the subset. */ 82: private final String name; 83: 84: /** 85: * Construct a new subset of characters. 86: * 87: * @param name the name of the subset 88: * @throws NullPointerException if name is null 89: */ 90: protected Subset(String name) 91: { 92: // Note that name.toString() is name, unless name was null. 93: this.name = name.toString(); 94: } 95: 96: /** 97: * Compares two Subsets for equality. 
This is <code>final</code>, and 98: * restricts the comparison on the <code>==</code> operator, so it returns 99: * true only for the same object. 100: * 101: * @param o the object to compare 102: * @return true if o is this 103: */ 104: public final boolean equals(Object o) 105: { 106: return o == this; 107: } 108: 109: /** 110: * Makes the original hashCode of Object final, to be consistent with 111: * equals. 112: * 113: * @return the hash code for this object 114: */ 115: public final int hashCode() 116: { 117: return super.hashCode(); 118: } 119: 120: /** 121: * Returns the name of the subset. 122: * 123: * @return the name 124: */ 125: public final String toString() 126: { 127: return name; 128: } 129: } // class Subset 130: 131: /** 132: * A family of character subsets in the Unicode specification. A character 133: * is in at most one of these blocks. 134: * 135: * This inner class was generated automatically from 136: * <code>doc/unicode/Block-3.txt</code>, by some perl scripts. 137: * This Unicode definition file can be found on the 138: * <a href="http://www.unicode.org">http://www.unicode.org</a> website. 139: * JDK 1.4 uses Unicode version 3.0.0. 140: * 141: * @author scripts/unicode-blocks.pl (written by Eric Blake) 142: * @since 1.2 143: */ 144: public static final class UnicodeBlock extends Subset 145: { 146: /** The start of the subset. */ 147: private final char start; 148: 149: /** The end of the subset. */ 150: private final char end; 151: 152: /** 153: * Constructor for strictly defined blocks. 154: * 155: * @param start the start character of the range 156: * @param end the end character of the range 157: * @param name the block name 158: */ 159: private UnicodeBlock(char start, char end, String name) 160: { 161: super(name); 162: this.start = start; 163: this.end = end; 164: } 165: 166: /** 167: * Returns the Unicode character block which a character belongs to. 
168: * 169: * @param ch the character to look up 170: * @return the set it belongs to, or null if it is not in one 171: */ 172: public static UnicodeBlock of(char ch) 173: { 174: // Special case: SPECIALS is documented as covering the single code point U+FEFF as well as its main range, but a UnicodeBlock stores only one start/end pair, so U+FEFF is matched here by hand. 175: if (ch == '\uFEFF') 176: return SPECIALS; 177: // Binary search over sets for the block whose [start, end] range contains ch; this relies on sets listing the blocks in ascending, non-overlapping range order. Falling out of the loop means ch lies in a gap between blocks. 178: int low = 0; 179: int hi = sets.length - 1; 180: while (low <= hi) 181: { 182: int mid = (low + hi) >> 1; 183: UnicodeBlock b = sets[mid]; 184: if (ch < b.start) 185: hi = mid - 1; 186: else if (ch > b.end) 187: low = mid + 1; 188: else 189: return b; 190: } 191: return null; 192: } 193: 194: /** 195: * Basic Latin. 196: * '\u0000' - '\u007F'. 197: */ 198: public static final UnicodeBlock BASIC_LATIN 199: = new UnicodeBlock('\u0000', '\u007F', 200: "BASIC_LATIN"); 201: 202: /** 203: * Latin-1 Supplement. 204: * '\u0080' - '\u00FF'. 205: */ 206: public static final UnicodeBlock LATIN_1_SUPPLEMENT 207: = new UnicodeBlock('\u0080', '\u00FF', 208: "LATIN_1_SUPPLEMENT"); 209: 210: /** 211: * Latin Extended-A. 212: * '\u0100' - '\u017F'. 213: */ 214: public static final UnicodeBlock LATIN_EXTENDED_A 215: = new UnicodeBlock('\u0100', '\u017F', 216: "LATIN_EXTENDED_A"); 217: 218: /** 219: * Latin Extended-B. 220: * '\u0180' - '\u024F'. 221: */ 222: public static final UnicodeBlock LATIN_EXTENDED_B 223: = new UnicodeBlock('\u0180', '\u024F', 224: "LATIN_EXTENDED_B"); 225: 226: /** 227: * IPA Extensions. 228: * '\u0250' - '\u02AF'. 229: */ 230: public static final UnicodeBlock IPA_EXTENSIONS 231: = new UnicodeBlock('\u0250', '\u02AF', 232: "IPA_EXTENSIONS"); 233: 234: /** 235: * Spacing Modifier Letters. 236: * '\u02B0' - '\u02FF'. 237: */ 238: public static final UnicodeBlock SPACING_MODIFIER_LETTERS 239: = new UnicodeBlock('\u02B0', '\u02FF', 240: "SPACING_MODIFIER_LETTERS"); 241: 242: /** 243: * Combining Diacritical Marks. 244: * '\u0300' - '\u036F'. 
245: */ 246: public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 247: = new UnicodeBlock('\u0300', '\u036F', 248: "COMBINING_DIACRITICAL_MARKS"); 249: 250: /** 251: * Greek. 252: * '\u0370' - '\u03FF'. 253: */ 254: public static final UnicodeBlock GREEK 255: = new UnicodeBlock('\u0370', '\u03FF', 256: "GREEK"); 257: 258: /** 259: * Cyrillic. 260: * '\u0400' - '\u04FF'. 261: */ 262: public static final UnicodeBlock CYRILLIC 263: = new UnicodeBlock('\u0400', '\u04FF', 264: "CYRILLIC"); 265: 266: /** 267: * Armenian. 268: * '\u0530' - '\u058F'. 269: */ 270: public static final UnicodeBlock ARMENIAN 271: = new UnicodeBlock('\u0530', '\u058F', 272: "ARMENIAN"); 273: 274: /** 275: * Hebrew. 276: * '\u0590' - '\u05FF'. 277: */ 278: public static final UnicodeBlock HEBREW 279: = new UnicodeBlock('\u0590', '\u05FF', 280: "HEBREW"); 281: 282: /** 283: * Arabic. 284: * '\u0600' - '\u06FF'. 285: */ 286: public static final UnicodeBlock ARABIC 287: = new UnicodeBlock('\u0600', '\u06FF', 288: "ARABIC"); 289: 290: /** 291: * Syriac. 292: * '\u0700' - '\u074F'. 293: * @since 1.4 294: */ 295: public static final UnicodeBlock SYRIAC 296: = new UnicodeBlock('\u0700', '\u074F', 297: "SYRIAC"); 298: 299: /** 300: * Thaana. 301: * '\u0780' - '\u07BF'. 302: * @since 1.4 303: */ 304: public static final UnicodeBlock THAANA 305: = new UnicodeBlock('\u0780', '\u07BF', 306: "THAANA"); 307: 308: /** 309: * Devanagari. 310: * '\u0900' - '\u097F'. 311: */ 312: public static final UnicodeBlock DEVANAGARI 313: = new UnicodeBlock('\u0900', '\u097F', 314: "DEVANAGARI"); 315: 316: /** 317: * Bengali. 318: * '\u0980' - '\u09FF'. 319: */ 320: public static final UnicodeBlock BENGALI 321: = new UnicodeBlock('\u0980', '\u09FF', 322: "BENGALI"); 323: 324: /** 325: * Gurmukhi. 326: * '\u0A00' - '\u0A7F'. 327: */ 328: public static final UnicodeBlock GURMUKHI 329: = new UnicodeBlock('\u0A00', '\u0A7F', 330: "GURMUKHI"); 331: 332: /** 333: * Gujarati. 334: * '\u0A80' - '\u0AFF'. 
335: */ 336: public static final UnicodeBlock GUJARATI 337: = new UnicodeBlock('\u0A80', '\u0AFF', 338: "GUJARATI"); 339: 340: /** 341: * Oriya. 342: * '\u0B00' - '\u0B7F'. 343: */ 344: public static final UnicodeBlock ORIYA 345: = new UnicodeBlock('\u0B00', '\u0B7F', 346: "ORIYA"); 347: 348: /** 349: * Tamil. 350: * '\u0B80' - '\u0BFF'. 351: */ 352: public static final UnicodeBlock TAMIL 353: = new UnicodeBlock('\u0B80', '\u0BFF', 354: "TAMIL"); 355: 356: /** 357: * Telugu. 358: * '\u0C00' - '\u0C7F'. 359: */ 360: public static final UnicodeBlock TELUGU 361: = new UnicodeBlock('\u0C00', '\u0C7F', 362: "TELUGU"); 363: 364: /** 365: * Kannada. 366: * '\u0C80' - '\u0CFF'. 367: */ 368: public static final UnicodeBlock KANNADA 369: = new UnicodeBlock('\u0C80', '\u0CFF', 370: "KANNADA"); 371: 372: /** 373: * Malayalam. 374: * '\u0D00' - '\u0D7F'. 375: */ 376: public static final UnicodeBlock MALAYALAM 377: = new UnicodeBlock('\u0D00', '\u0D7F', 378: "MALAYALAM"); 379: 380: /** 381: * Sinhala. 382: * '\u0D80' - '\u0DFF'. 383: * @since 1.4 384: */ 385: public static final UnicodeBlock SINHALA 386: = new UnicodeBlock('\u0D80', '\u0DFF', 387: "SINHALA"); 388: 389: /** 390: * Thai. 391: * '\u0E00' - '\u0E7F'. 392: */ 393: public static final UnicodeBlock THAI 394: = new UnicodeBlock('\u0E00', '\u0E7F', 395: "THAI"); 396: 397: /** 398: * Lao. 399: * '\u0E80' - '\u0EFF'. 400: */ 401: public static final UnicodeBlock LAO 402: = new UnicodeBlock('\u0E80', '\u0EFF', 403: "LAO"); 404: 405: /** 406: * Tibetan. 407: * '\u0F00' - '\u0FFF'. 408: */ 409: public static final UnicodeBlock TIBETAN 410: = new UnicodeBlock('\u0F00', '\u0FFF', 411: "TIBETAN"); 412: 413: /** 414: * Myanmar. 415: * '\u1000' - '\u109F'. 416: * @since 1.4 417: */ 418: public static final UnicodeBlock MYANMAR 419: = new UnicodeBlock('\u1000', '\u109F', 420: "MYANMAR"); 421: 422: /** 423: * Georgian. 424: * '\u10A0' - '\u10FF'. 
425: */ 426: public static final UnicodeBlock GEORGIAN 427: = new UnicodeBlock('\u10A0', '\u10FF', 428: "GEORGIAN"); 429: 430: /** 431: * Hangul Jamo. 432: * '\u1100' - '\u11FF'. 433: */ 434: public static final UnicodeBlock HANGUL_JAMO 435: = new UnicodeBlock('\u1100', '\u11FF', 436: "HANGUL_JAMO"); 437: 438: /** 439: * Ethiopic. 440: * '\u1200' - '\u137F'. 441: * @since 1.4 442: */ 443: public static final UnicodeBlock ETHIOPIC 444: = new UnicodeBlock('\u1200', '\u137F', 445: "ETHIOPIC"); 446: 447: /** 448: * Cherokee. 449: * '\u13A0' - '\u13FF'. 450: * @since 1.4 451: */ 452: public static final UnicodeBlock CHEROKEE 453: = new UnicodeBlock('\u13A0', '\u13FF', 454: "CHEROKEE"); 455: 456: /** 457: * Unified Canadian Aboriginal Syllabics. 458: * '\u1400' - '\u167F'. 459: * @since 1.4 460: */ 461: public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 462: = new UnicodeBlock('\u1400', '\u167F', 463: "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"); 464: 465: /** 466: * Ogham. 467: * '\u1680' - '\u169F'. 468: * @since 1.4 469: */ 470: public static final UnicodeBlock OGHAM 471: = new UnicodeBlock('\u1680', '\u169F', 472: "OGHAM"); 473: 474: /** 475: * Runic. 476: * '\u16A0' - '\u16FF'. 477: * @since 1.4 478: */ 479: public static final UnicodeBlock RUNIC 480: = new UnicodeBlock('\u16A0', '\u16FF', 481: "RUNIC"); 482: 483: /** 484: * Khmer. 485: * '\u1780' - '\u17FF'. 486: * @since 1.4 487: */ 488: public static final UnicodeBlock KHMER 489: = new UnicodeBlock('\u1780', '\u17FF', 490: "KHMER"); 491: 492: /** 493: * Mongolian. 494: * '\u1800' - '\u18AF'. 495: * @since 1.4 496: */ 497: public static final UnicodeBlock MONGOLIAN 498: = new UnicodeBlock('\u1800', '\u18AF', 499: "MONGOLIAN"); 500: 501: /** 502: * Latin Extended Additional. 503: * '\u1E00' - '\u1EFF'. 504: */ 505: public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 506: = new UnicodeBlock('\u1E00', '\u1EFF', 507: "LATIN_EXTENDED_ADDITIONAL"); 508: 509: /** 510: * Greek Extended. 
511: * '\u1F00' - '\u1FFF'. 512: */ 513: public static final UnicodeBlock GREEK_EXTENDED 514: = new UnicodeBlock('\u1F00', '\u1FFF', 515: "GREEK_EXTENDED"); 516: 517: /** 518: * General Punctuation. 519: * '\u2000' - '\u206F'. 520: */ 521: public static final UnicodeBlock GENERAL_PUNCTUATION 522: = new UnicodeBlock('\u2000', '\u206F', 523: "GENERAL_PUNCTUATION"); 524: 525: /** 526: * Superscripts and Subscripts. 527: * '\u2070' - '\u209F'. 528: */ 529: public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 530: = new UnicodeBlock('\u2070', '\u209F', 531: "SUPERSCRIPTS_AND_SUBSCRIPTS"); 532: 533: /** 534: * Currency Symbols. 535: * '\u20A0' - '\u20CF'. 536: */ 537: public static final UnicodeBlock CURRENCY_SYMBOLS 538: = new UnicodeBlock('\u20A0', '\u20CF', 539: "CURRENCY_SYMBOLS"); 540: 541: /** 542: * Combining Marks for Symbols. 543: * '\u20D0' - '\u20FF'. 544: */ 545: public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 546: = new UnicodeBlock('\u20D0', '\u20FF', 547: "COMBINING_MARKS_FOR_SYMBOLS"); 548: 549: /** 550: * Letterlike Symbols. 551: * '\u2100' - '\u214F'. 552: */ 553: public static final UnicodeBlock LETTERLIKE_SYMBOLS 554: = new UnicodeBlock('\u2100', '\u214F', 555: "LETTERLIKE_SYMBOLS"); 556: 557: /** 558: * Number Forms. 559: * '\u2150' - '\u218F'. 560: */ 561: public static final UnicodeBlock NUMBER_FORMS 562: = new UnicodeBlock('\u2150', '\u218F', 563: "NUMBER_FORMS"); 564: 565: /** 566: * Arrows. 567: * '\u2190' - '\u21FF'. 568: */ 569: public static final UnicodeBlock ARROWS 570: = new UnicodeBlock('\u2190', '\u21FF', 571: "ARROWS"); 572: 573: /** 574: * Mathematical Operators. 575: * '\u2200' - '\u22FF'. 576: */ 577: public static final UnicodeBlock MATHEMATICAL_OPERATORS 578: = new UnicodeBlock('\u2200', '\u22FF', 579: "MATHEMATICAL_OPERATORS"); 580: 581: /** 582: * Miscellaneous Technical. 583: * '\u2300' - '\u23FF'. 
584: */ 585: public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 586: = new UnicodeBlock('\u2300', '\u23FF', 587: "MISCELLANEOUS_TECHNICAL"); 588: 589: /** 590: * Control Pictures. 591: * '\u2400' - '\u243F'. 592: */ 593: public static final UnicodeBlock CONTROL_PICTURES 594: = new UnicodeBlock('\u2400', '\u243F', 595: "CONTROL_PICTURES"); 596: 597: /** 598: * Optical Character Recognition. 599: * '\u2440' - '\u245F'. 600: */ 601: public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 602: = new UnicodeBlock('\u2440', '\u245F', 603: "OPTICAL_CHARACTER_RECOGNITION"); 604: 605: /** 606: * Enclosed Alphanumerics. 607: * '\u2460' - '\u24FF'. 608: */ 609: public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 610: = new UnicodeBlock('\u2460', '\u24FF', 611: "ENCLOSED_ALPHANUMERICS"); 612: 613: /** 614: * Box Drawing. 615: * '\u2500' - '\u257F'. 616: */ 617: public static final UnicodeBlock BOX_DRAWING 618: = new UnicodeBlock('\u2500', '\u257F', 619: "BOX_DRAWING"); 620: 621: /** 622: * Block Elements. 623: * '\u2580' - '\u259F'. 624: */ 625: public static final UnicodeBlock BLOCK_ELEMENTS 626: = new UnicodeBlock('\u2580', '\u259F', 627: "BLOCK_ELEMENTS"); 628: 629: /** 630: * Geometric Shapes. 631: * '\u25A0' - '\u25FF'. 632: */ 633: public static final UnicodeBlock GEOMETRIC_SHAPES 634: = new UnicodeBlock('\u25A0', '\u25FF', 635: "GEOMETRIC_SHAPES"); 636: 637: /** 638: * Miscellaneous Symbols. 639: * '\u2600' - '\u26FF'. 640: */ 641: public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 642: = new UnicodeBlock('\u2600', '\u26FF', 643: "MISCELLANEOUS_SYMBOLS"); 644: 645: /** 646: * Dingbats. 647: * '\u2700' - '\u27BF'. 648: */ 649: public static final UnicodeBlock DINGBATS 650: = new UnicodeBlock('\u2700', '\u27BF', 651: "DINGBATS"); 652: 653: /** 654: * Braille Patterns. 655: * '\u2800' - '\u28FF'. 
656: * @since 1.4 657: */ 658: public static final UnicodeBlock BRAILLE_PATTERNS 659: = new UnicodeBlock('\u2800', '\u28FF', 660: "BRAILLE_PATTERNS"); 661: 662: /** 663: * CJK Radicals Supplement. 664: * '\u2E80' - '\u2EFF'. 665: * @since 1.4 666: */ 667: public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 668: = new UnicodeBlock('\u2E80', '\u2EFF', 669: "CJK_RADICALS_SUPPLEMENT"); 670: 671: /** 672: * Kangxi Radicals. 673: * '\u2F00' - '\u2FDF'. 674: * @since 1.4 675: */ 676: public static final UnicodeBlock KANGXI_RADICALS 677: = new UnicodeBlock('\u2F00', '\u2FDF', 678: "KANGXI_RADICALS"); 679: 680: /** 681: * Ideographic Description Characters. 682: * '\u2FF0' - '\u2FFF'. 683: * @since 1.4 684: */ 685: public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 686: = new UnicodeBlock('\u2FF0', '\u2FFF', 687: "IDEOGRAPHIC_DESCRIPTION_CHARACTERS"); 688: 689: /** 690: * CJK Symbols and Punctuation. 691: * '\u3000' - '\u303F'. 692: */ 693: public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 694: = new UnicodeBlock('\u3000', '\u303F', 695: "CJK_SYMBOLS_AND_PUNCTUATION"); 696: 697: /** 698: * Hiragana. 699: * '\u3040' - '\u309F'. 700: */ 701: public static final UnicodeBlock HIRAGANA 702: = new UnicodeBlock('\u3040', '\u309F', 703: "HIRAGANA"); 704: 705: /** 706: * Katakana. 707: * '\u30A0' - '\u30FF'. 708: */ 709: public static final UnicodeBlock KATAKANA 710: = new UnicodeBlock('\u30A0', '\u30FF', 711: "KATAKANA"); 712: 713: /** 714: * Bopomofo. 715: * '\u3100' - '\u312F'. 716: */ 717: public static final UnicodeBlock BOPOMOFO 718: = new UnicodeBlock('\u3100', '\u312F', 719: "BOPOMOFO"); 720: 721: /** 722: * Hangul Compatibility Jamo. 723: * '\u3130' - '\u318F'. 724: */ 725: public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 726: = new UnicodeBlock('\u3130', '\u318F', 727: "HANGUL_COMPATIBILITY_JAMO"); 728: 729: /** 730: * Kanbun. 731: * '\u3190' - '\u319F'. 
732: */ 733: public static final UnicodeBlock KANBUN 734: = new UnicodeBlock('\u3190', '\u319F', 735: "KANBUN"); 736: 737: /** 738: * Bopomofo Extended. 739: * '\u31A0' - '\u31BF'. 740: * @since 1.4 741: */ 742: public static final UnicodeBlock BOPOMOFO_EXTENDED 743: = new UnicodeBlock('\u31A0', '\u31BF', 744: "BOPOMOFO_EXTENDED"); 745: 746: /** 747: * Enclosed CJK Letters and Months. 748: * '\u3200' - '\u32FF'. 749: */ 750: public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 751: = new UnicodeBlock('\u3200', '\u32FF', 752: "ENCLOSED_CJK_LETTERS_AND_MONTHS"); 753: 754: /** 755: * CJK Compatibility. 756: * '\u3300' - '\u33FF'. 757: */ 758: public static final UnicodeBlock CJK_COMPATIBILITY 759: = new UnicodeBlock('\u3300', '\u33FF', 760: "CJK_COMPATIBILITY"); 761: 762: /** 763: * CJK Unified Ideographs Extension A. 764: * '\u3400' - '\u4DB5'. 765: * @since 1.4 766: */ 767: public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 768: = new UnicodeBlock('\u3400', '\u4DB5', 769: "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"); 770: 771: /** 772: * CJK Unified Ideographs. 773: * '\u4E00' - '\u9FFF'. 774: */ 775: public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 776: = new UnicodeBlock('\u4E00', '\u9FFF', 777: "CJK_UNIFIED_IDEOGRAPHS"); 778: 779: /** 780: * Yi Syllables. 781: * '\uA000' - '\uA48F'. 782: * @since 1.4 783: */ 784: public static final UnicodeBlock YI_SYLLABLES 785: = new UnicodeBlock('\uA000', '\uA48F', 786: "YI_SYLLABLES"); 787: 788: /** 789: * Yi Radicals. 790: * '\uA490' - '\uA4CF'. 791: * @since 1.4 792: */ 793: public static final UnicodeBlock YI_RADICALS 794: = new UnicodeBlock('\uA490', '\uA4CF', 795: "YI_RADICALS"); 796: 797: /** 798: * Hangul Syllables. 799: * '\uAC00' - '\uD7A3'. 800: */ 801: public static final UnicodeBlock HANGUL_SYLLABLES 802: = new UnicodeBlock('\uAC00', '\uD7A3', 803: "HANGUL_SYLLABLES"); 804: 805: /** 806: * Surrogates Area. 807: * '\uD800' - '\uDFFF'. 
808: */ 809: public static final UnicodeBlock SURROGATES_AREA 810: = new UnicodeBlock('\uD800', '\uDFFF', 811: "SURROGATES_AREA"); 812: 813: /** 814: * Private Use Area. 815: * '\uE000' - '\uF8FF'. 816: */ 817: public static final UnicodeBlock PRIVATE_USE_AREA 818: = new UnicodeBlock('\uE000', '\uF8FF', 819: "PRIVATE_USE_AREA"); 820: 821: /** 822: * CJK Compatibility Ideographs. 823: * '\uF900' - '\uFAFF'. 824: */ 825: public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 826: = new UnicodeBlock('\uF900', '\uFAFF', 827: "CJK_COMPATIBILITY_IDEOGRAPHS"); 828: 829: /** 830: * Alphabetic Presentation Forms. 831: * '\uFB00' - '\uFB4F'. 832: */ 833: public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 834: = new UnicodeBlock('\uFB00', '\uFB4F', 835: "ALPHABETIC_PRESENTATION_FORMS"); 836: 837: /** 838: * Arabic Presentation Forms-A. 839: * '\uFB50' - '\uFDFF'. 840: */ 841: public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 842: = new UnicodeBlock('\uFB50', '\uFDFF', 843: "ARABIC_PRESENTATION_FORMS_A"); 844: 845: /** 846: * Combining Half Marks. 847: * '\uFE20' - '\uFE2F'. 848: */ 849: public static final UnicodeBlock COMBINING_HALF_MARKS 850: = new UnicodeBlock('\uFE20', '\uFE2F', 851: "COMBINING_HALF_MARKS"); 852: 853: /** 854: * CJK Compatibility Forms. 855: * '\uFE30' - '\uFE4F'. 856: */ 857: public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 858: = new UnicodeBlock('\uFE30', '\uFE4F', 859: "CJK_COMPATIBILITY_FORMS"); 860: 861: /** 862: * Small Form Variants. 863: * '\uFE50' - '\uFE6F'. 864: */ 865: public static final UnicodeBlock SMALL_FORM_VARIANTS 866: = new UnicodeBlock('\uFE50', '\uFE6F', 867: "SMALL_FORM_VARIANTS"); 868: 869: /** 870: * Arabic Presentation Forms-B. 871: * '\uFE70' - '\uFEFE'. 872: */ 873: public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 874: = new UnicodeBlock('\uFE70', '\uFEFE', 875: "ARABIC_PRESENTATION_FORMS_B"); 876: 877: /** 878: * Halfwidth and Fullwidth Forms. 879: * '\uFF00' - '\uFFEF'. 
880: */ 881: public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 882: = new UnicodeBlock('\uFF00', '\uFFEF', 883: "HALFWIDTH_AND_FULLWIDTH_FORMS"); 884: 885: /** 886: * Specials. 887: * '\uFEFF', '\uFFF0' - '\uFFFD'. 888: */ 889: public static final UnicodeBlock SPECIALS 890: = new UnicodeBlock('\uFFF0', '\uFFFD', 891: "SPECIALS"); 892: 893: /** 894: * The defined subsets. 895: */ 896: private static final UnicodeBlock sets[] = { 897: BASIC_LATIN, 898: LATIN_1_SUPPLEMENT, 899: LATIN_EXTENDED_A, 900: LATIN_EXTENDED_B, 901: IPA_EXTENSIONS, 902: SPACING_MODIFIER_LETTERS, 903: COMBINING_DIACRITICAL_MARKS, 904: GREEK, 905: CYRILLIC, 906: ARMENIAN, 907: HEBREW, 908: ARABIC, 909: SYRIAC, 910: THAANA, 911: DEVANAGARI, 912: BENGALI, 913: GURMUKHI, 914: GUJARATI, 915: ORIYA, 916: TAMIL, 917: TELUGU, 918: KANNADA, 919: MALAYALAM, 920: SINHALA, 921: THAI, 922: LAO, 923: TIBETAN, 924: MYANMAR, 925: GEORGIAN, 926: HANGUL_JAMO, 927: ETHIOPIC, 928: CHEROKEE, 929: UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 930: OGHAM, 931: RUNIC, 932: KHMER, 933: MONGOLIAN, 934: LATIN_EXTENDED_ADDITIONAL, 935: GREEK_EXTENDED, 936: GENERAL_PUNCTUATION, 937: SUPERSCRIPTS_AND_SUBSCRIPTS, 938: CURRENCY_SYMBOLS, 939: COMBINING_MARKS_FOR_SYMBOLS, 940: LETTERLIKE_SYMBOLS, 941: NUMBER_FORMS, 942: ARROWS, 943: MATHEMATICAL_OPERATORS, 944: MISCELLANEOUS_TECHNICAL, 945: CONTROL_PICTURES, 946: OPTICAL_CHARACTER_RECOGNITION, 947: ENCLOSED_ALPHANUMERICS, 948: BOX_DRAWING, 949: BLOCK_ELEMENTS, 950: GEOMETRIC_SHAPES, 951: MISCELLANEOUS_SYMBOLS, 952: DINGBATS, 953: BRAILLE_PATTERNS, 954: CJK_RADICALS_SUPPLEMENT, 955: KANGXI_RADICALS, 956: IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 957: CJK_SYMBOLS_AND_PUNCTUATION, 958: HIRAGANA, 959: KATAKANA, 960: BOPOMOFO, 961: HANGUL_COMPATIBILITY_JAMO, 962: KANBUN, 963: BOPOMOFO_EXTENDED, 964: ENCLOSED_CJK_LETTERS_AND_MONTHS, 965: CJK_COMPATIBILITY, 966: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 967: CJK_UNIFIED_IDEOGRAPHS, 968: YI_SYLLABLES, 969: YI_RADICALS, 970: HANGUL_SYLLABLES, 
971: SURROGATES_AREA, 972: PRIVATE_USE_AREA, 973: CJK_COMPATIBILITY_IDEOGRAPHS, 974: ALPHABETIC_PRESENTATION_FORMS, 975: ARABIC_PRESENTATION_FORMS_A, 976: COMBINING_HALF_MARKS, 977: CJK_COMPATIBILITY_FORMS, 978: SMALL_FORM_VARIANTS, 979: ARABIC_PRESENTATION_FORMS_B, 980: HALFWIDTH_AND_FULLWIDTH_FORMS, 981: SPECIALS, 982: }; 983: } // class UnicodeBlock 984: 985: /** 986: * The immutable value of this Character. 987: * 988: * @serial the value of this Character 989: */ 990: private final char value; 991: 992: /** 993: * Compatible with JDK 1.0+. 994: */ 995: private static final long serialVersionUID = 3786198910865385080L; 996: 997: /** 998: * Smallest value allowed for radix arguments in Java. This value is 2. 999: * 1000: * @see #digit(char, int) 1001: * @see #forDigit(int, int) 1002: * @see Integer#toString(int, int) 1003: * @see Integer#valueOf(String) 1004: */ 1005: public static final int MIN_RADIX = 2; 1006: 1007: /** 1008: * Largest value allowed for radix arguments in Java. This value is 36. 1009: * 1010: * @see #digit(char, int) 1011: * @see #forDigit(int, int) 1012: * @see Integer#toString(int, int) 1013: * @see Integer#valueOf(String) 1014: */ 1015: public static final int MAX_RADIX = 36; 1016: 1017: /** 1018: * The minimum value the char data type can hold. 1019: * This value is <code>'\\u0000'</code>. 1020: */ 1021: public static final char MIN_VALUE = '\u0000'; 1022: 1023: /** 1024: * The maximum value the char data type can hold. 1025: * This value is <code>'\\uFFFF'</code>. 1026: */ 1027: public static final char MAX_VALUE = '\uFFFF'; 1028: 1029: /** 1030: * Class object representing the primitive char data type. 1031: * 1032: * @since 1.1 1033: */ 1034: public static final Class TYPE = VMClassLoader.getPrimitiveClass('C'); 1035: 1036: /** 1037: * Lu = Letter, Uppercase (Informative). 1038: * 1039: * @since 1.1 1040: */ 1041: public static final byte UPPERCASE_LETTER = 1; 1042: 1043: /** 1044: * Ll = Letter, Lowercase (Informative). 
1045: * 1046: * @since 1.1 1047: */ 1048: public static final byte LOWERCASE_LETTER = 2; 1049: 1050: /** 1051: * Lt = Letter, Titlecase (Informative). 1052: * 1053: * @since 1.1 1054: */ 1055: public static final byte TITLECASE_LETTER = 3; 1056: 1057: /** 1058: * Mn = Mark, Non-Spacing (Normative). 1059: * 1060: * @since 1.1 1061: */ 1062: public static final byte NON_SPACING_MARK = 6; 1063: 1064: /** 1065: * Mc = Mark, Spacing Combining (Normative). 1066: * 1067: * @since 1.1 1068: */ 1069: public static final byte COMBINING_SPACING_MARK = 8; 1070: 1071: /** 1072: * Me = Mark, Enclosing (Normative). 1073: * 1074: * @since 1.1 1075: */ 1076: public static final byte ENCLOSING_MARK = 7; 1077: 1078: /** 1079: * Nd = Number, Decimal Digit (Normative). 1080: * 1081: * @since 1.1 1082: */ 1083: public static final byte DECIMAL_DIGIT_NUMBER = 9; 1084: 1085: /** 1086: * Nl = Number, Letter (Normative). 1087: * 1088: * @since 1.1 1089: */ 1090: public static final byte LETTER_NUMBER = 10; 1091: 1092: /** 1093: * No = Number, Other (Normative). 1094: * 1095: * @since 1.1 1096: */ 1097: public static final byte OTHER_NUMBER = 11; 1098: 1099: /** 1100: * Zs = Separator, Space (Normative). 1101: * 1102: * @since 1.1 1103: */ 1104: public static final byte SPACE_SEPARATOR = 12; 1105: 1106: /** 1107: * Zl = Separator, Line (Normative). 1108: * 1109: * @since 1.1 1110: */ 1111: public static final byte LINE_SEPARATOR = 13; 1112: 1113: /** 1114: * Zp = Separator, Paragraph (Normative). 1115: * 1116: * @since 1.1 1117: */ 1118: public static final byte PARAGRAPH_SEPARATOR = 14; 1119: 1120: /** 1121: * Cc = Other, Control (Normative). 1122: * 1123: * @since 1.1 1124: */ 1125: public static final byte CONTROL = 15; 1126: 1127: /** 1128: * Cf = Other, Format (Normative). 1129: * 1130: * @since 1.1 1131: */ 1132: public static final byte FORMAT = 16; 1133: 1134: /** 1135: * Cs = Other, Surrogate (Normative). 
1136: * 1137: * @since 1.1 1138: */ 1139: public static final byte SURROGATE = 19; 1140: 1141: /** 1142: * Co = Other, Private Use (Normative). 1143: * 1144: * @since 1.1 1145: */ 1146: public static final byte PRIVATE_USE = 18; 1147: 1148: /** 1149: * Cn = Other, Not Assigned (Normative). 1150: * 1151: * @since 1.1 1152: */ 1153: public static final byte UNASSIGNED = 0; 1154: 1155: /** 1156: * Lm = Letter, Modifier (Informative). 1157: * 1158: * @since 1.1 1159: */ 1160: public static final byte MODIFIER_LETTER = 4; 1161: 1162: /** 1163: * Lo = Letter, Other (Informative). 1164: * 1165: * @since 1.1 1166: */ 1167: public static final byte OTHER_LETTER = 5; 1168: 1169: /** 1170: * Pc = Punctuation, Connector (Informative). 1171: * 1172: * @since 1.1 1173: */ 1174: public static final byte CONNECTOR_PUNCTUATION = 23; 1175: 1176: /** 1177: * Pd = Punctuation, Dash (Informative). 1178: * 1179: * @since 1.1 1180: */ 1181: public static final byte DASH_PUNCTUATION = 20; 1182: 1183: /** 1184: * Ps = Punctuation, Open (Informative). 1185: * 1186: * @since 1.1 1187: */ 1188: public static final byte START_PUNCTUATION = 21; 1189: 1190: /** 1191: * Pe = Punctuation, Close (Informative). 1192: * 1193: * @since 1.1 1194: */ 1195: public static final byte END_PUNCTUATION = 22; 1196: 1197: /** 1198: * Pi = Punctuation, Initial Quote (Informative). 1199: * 1200: * @since 1.4 1201: */ 1202: public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 1203: 1204: /** 1205: * Pf = Punctuation, Final Quote (Informative). 1206: * 1207: * @since 1.4 1208: */ 1209: public static final byte FINAL_QUOTE_PUNCTUATION = 30; 1210: 1211: /** 1212: * Po = Punctuation, Other (Informative). 1213: * 1214: * @since 1.1 1215: */ 1216: public static final byte OTHER_PUNCTUATION = 24; 1217: 1218: /** 1219: * Sm = Symbol, Math (Informative). 1220: * 1221: * @since 1.1 1222: */ 1223: public static final byte MATH_SYMBOL = 25; 1224: 1225: /** 1226: * Sc = Symbol, Currency (Informative). 
1227: * 1228: * @since 1.1 1229: */ 1230: public static final byte CURRENCY_SYMBOL = 26; 1231: 1232: /** 1233: * Sk = Symbol, Modifier (Informative). 1234: * 1235: * @since 1.1 1236: */ 1237: public static final byte MODIFIER_SYMBOL = 27; 1238: 1239: /** 1240: * So = Symbol, Other (Informative). 1241: * 1242: * @since 1.1 1243: */ 1244: public static final byte OTHER_SYMBOL = 28; 1245: 1246: /** 1247: * Undefined bidirectional character type. Undefined char values have 1248: * undefined directionality in the Unicode specification. 1249: * 1250: * @since 1.4 1251: */ 1252: public static final byte DIRECTIONALITY_UNDEFINED = -1; 1253: 1254: /** 1255: * Strong bidirectional character type "L". 1256: * 1257: * @since 1.4 1258: */ 1259: public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 1260: 1261: /** 1262: * Strong bidirectional character type "R". 1263: * 1264: * @since 1.4 1265: */ 1266: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 1267: 1268: /** 1269: * Strong bidirectional character type "AL". 1270: * 1271: * @since 1.4 1272: */ 1273: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 1274: 1275: /** 1276: * Weak bidirectional character type "EN". 1277: * 1278: * @since 1.4 1279: */ 1280: public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 1281: 1282: /** 1283: * Weak bidirectional character type "ES". 1284: * 1285: * @since 1.4 1286: */ 1287: public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 1288: 1289: /** 1290: * Weak bidirectional character type "ET". 1291: * 1292: * @since 1.4 1293: */ 1294: public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 1295: 1296: /** 1297: * Weak bidirectional character type "AN". 1298: * 1299: * @since 1.4 1300: */ 1301: public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 1302: 1303: /** 1304: * Weak bidirectional character type "CS". 
1305: * 1306: * @since 1.4 1307: */ 1308: public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 1309: 1310: /** 1311: * Weak bidirectional character type "NSM". 1312: * 1313: * @since 1.4 1314: */ 1315: public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 1316: 1317: /** 1318: * Weak bidirectional character type "BN". 1319: * 1320: * @since 1.4 1321: */ 1322: public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 1323: 1324: /** 1325: * Neutral bidirectional character type "B". 1326: * 1327: * @since 1.4 1328: */ 1329: public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 1330: 1331: /** 1332: * Neutral bidirectional character type "S". 1333: * 1334: * @since 1.4 1335: */ 1336: public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 1337: 1338: /** 1339: * Strong bidirectional character type "WS". 1340: * 1341: * @since 1.4 1342: */ 1343: public static final byte DIRECTIONALITY_WHITESPACE = 12; 1344: 1345: /** 1346: * Neutral bidirectional character type "ON". 1347: * 1348: * @since 1.4 1349: */ 1350: public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 1351: 1352: /** 1353: * Strong bidirectional character type "LRE". 1354: * 1355: * @since 1.4 1356: */ 1357: public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 1358: 1359: /** 1360: * Strong bidirectional character type "LRO". 1361: * 1362: * @since 1.4 1363: */ 1364: public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 1365: 1366: /** 1367: * Strong bidirectional character type "RLE". 1368: * 1369: * @since 1.4 1370: */ 1371: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 1372: 1373: /** 1374: * Strong bidirectional character type "RLO". 1375: * 1376: * @since 1.4 1377: */ 1378: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 1379: 1380: /** 1381: * Weak bidirectional character type "PDF". 
1382: * 1383: * @since 1.4 1384: */ 1385: public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 1386: 1387: /** 1388: * Stores unicode block offset lookup table. Exploit package visibility of 1389: * String.value to avoid copying the array. 1390: * @see #readChar(char) 1391: * @see CharData#BLOCKS 1392: */ 1393: private static final char[] blocks = String.zeroBasedStringValue(CharData.BLOCKS); 1394: 1395: /** 1396: * Stores unicode attribute offset lookup table. Exploit package visibility 1397: * of String.value to avoid copying the array. 1398: * @see CharData#DATA 1399: */ 1400: private static final char[] data = String.zeroBasedStringValue(CharData.DATA); 1401: 1402: /** 1403: * Stores unicode numeric value attribute table. Exploit package visibility 1404: * of String.value to avoid copying the array. 1405: * @see CharData#NUM_VALUE 1406: */ 1407: private static final char[] numValue 1408: = String.zeroBasedStringValue(CharData.NUM_VALUE); 1409: 1410: /** 1411: * Stores unicode uppercase attribute table. Exploit package visibility 1412: * of String.value to avoid copying the array. 1413: * @see CharData#UPPER 1414: */ 1415: private static final char[] upper = String.zeroBasedStringValue(CharData.UPPER); 1416: 1417: /** 1418: * Stores unicode lowercase attribute table. Exploit package visibility 1419: * of String.value to avoid copying the array. 1420: * @see CharData#LOWER 1421: */ 1422: private static final char[] lower = String.zeroBasedStringValue(CharData.LOWER); 1423: 1424: /** 1425: * Stores unicode direction attribute table. Exploit package visibility 1426: * of String.value to avoid copying the array. 1427: * @see CharData#DIRECTION 1428: */ 1429: // Package visible for use by String. 1430: static final char[] direction = String.zeroBasedStringValue(CharData.DIRECTION); 1431: 1432: /** 1433: * Stores unicode titlecase table. Exploit package visibility of 1434: * String.value to avoid copying the array. 
1435: * @see CharData#TITLE 1436: */ 1437: private static final char[] title = String.zeroBasedStringValue(CharData.TITLE); 1438: 1439: /** 1440: * Mask for grabbing the type out of the contents of data. 1441: * @see CharData#DATA 1442: */ 1443: private static final int TYPE_MASK = 0x1F; 1444: 1445: /** 1446: * Mask for grabbing the non-breaking space flag out of the contents of 1447: * data. 1448: * @see CharData#DATA 1449: */ 1450: private static final int NO_BREAK_MASK = 0x20; 1451: 1452: /** 1453: * Mask for grabbing the mirrored directionality flag out of the contents 1454: * of data. 1455: * @see CharData#DATA 1456: */ 1457: private static final int MIRROR_MASK = 0x40; 1458: 1459: /** 1460: * Grabs an attribute offset from the Unicode attribute database. The lower 1461: * 5 bits are the character type, the next 2 bits are flags, and the top 1462: * 9 bits are the offset into the attribute tables. 1463: * 1464: * @param ch the character to look up 1465: * @return the character's attribute offset and type 1466: * @see #TYPE_MASK 1467: * @see #NO_BREAK_MASK 1468: * @see #MIRROR_MASK 1469: * @see CharData#DATA 1470: * @see CharData#SHIFT 1471: */ 1472: // Package visible for use in String. 1473: static char readChar(char ch) 1474: { 1475: // Perform 16-bit addition to find the correct entry in data. 1476: return data[(char) (blocks[ch >> CharData.SHIFT] + ch)]; 1477: } 1478: 1479: /** 1480: * Wraps up a character. 1481: * 1482: * @param value the character to wrap 1483: */ 1484: public Character(char value) 1485: { 1486: this.value = value; 1487: } 1488: 1489: /** 1490: * Returns the character which has been wrapped by this class. 1491: * 1492: * @return the character wrapped 1493: */ 1494: public char charValue() 1495: { 1496: return value; 1497: } 1498: 1499: /** 1500: * Returns the numerical value (unsigned) of the wrapped character. 1501: * Range of returned values: 0x0000-0xFFFF. 
1502: * 1503: * @return the value of the wrapped character 1504: */ 1505: public int hashCode() 1506: { 1507: return value; 1508: } 1509: 1510: /** 1511: * Determines if an object is equal to this object. This is only true for 1512: * another Character object wrapping the same value. 1513: * 1514: * @param o object to compare 1515: * @return true if o is a Character with the same value 1516: */ 1517: public boolean equals(Object o) 1518: { 1519: return o instanceof Character && value == ((Character) o).value; 1520: } 1521: 1522: /** 1523: * Converts the wrapped character into a String. 1524: * 1525: * @return a String containing one character -- the wrapped character 1526: * of this instance 1527: */ 1528: public String toString() 1529: { 1530: // Package constructor avoids an array copy. 1531: return new String(new char[] { value }, 0, 1, true); 1532: } 1533: 1534: /** 1535: * Returns a String of length 1 representing the specified character. 1536: * 1537: * @param ch the character to convert 1538: * @return a String containing the character 1539: * @since 1.4 1540: */ 1541: public static String toString(char ch) 1542: { 1543: // Package constructor avoids an array copy. 1544: return new String(new char[] { ch }, 0, 1, true); 1545: } 1546: 1547: /** 1548: * Determines if a character is a Unicode lowercase letter. For example, 1549: * <code>'a'</code> is lowercase. 1550: * <br> 1551: * lowercase = [Ll] 1552: * 1553: * @param ch character to test 1554: * @return true if ch is a Unicode lowercase letter, else false 1555: * @see #isUpperCase(char) 1556: * @see #isTitleCase(char) 1557: * @see #toLowerCase(char) 1558: * @see #getType(char) 1559: */ 1560: public static boolean isLowerCase(char ch) 1561: { 1562: return getType(ch) == LOWERCASE_LETTER; 1563: } 1564: 1565: /** 1566: * Determines if a character is a Unicode uppercase letter. For example, 1567: * <code>'A'</code> is uppercase. 
1568: * <br> 1569: * uppercase = [Lu] 1570: * 1571: * @param ch character to test 1572: * @return true if ch is a Unicode uppercase letter, else false 1573: * @see #isLowerCase(char) 1574: * @see #isTitleCase(char) 1575: * @see #toUpperCase(char) 1576: * @see #getType(char) 1577: */ 1578: public static boolean isUpperCase(char ch) 1579: { 1580: return getType(ch) == UPPERCASE_LETTER; 1581: } 1582: 1583: /** 1584: * Determines if a character is a Unicode titlecase letter. For example, 1585: * the character "Lj" (Latin capital L with small letter j) is titlecase. 1586: * <br> 1587: * titlecase = [Lt] 1588: * 1589: * @param ch character to test 1590: * @return true if ch is a Unicode titlecase letter, else false 1591: * @see #isLowerCase(char) 1592: * @see #isUpperCase(char) 1593: * @see #toTitleCase(char) 1594: * @see #getType(char) 1595: */ 1596: public static boolean isTitleCase(char ch) 1597: { 1598: return getType(ch) == TITLECASE_LETTER; 1599: } 1600: 1601: /** 1602: * Determines if a character is a Unicode decimal digit. For example, 1603: * <code>'0'</code> is a digit. 1604: * <br> 1605: * Unicode decimal digit = [Nd] 1606: * 1607: * @param ch character to test 1608: * @return true if ch is a Unicode decimal digit, else false 1609: * @see #digit(char, int) 1610: * @see #forDigit(int, int) 1611: * @see #getType(char) 1612: */ 1613: public static boolean isDigit(char ch) 1614: { 1615: return getType(ch) == DECIMAL_DIGIT_NUMBER; 1616: } 1617: 1618: /** 1619: * Determines if a character is part of the Unicode Standard. This is an 1620: * evolving standard, but covers every character in the data file. 
1621: * <br> 1622: * defined = not [Cn] 1623: * 1624: * @param ch character to test 1625: * @return true if ch is a Unicode character, else false 1626: * @see #isDigit(char) 1627: * @see #isLetter(char) 1628: * @see #isLetterOrDigit(char) 1629: * @see #isLowerCase(char) 1630: * @see #isTitleCase(char) 1631: * @see #isUpperCase(char) 1632: */ 1633: public static boolean isDefined(char ch) 1634: { 1635: return getType(ch) != UNASSIGNED; 1636: } 1637: 1638: /** 1639: * Determines if a character is a Unicode letter. Not all letters have case, 1640: * so this may return true when isLowerCase and isUpperCase return false. 1641: * <br> 1642: * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo] 1643: * 1644: * @param ch character to test 1645: * @return true if ch is a Unicode letter, else false 1646: * @see #isDigit(char) 1647: * @see #isJavaIdentifierStart(char) 1648: * @see #isJavaLetter(char) 1649: * @see #isJavaLetterOrDigit(char) 1650: * @see #isLetterOrDigit(char) 1651: * @see #isLowerCase(char) 1652: * @see #isTitleCase(char) 1653: * @see #isUnicodeIdentifierStart(char) 1654: * @see #isUpperCase(char) 1655: */ 1656: public static boolean isLetter(char ch) 1657: { 1658: return ((1 << getType(ch)) 1659: & ((1 << UPPERCASE_LETTER) 1660: | (1 << LOWERCASE_LETTER) 1661: | (1 << TITLECASE_LETTER) 1662: | (1 << MODIFIER_LETTER) 1663: | (1 << OTHER_LETTER))) != 0; 1664: } 1665: 1666: /** 1667: * Determines if a character is a Unicode letter or a Unicode digit. This 1668: * is the combination of isLetter and isDigit. 
1669: * <br> 1670: * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd] 1671: * 1672: * @param ch character to test 1673: * @return true if ch is a Unicode letter or a Unicode digit, else false 1674: * @see #isDigit(char) 1675: * @see #isJavaIdentifierPart(char) 1676: * @see #isJavaLetter(char) 1677: * @see #isJavaLetterOrDigit(char) 1678: * @see #isLetter(char) 1679: * @see #isUnicodeIdentifierPart(char) 1680: */ 1681: public static boolean isLetterOrDigit(char ch) 1682: { 1683: return ((1 << getType(ch)) 1684: & ((1 << UPPERCASE_LETTER) 1685: | (1 << LOWERCASE_LETTER) 1686: | (1 << TITLECASE_LETTER) 1687: | (1 << MODIFIER_LETTER) 1688: | (1 << OTHER_LETTER) 1689: | (1 << DECIMAL_DIGIT_NUMBER))) != 0; 1690: } 1691: 1692: /** 1693: * Determines if a character can start a Java identifier. This is the 1694: * combination of isLetter, any character where getType returns 1695: * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation 1696: * (like '_'). 1697: * 1698: * @param ch character to test 1699: * @return true if ch can start a Java identifier, else false 1700: * @deprecated Replaced by {@link #isJavaIdentifierStart(char)} 1701: * @see #isJavaLetterOrDigit(char) 1702: * @see #isJavaIdentifierStart(char) 1703: * @see #isJavaIdentifierPart(char) 1704: * @see #isLetter(char) 1705: * @see #isLetterOrDigit(char) 1706: * @see #isUnicodeIdentifierStart(char) 1707: */ 1708: public static boolean isJavaLetter(char ch) 1709: { 1710: return isJavaIdentifierStart(ch); 1711: } 1712: 1713: /** 1714: * Determines if a character can follow the first letter in 1715: * a Java identifier. This is the combination of isJavaLetter (isLetter, 1716: * type of LETTER_NUMBER, currency, connecting punctuation) and digit, 1717: * numeric letter (like Roman numerals), combining marks, non-spacing marks, 1718: * or isIdentifierIgnorable. 
1719: * 1720: * @param ch character to test 1721: * @return true if ch can follow the first letter in a Java identifier 1722: * @deprecated Replaced by {@link #isJavaIdentifierPart(char)} 1723: * @see #isJavaLetter(char) 1724: * @see #isJavaIdentifierStart(char) 1725: * @see #isJavaIdentifierPart(char) 1726: * @see #isLetter(char) 1727: * @see #isLetterOrDigit(char) 1728: * @see #isUnicodeIdentifierPart(char) 1729: * @see #isIdentifierIgnorable(char) 1730: */ 1731: public static boolean isJavaLetterOrDigit(char ch) 1732: { 1733: return isJavaIdentifierPart(ch); 1734: } 1735: 1736: /** 1737: * Determines if a character can start a Java identifier. This is the 1738: * combination of isLetter, any character where getType returns 1739: * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation 1740: * (like '_'). 1741: * <br> 1742: * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc] 1743: * 1744: * @param ch character to test 1745: * @return true if ch can start a Java identifier, else false 1746: * @see #isJavaIdentifierPart(char) 1747: * @see #isLetter(char) 1748: * @see #isUnicodeIdentifierStart(char) 1749: * @since 1.1 1750: */ 1751: public static boolean isJavaIdentifierStart(char ch) 1752: { 1753: return ((1 << getType(ch)) 1754: & ((1 << UPPERCASE_LETTER) 1755: | (1 << LOWERCASE_LETTER) 1756: | (1 << TITLECASE_LETTER) 1757: | (1 << MODIFIER_LETTER) 1758: | (1 << OTHER_LETTER) 1759: | (1 << LETTER_NUMBER) 1760: | (1 << CURRENCY_SYMBOL) 1761: | (1 << CONNECTOR_PUNCTUATION))) != 0; 1762: } 1763: 1764: /** 1765: * Determines if a character can follow the first letter in 1766: * a Java identifier. This is the combination of isJavaLetter (isLetter, 1767: * type of LETTER_NUMBER, currency, connecting punctuation) and digit, 1768: * numeric letter (like Roman numerals), combining marks, non-spacing marks, 1769: * or isIdentifierIgnorable. 
1770: * <br> 1771: * Java identifier extender = 1772: * [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf] 1773: * |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F 1774: * 1775: * @param ch character to test 1776: * @return true if ch can follow the first letter in a Java identifier 1777: * @see #isIdentifierIgnorable(char) 1778: * @see #isJavaIdentifierStart(char) 1779: * @see #isLetterOrDigit(char) 1780: * @see #isUnicodeIdentifierPart(char) 1781: * @since 1.1 1782: */ 1783: public static boolean isJavaIdentifierPart(char ch) 1784: { 1785: int category = getType(ch); 1786: return ((1 << category) 1787: & ((1 << UPPERCASE_LETTER) 1788: | (1 << LOWERCASE_LETTER) 1789: | (1 << TITLECASE_LETTER) 1790: | (1 << MODIFIER_LETTER) 1791: | (1 << OTHER_LETTER) 1792: | (1 << NON_SPACING_MARK) 1793: | (1 << COMBINING_SPACING_MARK) 1794: | (1 << DECIMAL_DIGIT_NUMBER) 1795: | (1 << LETTER_NUMBER) 1796: | (1 << CURRENCY_SYMBOL) 1797: | (1 << CONNECTOR_PUNCTUATION) 1798: | (1 << FORMAT))) != 0 1799: || (category == CONTROL && isIdentifierIgnorable(ch)); 1800: } 1801: 1802: /** 1803: * Determines if a character can start a Unicode identifier. Only 1804: * letters can start a Unicode identifier, but this includes characters 1805: * in LETTER_NUMBER. 
1806: * <br> 1807: * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl] 1808: * 1809: * @param ch character to test 1810: * @return true if ch can start a Unicode identifier, else false 1811: * @see #isJavaIdentifierStart(char) 1812: * @see #isLetter(char) 1813: * @see #isUnicodeIdentifierPart(char) 1814: * @since 1.1 1815: */ 1816: public static boolean isUnicodeIdentifierStart(char ch) 1817: { 1818: return ((1 << getType(ch)) 1819: & ((1 << UPPERCASE_LETTER) 1820: | (1 << LOWERCASE_LETTER) 1821: | (1 << TITLECASE_LETTER) 1822: | (1 << MODIFIER_LETTER) 1823: | (1 << OTHER_LETTER) 1824: | (1 << LETTER_NUMBER))) != 0; 1825: } 1826: 1827: /** 1828: * Determines if a character can follow the first letter in 1829: * a Unicode identifier. This includes letters, connecting punctuation, 1830: * digits, numeric letters, combining marks, non-spacing marks, and 1831: * isIdentifierIgnorable. 1832: * <br> 1833: * Unicode identifier extender = 1834: * [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]| 1835: * |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F 1836: * 1837: * @param ch character to test 1838: * @return true if ch can follow the first letter in a Unicode identifier 1839: * @see #isIdentifierIgnorable(char) 1840: * @see #isJavaIdentifierPart(char) 1841: * @see #isLetterOrDigit(char) 1842: * @see #isUnicodeIdentifierStart(char) 1843: * @since 1.1 1844: */ 1845: public static boolean isUnicodeIdentifierPart(char ch) 1846: { 1847: int category = getType(ch); 1848: return ((1 << category) 1849: & ((1 << UPPERCASE_LETTER) 1850: | (1 << LOWERCASE_LETTER) 1851: | (1 << TITLECASE_LETTER) 1852: | (1 << MODIFIER_LETTER) 1853: | (1 << OTHER_LETTER) 1854: | (1 << NON_SPACING_MARK) 1855: | (1 << COMBINING_SPACING_MARK) 1856: | (1 << DECIMAL_DIGIT_NUMBER) 1857: | (1 << LETTER_NUMBER) 1858: | (1 << CONNECTOR_PUNCTUATION) 1859: | (1 << FORMAT))) != 0 1860: || (category == CONTROL && isIdentifierIgnorable(ch)); 1861: } 1862: 1863: /** 1864: * Determines if a character is 
ignorable in a Unicode identifier. This 1865: * includes the non-whitespace ISO control characters (<code>'\u0000'</code> 1866: * through <code>'\u0008'</code>, <code>'\u000E'</code> through 1867: * <code>'\u001B'</code>, and <code>'\u007F'</code> through 1868: * <code>'\u009F'</code>), and FORMAT characters. 1869: * <br> 1870: * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B 1871: * |U+007F-U+009F 1872: * 1873: * @param ch character to test 1874: * @return true if ch is ignorable in a Unicode or Java identifier 1875: * @see #isJavaIdentifierPart(char) 1876: * @see #isUnicodeIdentifierPart(char) 1877: * @since 1.1 1878: */ 1879: public static boolean isIdentifierIgnorable(char ch) 1880: { 1881: return (ch <= '\u009F' && (ch < '\t' || ch >= '\u007F' 1882: || (ch <= '\u001B' && ch >= '\u000E'))) 1883: || getType(ch) == FORMAT; 1884: } 1885: 1886: /** 1887: * Converts a Unicode character into its lowercase equivalent mapping. 1888: * If a mapping does not exist, then the character passed is returned. 1889: * Note that isLowerCase(toLowerCase(ch)) does not always return true. 1890: * 1891: * @param ch character to convert to lowercase 1892: * @return lowercase mapping of ch, or ch if lowercase mapping does 1893: * not exist 1894: * @see #isLowerCase(char) 1895: * @see #isUpperCase(char) 1896: * @see #toTitleCase(char) 1897: * @see #toUpperCase(char) 1898: */ 1899: public static char toLowerCase(char ch) 1900: { 1901: // Signedness doesn't matter, as result is cast back to char. 1902: return (char) (ch + lower[readChar(ch) >> 7]); 1903: } 1904: 1905: /** 1906: * Converts a Unicode character into its uppercase equivalent mapping. 1907: * If a mapping does not exist, then the character passed is returned. 1908: * Note that isUpperCase(toUpperCase(ch)) does not always return true. 
1909: * 1910: * @param ch character to convert to uppercase 1911: * @return uppercase mapping of ch, or ch if uppercase mapping does 1912: * not exist 1913: * @see #isLowerCase(char) 1914: * @see #isUpperCase(char) 1915: * @see #toLowerCase(char) 1916: * @see #toTitleCase(char) 1917: */ 1918: public static char toUpperCase(char ch) 1919: { 1920: // Signedness doesn't matter, as result is cast back to char. 1921: return (char) (ch + upper[readChar(ch) >> 7]); 1922: } 1923: 1924: /** 1925: * Converts a Unicode character into its titlecase equivalent mapping. 1926: * If a mapping does not exist, then the character passed is returned. 1927: * Note that isTitleCase(toTitleCase(ch)) does not always return true. 1928: * 1929: * @param ch character to convert to titlecase 1930: * @return titlecase mapping of ch, or ch if titlecase mapping does 1931: * not exist 1932: * @see #isTitleCase(char) 1933: * @see #toLowerCase(char) 1934: * @see #toUpperCase(char) 1935: */ 1936: public static char toTitleCase(char ch) 1937: { 1938: // As title is short, it doesn't hurt to exhaustively iterate over it. 1939: for (int i = title.length - 2; i >= 0; i -= 2) 1940: if (title[i] == ch) 1941: return title[i + 1]; 1942: return toUpperCase(ch); 1943: } 1944: 1945: /** 1946: * Converts a character into a digit of the specified radix. If the radix 1947: * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch) 1948: * exceeds the radix, or if ch is not a decimal digit or in the case 1949: * insensitive set of 'a'-'z', the result is -1. 
1950: * <br> 1951: * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A 1952: * |U+FF21-U+FF3A|U+FF41-U+FF5A 1953: * 1954: * @param ch character to convert into a digit 1955: * @param radix radix in which ch is a digit 1956: * @return digit which ch represents in radix, or -1 not a valid digit 1957: * @see #MIN_RADIX 1958: * @see #MAX_RADIX 1959: * @see #forDigit(int, int) 1960: * @see #isDigit(char) 1961: * @see #getNumericValue(char) 1962: */ 1963: public static int digit(char ch, int radix) 1964: { 1965: if (radix < MIN_RADIX || radix > MAX_RADIX) 1966: return -1; 1967: char attr = readChar(ch); 1968: if (((1 << (attr & TYPE_MASK)) 1969: & ((1 << UPPERCASE_LETTER) 1970: | (1 << LOWERCASE_LETTER) 1971: | (1 << DECIMAL_DIGIT_NUMBER))) != 0) 1972: { 1973: // Signedness doesn't matter; 0xffff vs. -1 are both rejected. 1974: int digit = numValue[attr >> 7]; 1975: return (digit < radix) ? digit : -1; 1976: } 1977: return -1; 1978: } 1979: 1980: /** 1981: * Returns the Unicode numeric value property of a character. For example, 1982: * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50. 1983: * 1984: * <p>This method also returns values for the letters A through Z, (not 1985: * specified by Unicode), in these ranges: <code>'\u0041'</code> 1986: * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code> 1987: * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code> 1988: * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through 1989: * <code>'\uFF5A'</code> (full width variants). 1990: * 1991: * <p>If the character lacks a numeric value property, -1 is returned. 1992: * If the character has a numeric value property which is not representable 1993: * as a nonnegative integer, such as a fraction, -2 is returned. 
1994: * 1995: * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A 1996: * |U+FF21-U+FF3A|U+FF41-U+FF5A 1997: * 1998: * @param ch character from which the numeric value property will 1999: * be retrieved 2000: * @return the numeric value property of ch, or -1 if it does not exist, or 2001: * -2 if it is not representable as a nonnegative integer 2002: * @see #forDigit(int, int) 2003: * @see #digit(char, int) 2004: * @see #isDigit(char) 2005: * @since 1.1 2006: */ 2007: public static int getNumericValue(char ch) 2008: { 2009: // Treat numValue as signed. 2010: return (short) numValue[readChar(ch) >> 7]; 2011: } 2012: 2013: /** 2014: * Determines if a character is a ISO-LATIN-1 space. This is only the five 2015: * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>, 2016: * <code>'\r'</code>, and <code>' '</code>. 2017: * <br> 2018: * Java space = U+0020|U+0009|U+000A|U+000C|U+000D 2019: * 2020: * @param ch character to test 2021: * @return true if ch is a space, else false 2022: * @deprecated Replaced by {@link #isWhitespace(char)} 2023: * @see #isSpaceChar(char) 2024: * @see #isWhitespace(char) 2025: */ 2026: public static boolean isSpace(char ch) 2027: { 2028: // Performing the subtraction up front alleviates need to compare longs. 2029: return ch-- <= ' ' && ((1 << ch) 2030: & ((1 << (' ' - 1)) 2031: | (1 << ('\t' - 1)) 2032: | (1 << ('\n' - 1)) 2033: | (1 << ('\r' - 1)) 2034: | (1 << ('\f' - 1)))) != 0; 2035: } 2036: 2037: /** 2038: * Determines if a character is a Unicode space character. This includes 2039: * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR. 
2040: * <br> 2041: * Unicode space = [Zs]|[Zp]|[Zl] 2042: * 2043: * @param ch character to test 2044: * @return true if ch is a Unicode space, else false 2045: * @see #isWhitespace(char) 2046: * @since 1.1 2047: */ 2048: public static boolean isSpaceChar(char ch) 2049: { 2050: return ((1 << getType(ch)) 2051: & ((1 << SPACE_SEPARATOR) 2052: | (1 << LINE_SEPARATOR) 2053: | (1 << PARAGRAPH_SEPARATOR))) != 0; 2054: } 2055: 2056: /** 2057: * Determines if a character is Java whitespace. This includes Unicode 2058: * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and 2059: * PARAGRAPH_SEPARATOR) except the non-breaking spaces 2060: * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>); 2061: * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>, 2062: * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>, 2063: * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>, 2064: * and <code>'\u001F'</code>. 2065: * <br> 2066: * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F 2067: * 2068: * @param ch character to test 2069: * @return true if ch is Java whitespace, else false 2070: * @see #isSpaceChar(char) 2071: * @since 1.1 2072: */ 2073: public static boolean isWhitespace(char ch) 2074: { 2075: int attr = readChar(ch); 2076: return ((((1 << (attr & TYPE_MASK)) 2077: & ((1 << SPACE_SEPARATOR) 2078: | (1 << LINE_SEPARATOR) 2079: | (1 << PARAGRAPH_SEPARATOR))) != 0) 2080: && (attr & NO_BREAK_MASK) == 0) 2081: || (ch <= '\u001F' && ((1 << ch) 2082: & ((1 << '\t') 2083: | (1 << '\n') 2084: | (1 << '\u000B') 2085: | (1 << '\u000C') 2086: | (1 << '\r') 2087: | (1 << '\u001C') 2088: | (1 << '\u001D') 2089: | (1 << '\u001E') 2090: | (1 << '\u001F'))) != 0); 2091: } 2092: 2093: /** 2094: * Determines if a character has the ISO Control property. 
2095: * <br> 2096: * ISO Control = [Cc] 2097: * 2098: * @param ch character to test 2099: * @return true if ch is an ISO Control character, else false 2100: * @see #isSpaceChar(char) 2101: * @see #isWhitespace(char) 2102: * @since 1.1 2103: */ 2104: public static boolean isISOControl(char ch) 2105: { 2106: return getType(ch) == CONTROL; 2107: } 2108: 2109: /** 2110: * Returns the Unicode general category property of a character. 2111: * 2112: * @param ch character from which the general category property will 2113: * be retrieved 2114: * @return the character category property of ch as an integer 2115: * @see #UNASSIGNED 2116: * @see #UPPERCASE_LETTER 2117: * @see #LOWERCASE_LETTER 2118: * @see #TITLECASE_LETTER 2119: * @see #MODIFIER_LETTER 2120: * @see #OTHER_LETTER 2121: * @see #NON_SPACING_MARK 2122: * @see #ENCLOSING_MARK 2123: * @see #COMBINING_SPACING_MARK 2124: * @see #DECIMAL_DIGIT_NUMBER 2125: * @see #LETTER_NUMBER 2126: * @see #OTHER_NUMBER 2127: * @see #SPACE_SEPARATOR 2128: * @see #LINE_SEPARATOR 2129: * @see #PARAGRAPH_SEPARATOR 2130: * @see #CONTROL 2131: * @see #FORMAT 2132: * @see #PRIVATE_USE 2133: * @see #SURROGATE 2134: * @see #DASH_PUNCTUATION 2135: * @see #START_PUNCTUATION 2136: * @see #END_PUNCTUATION 2137: * @see #CONNECTOR_PUNCTUATION 2138: * @see #OTHER_PUNCTUATION 2139: * @see #MATH_SYMBOL 2140: * @see #CURRENCY_SYMBOL 2141: * @see #MODIFIER_SYMBOL 2142: * @see #INITIAL_QUOTE_PUNCTUATION 2143: * @see #FINAL_QUOTE_PUNCTUATION 2144: * @since 1.1 2145: */ 2146: public static int getType(char ch) 2147: { 2148: return readChar(ch) & TYPE_MASK; 2149: } 2150: 2151: /** 2152: * Converts a digit into a character which represents that digit 2153: * in a specified radix. If the radix exceeds MIN_RADIX or MAX_RADIX, 2154: * or the digit exceeds the radix, then the null character <code>'\0'</code> 2155: * is returned. Otherwise the return value is in '0'-'9' and 'a'-'z'. 
2156: * <br> 2157: * return value boundary = U+0030-U+0039|U+0061-U+007A 2158: * 2159: * @param digit digit to be converted into a character 2160: * @param radix radix of digit 2161: * @return character representing digit in radix, or '\0' 2162: * @see #MIN_RADIX 2163: * @see #MAX_RADIX 2164: * @see #digit(char, int) 2165: */ 2166: public static char forDigit(int digit, int radix) 2167: { 2168: if (radix < MIN_RADIX || radix > MAX_RADIX 2169: || digit < 0 || digit >= radix) 2170: return '\0'; 2171: return Number.digits[digit]; 2172: } 2173: 2174: /** 2175: * Returns the Unicode directionality property of the character. This 2176: * is used in the visual ordering of text. 2177: * 2178: * @param ch the character to look up 2179: * @return the directionality constant, or DIRECTIONALITY_UNDEFINED 2180: * @see #DIRECTIONALITY_UNDEFINED 2181: * @see #DIRECTIONALITY_LEFT_TO_RIGHT 2182: * @see #DIRECTIONALITY_RIGHT_TO_LEFT 2183: * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 2184: * @see #DIRECTIONALITY_EUROPEAN_NUMBER 2185: * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 2186: * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 2187: * @see #DIRECTIONALITY_ARABIC_NUMBER 2188: * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 2189: * @see #DIRECTIONALITY_NONSPACING_MARK 2190: * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL 2191: * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR 2192: * @see #DIRECTIONALITY_SEGMENT_SEPARATOR 2193: * @see #DIRECTIONALITY_WHITESPACE 2194: * @see #DIRECTIONALITY_OTHER_NEUTRALS 2195: * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 2196: * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 2197: * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 2198: * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 2199: * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 2200: * @since 1.4 2201: */ 2202: public static byte getDirectionality(char ch) 2203: { 2204: // The result will correctly be signed. 
2205: return (byte) (direction[readChar(ch) >> 7] >> 2); 2206: } 2207: 2208: /** 2209: * Determines whether the character is mirrored according to Unicode. For 2210: * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in 2211: * left-to-right text, but ')' in right-to-left text. 2212: * 2213: * @param ch the character to look up 2214: * @return true if the character is mirrored 2215: * @since 1.4 2216: */ 2217: public static boolean isMirrored(char ch) 2218: { 2219: return (readChar(ch) & MIRROR_MASK) != 0; 2220: } 2221: 2222: /** 2223: * Compares another Character to this Character, numerically. 2224: * 2225: * @param anotherCharacter Character to compare with this Character 2226: * @return a negative integer if this Character is less than 2227: * anotherCharacter, zero if this Character is equal, and 2228: * a positive integer if this Character is greater 2229: * @throws NullPointerException if anotherCharacter is null 2230: * @since 1.2 2231: */ 2232: public int compareTo(Character anotherCharacter) 2233: { 2234: return value - anotherCharacter.value; 2235: } 2236: 2237: /** 2238: * Compares an object to this Character. Assuming the object is a 2239: * Character object, this method performs the same comparison as 2240: * compareTo(Character). 2241: * 2242: * @param o object to compare 2243: * @return the comparison value 2244: * @throws ClassCastException if o is not a Character object 2245: * @throws NullPointerException if o is null 2246: * @see #compareTo(Character) 2247: * @since 1.2 2248: */ 2249: public int compareTo(Object o) 2250: { 2251: return compareTo((Character) o); 2252: } 2253: } // class Character
GNU Classpath (0.17) |