Source for java.lang.Character

   1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets
   2:    Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.lang;
  40: 
  41: import gnu.java.lang.CharData;
  42: 
  43: import java.io.Serializable;
  44: 
  45: /**
  46:  * Wrapper class for the primitive char data type.  In addition, this class
  47:  * allows one to retrieve property information and perform transformations
  48:  * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
  49:  * java.lang.Character is designed to be very dynamic, and as such, it
  50:  * retrieves information on the Unicode character set from a separate
  51:  * database, gnu.java.lang.CharData, which can be easily upgraded.
  52:  *
  53:  * <p>For predicates, boundaries are used to describe
  54:  * the set of characters for which the method will return true.
  55:  * This syntax uses fairly normal regular expression notation.
  56:  * See 5.13 of the Unicode Standard, Version 3.0, for the
  57:  * boundary specification.
  58:  *
  59:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
  60:  * for more information on the Unicode Standard.
  61:  *
  62:  * @author Tom Tromey (tromey@cygnus.com)
  63:  * @author Paul N. Fisher
  64:  * @author Jochen Hoenicke
  65:  * @author Eric Blake (ebb9@email.byu.edu)
  66:  * @see CharData
  67:  * @since 1.0
  68:  * @status updated to 1.4
  69:  */
  70: public final class Character implements Serializable, Comparable
  71: {
  72:   /**
  73:    * A subset of Unicode blocks.
  74:    *
  75:    * @author Paul N. Fisher
  76:    * @author Eric Blake (ebb9@email.byu.edu)
  77:    * @since 1.2
  78:    */
  79:   public static class Subset
  80:   {
  81:     /** The name of the subset. */
  82:     private final String name;
  83: 
  84:     /**
  85:      * Construct a new subset of characters.
  86:      *
  87:      * @param name the name of the subset
  88:      * @throws NullPointerException if name is null
  89:      */
  90:     protected Subset(String name)
  91:     {
  92:       // Note that name.toString() is name, unless name was null.
  93:       this.name = name.toString();
  94:     }
  95: 
  96:     /**
  97:      * Compares two Subsets for equality. This is <code>final</code>, and
  98:      * restricts the comparison on the <code>==</code> operator, so it returns
  99:      * true only for the same object.
 100:      *
 101:      * @param o the object to compare
 102:      * @return true if o is this
 103:      */
 104:     public final boolean equals(Object o)
 105:     {
 106:       return o == this;
 107:     }
 108: 
 109:     /**
 110:      * Makes the original hashCode of Object final, to be consistent with
 111:      * equals.
 112:      *
 113:      * @return the hash code for this object
 114:      */
 115:     public final int hashCode()
 116:     {
 117:       return super.hashCode();
 118:     }
 119: 
 120:     /**
 121:      * Returns the name of the subset.
 122:      *
 123:      * @return the name
 124:      */
 125:     public final String toString()
 126:     {
 127:       return name;
 128:     }
 129:   } // class Subset
 130: 
 131:   /**
 132:    * A family of character subsets in the Unicode specification. A character
 133:    * is in at most one of these blocks.
 134:    *
 135:    * This inner class was generated automatically from
 136:    * <code>doc/unicode/Block-3.txt</code>, by some perl scripts.
 137:    * This Unicode definition file can be found on the
 138:    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 139:    * JDK 1.4 uses Unicode version 3.0.0.
 140:    *
 141:    * @author scripts/unicode-blocks.pl (written by Eric Blake)
 142:    * @since 1.2
 143:    */
 144:   public static final class UnicodeBlock extends Subset
 145:   {
 146:     /** The start of the subset. */
 147:     private final char start;
 148: 
 149:     /** The end of the subset. */
 150:     private final char end;
 151: 
 152:     /**
 153:      * Constructor for strictly defined blocks.
 154:      *
 155:      * @param start the start character of the range
 156:      * @param end the end character of the range
 157:      * @param name the block name
 158:      */
 159:     private UnicodeBlock(char start, char end, String name)
 160:     {
 161:       super(name);
 162:       this.start = start;
 163:       this.end = end;
 164:     }
 165: 
 166:     /**
 167:      * Returns the Unicode character block which a character belongs to.
 168:      *
 169:      * @param ch the character to look up
 170:      * @return the set it belongs to, or null if it is not in one
 171:      */
 172:     public static UnicodeBlock of(char ch)
 173:     {
 174:       // Special case, since SPECIALS contains two ranges.
 175:       if (ch == '\uFEFF')
 176:         return SPECIALS;
 177:       // Simple binary search for the correct block.
 178:       int low = 0;
 179:       int hi = sets.length - 1;
 180:       while (low <= hi)
 181:         {
 182:           int mid = (low + hi) >> 1;
 183:           UnicodeBlock b = sets[mid];
 184:           if (ch < b.start)
 185:             hi = mid - 1;
 186:           else if (ch > b.end)
 187:             low = mid + 1;
 188:           else
 189:             return b;
 190:         }
 191:       return null;
 192:     }
 193: 
 194:     /**
 195:      * Basic Latin.
 196:      * '\u0000' - '\u007F'.
 197:      */
 198:     public static final UnicodeBlock BASIC_LATIN
 199:       = new UnicodeBlock('\u0000', '\u007F',
 200:                          "BASIC_LATIN");
 201: 
 202:     /**
 203:      * Latin-1 Supplement.
 204:      * '\u0080' - '\u00FF'.
 205:      */
 206:     public static final UnicodeBlock LATIN_1_SUPPLEMENT
 207:       = new UnicodeBlock('\u0080', '\u00FF',
 208:                          "LATIN_1_SUPPLEMENT");
 209: 
 210:     /**
 211:      * Latin Extended-A.
 212:      * '\u0100' - '\u017F'.
 213:      */
 214:     public static final UnicodeBlock LATIN_EXTENDED_A
 215:       = new UnicodeBlock('\u0100', '\u017F',
 216:                          "LATIN_EXTENDED_A");
 217: 
 218:     /**
 219:      * Latin Extended-B.
 220:      * '\u0180' - '\u024F'.
 221:      */
 222:     public static final UnicodeBlock LATIN_EXTENDED_B
 223:       = new UnicodeBlock('\u0180', '\u024F',
 224:                          "LATIN_EXTENDED_B");
 225: 
 226:     /**
 227:      * IPA Extensions.
 228:      * '\u0250' - '\u02AF'.
 229:      */
 230:     public static final UnicodeBlock IPA_EXTENSIONS
 231:       = new UnicodeBlock('\u0250', '\u02AF',
 232:                          "IPA_EXTENSIONS");
 233: 
 234:     /**
 235:      * Spacing Modifier Letters.
 236:      * '\u02B0' - '\u02FF'.
 237:      */
 238:     public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 239:       = new UnicodeBlock('\u02B0', '\u02FF',
 240:                          "SPACING_MODIFIER_LETTERS");
 241: 
 242:     /**
 243:      * Combining Diacritical Marks.
 244:      * '\u0300' - '\u036F'.
 245:      */
 246:     public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 247:       = new UnicodeBlock('\u0300', '\u036F',
 248:                          "COMBINING_DIACRITICAL_MARKS");
 249: 
 250:     /**
 251:      * Greek.
 252:      * '\u0370' - '\u03FF'.
 253:      */
 254:     public static final UnicodeBlock GREEK
 255:       = new UnicodeBlock('\u0370', '\u03FF',
 256:                          "GREEK");
 257: 
 258:     /**
 259:      * Cyrillic.
 260:      * '\u0400' - '\u04FF'.
 261:      */
 262:     public static final UnicodeBlock CYRILLIC
 263:       = new UnicodeBlock('\u0400', '\u04FF',
 264:                          "CYRILLIC");
 265: 
 266:     /**
 267:      * Armenian.
 268:      * '\u0530' - '\u058F'.
 269:      */
 270:     public static final UnicodeBlock ARMENIAN
 271:       = new UnicodeBlock('\u0530', '\u058F',
 272:                          "ARMENIAN");
 273: 
 274:     /**
 275:      * Hebrew.
 276:      * '\u0590' - '\u05FF'.
 277:      */
 278:     public static final UnicodeBlock HEBREW
 279:       = new UnicodeBlock('\u0590', '\u05FF',
 280:                          "HEBREW");
 281: 
 282:     /**
 283:      * Arabic.
 284:      * '\u0600' - '\u06FF'.
 285:      */
 286:     public static final UnicodeBlock ARABIC
 287:       = new UnicodeBlock('\u0600', '\u06FF',
 288:                          "ARABIC");
 289: 
 290:     /**
 291:      * Syriac.
 292:      * '\u0700' - '\u074F'.
 293:      * @since 1.4
 294:      */
 295:     public static final UnicodeBlock SYRIAC
 296:       = new UnicodeBlock('\u0700', '\u074F',
 297:                          "SYRIAC");
 298: 
 299:     /**
 300:      * Thaana.
 301:      * '\u0780' - '\u07BF'.
 302:      * @since 1.4
 303:      */
 304:     public static final UnicodeBlock THAANA
 305:       = new UnicodeBlock('\u0780', '\u07BF',
 306:                          "THAANA");
 307: 
 308:     /**
 309:      * Devanagari.
 310:      * '\u0900' - '\u097F'.
 311:      */
 312:     public static final UnicodeBlock DEVANAGARI
 313:       = new UnicodeBlock('\u0900', '\u097F',
 314:                          "DEVANAGARI");
 315: 
 316:     /**
 317:      * Bengali.
 318:      * '\u0980' - '\u09FF'.
 319:      */
 320:     public static final UnicodeBlock BENGALI
 321:       = new UnicodeBlock('\u0980', '\u09FF',
 322:                          "BENGALI");
 323: 
 324:     /**
 325:      * Gurmukhi.
 326:      * '\u0A00' - '\u0A7F'.
 327:      */
 328:     public static final UnicodeBlock GURMUKHI
 329:       = new UnicodeBlock('\u0A00', '\u0A7F',
 330:                          "GURMUKHI");
 331: 
 332:     /**
 333:      * Gujarati.
 334:      * '\u0A80' - '\u0AFF'.
 335:      */
 336:     public static final UnicodeBlock GUJARATI
 337:       = new UnicodeBlock('\u0A80', '\u0AFF',
 338:                          "GUJARATI");
 339: 
 340:     /**
 341:      * Oriya.
 342:      * '\u0B00' - '\u0B7F'.
 343:      */
 344:     public static final UnicodeBlock ORIYA
 345:       = new UnicodeBlock('\u0B00', '\u0B7F',
 346:                          "ORIYA");
 347: 
 348:     /**
 349:      * Tamil.
 350:      * '\u0B80' - '\u0BFF'.
 351:      */
 352:     public static final UnicodeBlock TAMIL
 353:       = new UnicodeBlock('\u0B80', '\u0BFF',
 354:                          "TAMIL");
 355: 
 356:     /**
 357:      * Telugu.
 358:      * '\u0C00' - '\u0C7F'.
 359:      */
 360:     public static final UnicodeBlock TELUGU
 361:       = new UnicodeBlock('\u0C00', '\u0C7F',
 362:                          "TELUGU");
 363: 
 364:     /**
 365:      * Kannada.
 366:      * '\u0C80' - '\u0CFF'.
 367:      */
 368:     public static final UnicodeBlock KANNADA
 369:       = new UnicodeBlock('\u0C80', '\u0CFF',
 370:                          "KANNADA");
 371: 
 372:     /**
 373:      * Malayalam.
 374:      * '\u0D00' - '\u0D7F'.
 375:      */
 376:     public static final UnicodeBlock MALAYALAM
 377:       = new UnicodeBlock('\u0D00', '\u0D7F',
 378:                          "MALAYALAM");
 379: 
 380:     /**
 381:      * Sinhala.
 382:      * '\u0D80' - '\u0DFF'.
 383:      * @since 1.4
 384:      */
 385:     public static final UnicodeBlock SINHALA
 386:       = new UnicodeBlock('\u0D80', '\u0DFF',
 387:                          "SINHALA");
 388: 
 389:     /**
 390:      * Thai.
 391:      * '\u0E00' - '\u0E7F'.
 392:      */
 393:     public static final UnicodeBlock THAI
 394:       = new UnicodeBlock('\u0E00', '\u0E7F',
 395:                          "THAI");
 396: 
 397:     /**
 398:      * Lao.
 399:      * '\u0E80' - '\u0EFF'.
 400:      */
 401:     public static final UnicodeBlock LAO
 402:       = new UnicodeBlock('\u0E80', '\u0EFF',
 403:                          "LAO");
 404: 
 405:     /**
 406:      * Tibetan.
 407:      * '\u0F00' - '\u0FFF'.
 408:      */
 409:     public static final UnicodeBlock TIBETAN
 410:       = new UnicodeBlock('\u0F00', '\u0FFF',
 411:                          "TIBETAN");
 412: 
 413:     /**
 414:      * Myanmar.
 415:      * '\u1000' - '\u109F'.
 416:      * @since 1.4
 417:      */
 418:     public static final UnicodeBlock MYANMAR
 419:       = new UnicodeBlock('\u1000', '\u109F',
 420:                          "MYANMAR");
 421: 
 422:     /**
 423:      * Georgian.
 424:      * '\u10A0' - '\u10FF'.
 425:      */
 426:     public static final UnicodeBlock GEORGIAN
 427:       = new UnicodeBlock('\u10A0', '\u10FF',
 428:                          "GEORGIAN");
 429: 
 430:     /**
 431:      * Hangul Jamo.
 432:      * '\u1100' - '\u11FF'.
 433:      */
 434:     public static final UnicodeBlock HANGUL_JAMO
 435:       = new UnicodeBlock('\u1100', '\u11FF',
 436:                          "HANGUL_JAMO");
 437: 
 438:     /**
 439:      * Ethiopic.
 440:      * '\u1200' - '\u137F'.
 441:      * @since 1.4
 442:      */
 443:     public static final UnicodeBlock ETHIOPIC
 444:       = new UnicodeBlock('\u1200', '\u137F',
 445:                          "ETHIOPIC");
 446: 
 447:     /**
 448:      * Cherokee.
 449:      * '\u13A0' - '\u13FF'.
 450:      * @since 1.4
 451:      */
 452:     public static final UnicodeBlock CHEROKEE
 453:       = new UnicodeBlock('\u13A0', '\u13FF',
 454:                          "CHEROKEE");
 455: 
 456:     /**
 457:      * Unified Canadian Aboriginal Syllabics.
 458:      * '\u1400' - '\u167F'.
 459:      * @since 1.4
 460:      */
 461:     public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 462:       = new UnicodeBlock('\u1400', '\u167F',
 463:                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
 464: 
 465:     /**
 466:      * Ogham.
 467:      * '\u1680' - '\u169F'.
 468:      * @since 1.4
 469:      */
 470:     public static final UnicodeBlock OGHAM
 471:       = new UnicodeBlock('\u1680', '\u169F',
 472:                          "OGHAM");
 473: 
 474:     /**
 475:      * Runic.
 476:      * '\u16A0' - '\u16FF'.
 477:      * @since 1.4
 478:      */
 479:     public static final UnicodeBlock RUNIC
 480:       = new UnicodeBlock('\u16A0', '\u16FF',
 481:                          "RUNIC");
 482: 
 483:     /**
 484:      * Khmer.
 485:      * '\u1780' - '\u17FF'.
 486:      * @since 1.4
 487:      */
 488:     public static final UnicodeBlock KHMER
 489:       = new UnicodeBlock('\u1780', '\u17FF',
 490:                          "KHMER");
 491: 
 492:     /**
 493:      * Mongolian.
 494:      * '\u1800' - '\u18AF'.
 495:      * @since 1.4
 496:      */
 497:     public static final UnicodeBlock MONGOLIAN
 498:       = new UnicodeBlock('\u1800', '\u18AF',
 499:                          "MONGOLIAN");
 500: 
 501:     /**
 502:      * Latin Extended Additional.
 503:      * '\u1E00' - '\u1EFF'.
 504:      */
 505:     public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 506:       = new UnicodeBlock('\u1E00', '\u1EFF',
 507:                          "LATIN_EXTENDED_ADDITIONAL");
 508: 
 509:     /**
 510:      * Greek Extended.
 511:      * '\u1F00' - '\u1FFF'.
 512:      */
 513:     public static final UnicodeBlock GREEK_EXTENDED
 514:       = new UnicodeBlock('\u1F00', '\u1FFF',
 515:                          "GREEK_EXTENDED");
 516: 
 517:     /**
 518:      * General Punctuation.
 519:      * '\u2000' - '\u206F'.
 520:      */
 521:     public static final UnicodeBlock GENERAL_PUNCTUATION
 522:       = new UnicodeBlock('\u2000', '\u206F',
 523:                          "GENERAL_PUNCTUATION");
 524: 
 525:     /**
 526:      * Superscripts and Subscripts.
 527:      * '\u2070' - '\u209F'.
 528:      */
 529:     public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 530:       = new UnicodeBlock('\u2070', '\u209F',
 531:                          "SUPERSCRIPTS_AND_SUBSCRIPTS");
 532: 
 533:     /**
 534:      * Currency Symbols.
 535:      * '\u20A0' - '\u20CF'.
 536:      */
 537:     public static final UnicodeBlock CURRENCY_SYMBOLS
 538:       = new UnicodeBlock('\u20A0', '\u20CF',
 539:                          "CURRENCY_SYMBOLS");
 540: 
 541:     /**
 542:      * Combining Marks for Symbols.
 543:      * '\u20D0' - '\u20FF'.
 544:      */
 545:     public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 546:       = new UnicodeBlock('\u20D0', '\u20FF',
 547:                          "COMBINING_MARKS_FOR_SYMBOLS");
 548: 
 549:     /**
 550:      * Letterlike Symbols.
 551:      * '\u2100' - '\u214F'.
 552:      */
 553:     public static final UnicodeBlock LETTERLIKE_SYMBOLS
 554:       = new UnicodeBlock('\u2100', '\u214F',
 555:                          "LETTERLIKE_SYMBOLS");
 556: 
 557:     /**
 558:      * Number Forms.
 559:      * '\u2150' - '\u218F'.
 560:      */
 561:     public static final UnicodeBlock NUMBER_FORMS
 562:       = new UnicodeBlock('\u2150', '\u218F',
 563:                          "NUMBER_FORMS");
 564: 
 565:     /**
 566:      * Arrows.
 567:      * '\u2190' - '\u21FF'.
 568:      */
 569:     public static final UnicodeBlock ARROWS
 570:       = new UnicodeBlock('\u2190', '\u21FF',
 571:                          "ARROWS");
 572: 
 573:     /**
 574:      * Mathematical Operators.
 575:      * '\u2200' - '\u22FF'.
 576:      */
 577:     public static final UnicodeBlock MATHEMATICAL_OPERATORS
 578:       = new UnicodeBlock('\u2200', '\u22FF',
 579:                          "MATHEMATICAL_OPERATORS");
 580: 
 581:     /**
 582:      * Miscellaneous Technical.
 583:      * '\u2300' - '\u23FF'.
 584:      */
 585:     public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 586:       = new UnicodeBlock('\u2300', '\u23FF',
 587:                          "MISCELLANEOUS_TECHNICAL");
 588: 
 589:     /**
 590:      * Control Pictures.
 591:      * '\u2400' - '\u243F'.
 592:      */
 593:     public static final UnicodeBlock CONTROL_PICTURES
 594:       = new UnicodeBlock('\u2400', '\u243F',
 595:                          "CONTROL_PICTURES");
 596: 
 597:     /**
 598:      * Optical Character Recognition.
 599:      * '\u2440' - '\u245F'.
 600:      */
 601:     public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 602:       = new UnicodeBlock('\u2440', '\u245F',
 603:                          "OPTICAL_CHARACTER_RECOGNITION");
 604: 
 605:     /**
 606:      * Enclosed Alphanumerics.
 607:      * '\u2460' - '\u24FF'.
 608:      */
 609:     public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 610:       = new UnicodeBlock('\u2460', '\u24FF',
 611:                          "ENCLOSED_ALPHANUMERICS");
 612: 
 613:     /**
 614:      * Box Drawing.
 615:      * '\u2500' - '\u257F'.
 616:      */
 617:     public static final UnicodeBlock BOX_DRAWING
 618:       = new UnicodeBlock('\u2500', '\u257F',
 619:                          "BOX_DRAWING");
 620: 
 621:     /**
 622:      * Block Elements.
 623:      * '\u2580' - '\u259F'.
 624:      */
 625:     public static final UnicodeBlock BLOCK_ELEMENTS
 626:       = new UnicodeBlock('\u2580', '\u259F',
 627:                          "BLOCK_ELEMENTS");
 628: 
 629:     /**
 630:      * Geometric Shapes.
 631:      * '\u25A0' - '\u25FF'.
 632:      */
 633:     public static final UnicodeBlock GEOMETRIC_SHAPES
 634:       = new UnicodeBlock('\u25A0', '\u25FF',
 635:                          "GEOMETRIC_SHAPES");
 636: 
 637:     /**
 638:      * Miscellaneous Symbols.
 639:      * '\u2600' - '\u26FF'.
 640:      */
 641:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 642:       = new UnicodeBlock('\u2600', '\u26FF',
 643:                          "MISCELLANEOUS_SYMBOLS");
 644: 
 645:     /**
 646:      * Dingbats.
 647:      * '\u2700' - '\u27BF'.
 648:      */
 649:     public static final UnicodeBlock DINGBATS
 650:       = new UnicodeBlock('\u2700', '\u27BF',
 651:                          "DINGBATS");
 652: 
 653:     /**
 654:      * Braille Patterns.
 655:      * '\u2800' - '\u28FF'.
 656:      * @since 1.4
 657:      */
 658:     public static final UnicodeBlock BRAILLE_PATTERNS
 659:       = new UnicodeBlock('\u2800', '\u28FF',
 660:                          "BRAILLE_PATTERNS");
 661: 
 662:     /**
 663:      * CJK Radicals Supplement.
 664:      * '\u2E80' - '\u2EFF'.
 665:      * @since 1.4
 666:      */
 667:     public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
 668:       = new UnicodeBlock('\u2E80', '\u2EFF',
 669:                          "CJK_RADICALS_SUPPLEMENT");
 670: 
 671:     /**
 672:      * Kangxi Radicals.
 673:      * '\u2F00' - '\u2FDF'.
 674:      * @since 1.4
 675:      */
 676:     public static final UnicodeBlock KANGXI_RADICALS
 677:       = new UnicodeBlock('\u2F00', '\u2FDF',
 678:                          "KANGXI_RADICALS");
 679: 
 680:     /**
 681:      * Ideographic Description Characters.
 682:      * '\u2FF0' - '\u2FFF'.
 683:      * @since 1.4
 684:      */
 685:     public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
 686:       = new UnicodeBlock('\u2FF0', '\u2FFF',
 687:                          "IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
 688: 
 689:     /**
 690:      * CJK Symbols and Punctuation.
 691:      * '\u3000' - '\u303F'.
 692:      */
 693:     public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
 694:       = new UnicodeBlock('\u3000', '\u303F',
 695:                          "CJK_SYMBOLS_AND_PUNCTUATION");
 696: 
 697:     /**
 698:      * Hiragana.
 699:      * '\u3040' - '\u309F'.
 700:      */
 701:     public static final UnicodeBlock HIRAGANA
 702:       = new UnicodeBlock('\u3040', '\u309F',
 703:                          "HIRAGANA");
 704: 
 705:     /**
 706:      * Katakana.
 707:      * '\u30A0' - '\u30FF'.
 708:      */
 709:     public static final UnicodeBlock KATAKANA
 710:       = new UnicodeBlock('\u30A0', '\u30FF',
 711:                          "KATAKANA");
 712: 
 713:     /**
 714:      * Bopomofo.
 715:      * '\u3100' - '\u312F'.
 716:      */
 717:     public static final UnicodeBlock BOPOMOFO
 718:       = new UnicodeBlock('\u3100', '\u312F',
 719:                          "BOPOMOFO");
 720: 
 721:     /**
 722:      * Hangul Compatibility Jamo.
 723:      * '\u3130' - '\u318F'.
 724:      */
 725:     public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
 726:       = new UnicodeBlock('\u3130', '\u318F',
 727:                          "HANGUL_COMPATIBILITY_JAMO");
 728: 
 729:     /**
 730:      * Kanbun.
 731:      * '\u3190' - '\u319F'.
 732:      */
 733:     public static final UnicodeBlock KANBUN
 734:       = new UnicodeBlock('\u3190', '\u319F',
 735:                          "KANBUN");
 736: 
 737:     /**
 738:      * Bopomofo Extended.
 739:      * '\u31A0' - '\u31BF'.
 740:      * @since 1.4
 741:      */
 742:     public static final UnicodeBlock BOPOMOFO_EXTENDED
 743:       = new UnicodeBlock('\u31A0', '\u31BF',
 744:                          "BOPOMOFO_EXTENDED");
 745: 
 746:     /**
 747:      * Enclosed CJK Letters and Months.
 748:      * '\u3200' - '\u32FF'.
 749:      */
 750:     public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
 751:       = new UnicodeBlock('\u3200', '\u32FF',
 752:                          "ENCLOSED_CJK_LETTERS_AND_MONTHS");
 753: 
 754:     /**
 755:      * CJK Compatibility.
 756:      * '\u3300' - '\u33FF'.
 757:      */
 758:     public static final UnicodeBlock CJK_COMPATIBILITY
 759:       = new UnicodeBlock('\u3300', '\u33FF',
 760:                          "CJK_COMPATIBILITY");
 761: 
 762:     /**
 763:      * CJK Unified Ideographs Extension A.
 764:      * '\u3400' - '\u4DB5'.
 765:      * @since 1.4
 766:      */
 767:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
 768:       = new UnicodeBlock('\u3400', '\u4DB5',
 769:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
 770: 
 771:     /**
 772:      * CJK Unified Ideographs.
 773:      * '\u4E00' - '\u9FFF'.
 774:      */
 775:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
 776:       = new UnicodeBlock('\u4E00', '\u9FFF',
 777:                          "CJK_UNIFIED_IDEOGRAPHS");
 778: 
 779:     /**
 780:      * Yi Syllables.
 781:      * '\uA000' - '\uA48F'.
 782:      * @since 1.4
 783:      */
 784:     public static final UnicodeBlock YI_SYLLABLES
 785:       = new UnicodeBlock('\uA000', '\uA48F',
 786:                          "YI_SYLLABLES");
 787: 
 788:     /**
 789:      * Yi Radicals.
 790:      * '\uA490' - '\uA4CF'.
 791:      * @since 1.4
 792:      */
 793:     public static final UnicodeBlock YI_RADICALS
 794:       = new UnicodeBlock('\uA490', '\uA4CF',
 795:                          "YI_RADICALS");
 796: 
 797:     /**
 798:      * Hangul Syllables.
 799:      * '\uAC00' - '\uD7A3'.
 800:      */
 801:     public static final UnicodeBlock HANGUL_SYLLABLES
 802:       = new UnicodeBlock('\uAC00', '\uD7A3',
 803:                          "HANGUL_SYLLABLES");
 804: 
 805:     /**
 806:      * Surrogates Area.
 807:      * '\uD800' - '\uDFFF'.
 808:      */
 809:     public static final UnicodeBlock SURROGATES_AREA
 810:       = new UnicodeBlock('\uD800', '\uDFFF',
 811:                          "SURROGATES_AREA");
 812: 
 813:     /**
 814:      * Private Use Area.
 815:      * '\uE000' - '\uF8FF'.
 816:      */
 817:     public static final UnicodeBlock PRIVATE_USE_AREA
 818:       = new UnicodeBlock('\uE000', '\uF8FF',
 819:                          "PRIVATE_USE_AREA");
 820: 
 821:     /**
 822:      * CJK Compatibility Ideographs.
 823:      * '\uF900' - '\uFAFF'.
 824:      */
 825:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
 826:       = new UnicodeBlock('\uF900', '\uFAFF',
 827:                          "CJK_COMPATIBILITY_IDEOGRAPHS");
 828: 
 829:     /**
 830:      * Alphabetic Presentation Forms.
 831:      * '\uFB00' - '\uFB4F'.
 832:      */
 833:     public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
 834:       = new UnicodeBlock('\uFB00', '\uFB4F',
 835:                          "ALPHABETIC_PRESENTATION_FORMS");
 836: 
 837:     /**
 838:      * Arabic Presentation Forms-A.
 839:      * '\uFB50' - '\uFDFF'.
 840:      */
 841:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
 842:       = new UnicodeBlock('\uFB50', '\uFDFF',
 843:                          "ARABIC_PRESENTATION_FORMS_A");
 844: 
 845:     /**
 846:      * Combining Half Marks.
 847:      * '\uFE20' - '\uFE2F'.
 848:      */
 849:     public static final UnicodeBlock COMBINING_HALF_MARKS
 850:       = new UnicodeBlock('\uFE20', '\uFE2F',
 851:                          "COMBINING_HALF_MARKS");
 852: 
 853:     /**
 854:      * CJK Compatibility Forms.
 855:      * '\uFE30' - '\uFE4F'.
 856:      */
 857:     public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
 858:       = new UnicodeBlock('\uFE30', '\uFE4F',
 859:                          "CJK_COMPATIBILITY_FORMS");
 860: 
 861:     /**
 862:      * Small Form Variants.
 863:      * '\uFE50' - '\uFE6F'.
 864:      */
 865:     public static final UnicodeBlock SMALL_FORM_VARIANTS
 866:       = new UnicodeBlock('\uFE50', '\uFE6F',
 867:                          "SMALL_FORM_VARIANTS");
 868: 
 869:     /**
 870:      * Arabic Presentation Forms-B.
 871:      * '\uFE70' - '\uFEFE'.
 872:      */
 873:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
 874:       = new UnicodeBlock('\uFE70', '\uFEFE',
 875:                          "ARABIC_PRESENTATION_FORMS_B");
 876: 
 877:     /**
 878:      * Halfwidth and Fullwidth Forms.
 879:      * '\uFF00' - '\uFFEF'.
 880:      */
 881:     public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
 882:       = new UnicodeBlock('\uFF00', '\uFFEF',
 883:                          "HALFWIDTH_AND_FULLWIDTH_FORMS");
 884: 
 885:     /**
 886:      * Specials.
 887:      * '\uFEFF', '\uFFF0' - '\uFFFD'.
 888:      */
 889:     public static final UnicodeBlock SPECIALS
 890:       = new UnicodeBlock('\uFFF0', '\uFFFD',
 891:                          "SPECIALS");
 892: 
 893:     /**
 894:      * The defined subsets.
 895:      */
 896:     private static final UnicodeBlock sets[] = {
 897:       BASIC_LATIN,
 898:       LATIN_1_SUPPLEMENT,
 899:       LATIN_EXTENDED_A,
 900:       LATIN_EXTENDED_B,
 901:       IPA_EXTENSIONS,
 902:       SPACING_MODIFIER_LETTERS,
 903:       COMBINING_DIACRITICAL_MARKS,
 904:       GREEK,
 905:       CYRILLIC,
 906:       ARMENIAN,
 907:       HEBREW,
 908:       ARABIC,
 909:       SYRIAC,
 910:       THAANA,
 911:       DEVANAGARI,
 912:       BENGALI,
 913:       GURMUKHI,
 914:       GUJARATI,
 915:       ORIYA,
 916:       TAMIL,
 917:       TELUGU,
 918:       KANNADA,
 919:       MALAYALAM,
 920:       SINHALA,
 921:       THAI,
 922:       LAO,
 923:       TIBETAN,
 924:       MYANMAR,
 925:       GEORGIAN,
 926:       HANGUL_JAMO,
 927:       ETHIOPIC,
 928:       CHEROKEE,
 929:       UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
 930:       OGHAM,
 931:       RUNIC,
 932:       KHMER,
 933:       MONGOLIAN,
 934:       LATIN_EXTENDED_ADDITIONAL,
 935:       GREEK_EXTENDED,
 936:       GENERAL_PUNCTUATION,
 937:       SUPERSCRIPTS_AND_SUBSCRIPTS,
 938:       CURRENCY_SYMBOLS,
 939:       COMBINING_MARKS_FOR_SYMBOLS,
 940:       LETTERLIKE_SYMBOLS,
 941:       NUMBER_FORMS,
 942:       ARROWS,
 943:       MATHEMATICAL_OPERATORS,
 944:       MISCELLANEOUS_TECHNICAL,
 945:       CONTROL_PICTURES,
 946:       OPTICAL_CHARACTER_RECOGNITION,
 947:       ENCLOSED_ALPHANUMERICS,
 948:       BOX_DRAWING,
 949:       BLOCK_ELEMENTS,
 950:       GEOMETRIC_SHAPES,
 951:       MISCELLANEOUS_SYMBOLS,
 952:       DINGBATS,
 953:       BRAILLE_PATTERNS,
 954:       CJK_RADICALS_SUPPLEMENT,
 955:       KANGXI_RADICALS,
 956:       IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
 957:       CJK_SYMBOLS_AND_PUNCTUATION,
 958:       HIRAGANA,
 959:       KATAKANA,
 960:       BOPOMOFO,
 961:       HANGUL_COMPATIBILITY_JAMO,
 962:       KANBUN,
 963:       BOPOMOFO_EXTENDED,
 964:       ENCLOSED_CJK_LETTERS_AND_MONTHS,
 965:       CJK_COMPATIBILITY,
 966:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
 967:       CJK_UNIFIED_IDEOGRAPHS,
 968:       YI_SYLLABLES,
 969:       YI_RADICALS,
 970:       HANGUL_SYLLABLES,
 971:       SURROGATES_AREA,
 972:       PRIVATE_USE_AREA,
 973:       CJK_COMPATIBILITY_IDEOGRAPHS,
 974:       ALPHABETIC_PRESENTATION_FORMS,
 975:       ARABIC_PRESENTATION_FORMS_A,
 976:       COMBINING_HALF_MARKS,
 977:       CJK_COMPATIBILITY_FORMS,
 978:       SMALL_FORM_VARIANTS,
 979:       ARABIC_PRESENTATION_FORMS_B,
 980:       HALFWIDTH_AND_FULLWIDTH_FORMS,
 981:       SPECIALS,
 982:     };
 983:   } // class UnicodeBlock
 984: 
  /**
   * The immutable value of this Character. The field is final and is
   * assigned exactly once, by the constructor.
   *
   * @serial the value of this Character
   */
  private final char value;

  /**
   * Serialization version identifier. Compatible with JDK 1.0+.
   */
  private static final long serialVersionUID = 3786198910865385080L;
 996: 
  /**
   * Smallest value allowed for radix arguments in Java. This value is 2.
   * {@link #digit(char, int)} returns -1 for any radix below this bound.
   *
   * @see #digit(char, int)
   * @see #forDigit(int, int)
   * @see Integer#toString(int, int)
   * @see Integer#valueOf(String)
   */
  public static final int MIN_RADIX = 2;

  /**
   * Largest value allowed for radix arguments in Java. This value is 36.
   * {@link #digit(char, int)} returns -1 for any radix above this bound.
   *
   * @see #digit(char, int)
   * @see #forDigit(int, int)
   * @see Integer#toString(int, int)
   * @see Integer#valueOf(String)
   */
  public static final int MAX_RADIX = 36;

  /**
   * The minimum value the char data type can hold.
   * This value is <code>'\\u0000'</code>.
   */
  public static final char MIN_VALUE = '\u0000';

  /**
   * The maximum value the char data type can hold.
   * This value is <code>'\\uFFFF'</code>.
   */
  public static final char MAX_VALUE = '\uFFFF';

  /**
   * Class object representing the primitive char data type. It is obtained
   * from the VM through <code>VMClassLoader.getPrimitiveClass('C')</code>.
   *
   * @since 1.1
   */
  public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
1035: 
  // General category constants. These values are compared against the
  // result of getType(char) and are used as shift counts when building
  // category bit masks (see isLetter and friends), so the numeric values,
  // which do not follow declaration order, must not be changed.

  /**
   * Lu = Letter, Uppercase (Informative).
   *
   * @since 1.1
   */
  public static final byte UPPERCASE_LETTER = 1;

  /**
   * Ll = Letter, Lowercase (Informative).
   *
   * @since 1.1
   */
  public static final byte LOWERCASE_LETTER = 2;

  /**
   * Lt = Letter, Titlecase (Informative).
   *
   * @since 1.1
   */
  public static final byte TITLECASE_LETTER = 3;

  /**
   * Mn = Mark, Non-Spacing (Normative).
   *
   * @since 1.1
   */
  public static final byte NON_SPACING_MARK = 6;

  /**
   * Mc = Mark, Spacing Combining (Normative).
   *
   * @since 1.1
   */
  public static final byte COMBINING_SPACING_MARK = 8;

  /**
   * Me = Mark, Enclosing (Normative).
   *
   * @since 1.1
   */
  public static final byte ENCLOSING_MARK = 7;

  /**
   * Nd = Number, Decimal Digit (Normative).
   *
   * @since 1.1
   */
  public static final byte DECIMAL_DIGIT_NUMBER = 9;

  /**
   * Nl = Number, Letter (Normative).
   *
   * @since 1.1
   */
  public static final byte LETTER_NUMBER = 10;

  /**
   * No = Number, Other (Normative).
   *
   * @since 1.1
   */
  public static final byte OTHER_NUMBER = 11;

  /**
   * Zs = Separator, Space (Normative).
   *
   * @since 1.1
   */
  public static final byte SPACE_SEPARATOR = 12;

  /**
   * Zl = Separator, Line (Normative).
   *
   * @since 1.1
   */
  public static final byte LINE_SEPARATOR = 13;

  /**
   * Zp = Separator, Paragraph (Normative).
   *
   * @since 1.1
   */
  public static final byte PARAGRAPH_SEPARATOR = 14;

  /**
   * Cc = Other, Control (Normative).
   *
   * @since 1.1
   */
  public static final byte CONTROL = 15;

  /**
   * Cf = Other, Format (Normative).
   *
   * @since 1.1
   */
  public static final byte FORMAT = 16;

  /**
   * Cs = Other, Surrogate (Normative).
   *
   * @since 1.1
   */
  public static final byte SURROGATE = 19;

  /**
   * Co = Other, Private Use (Normative).
   *
   * @since 1.1
   */
  public static final byte PRIVATE_USE = 18;

  /**
   * Cn = Other, Not Assigned (Normative).
   *
   * @since 1.1
   */
  public static final byte UNASSIGNED = 0;

  /**
   * Lm = Letter, Modifier (Informative).
   *
   * @since 1.1
   */
  public static final byte MODIFIER_LETTER = 4;

  /**
   * Lo = Letter, Other (Informative).
   *
   * @since 1.1
   */
  public static final byte OTHER_LETTER = 5;

  /**
   * Pc = Punctuation, Connector (Informative).
   *
   * @since 1.1
   */
  public static final byte CONNECTOR_PUNCTUATION = 23;

  /**
   * Pd = Punctuation, Dash (Informative).
   *
   * @since 1.1
   */
  public static final byte DASH_PUNCTUATION = 20;

  /**
   * Ps = Punctuation, Open (Informative).
   *
   * @since 1.1
   */
  public static final byte START_PUNCTUATION = 21;

  /**
   * Pe = Punctuation, Close (Informative).
   *
   * @since 1.1
   */
  public static final byte END_PUNCTUATION = 22;

  /**
   * Pi = Punctuation, Initial Quote (Informative).
   *
   * @since 1.4
   */
  public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

  /**
   * Pf = Punctuation, Final Quote (Informative).
   *
   * @since 1.4
   */
  public static final byte FINAL_QUOTE_PUNCTUATION = 30;

  /**
   * Po = Punctuation, Other (Informative).
   *
   * @since 1.1
   */
  public static final byte OTHER_PUNCTUATION = 24;

  /**
   * Sm = Symbol, Math (Informative).
   *
   * @since 1.1
   */
  public static final byte MATH_SYMBOL = 25;

  /**
   * Sc = Symbol, Currency (Informative).
   *
   * @since 1.1
   */
  public static final byte CURRENCY_SYMBOL = 26;

  /**
   * Sk = Symbol, Modifier (Informative).
   *
   * @since 1.1
   */
  public static final byte MODIFIER_SYMBOL = 27;

  /**
   * So = Symbol, Other (Informative).
   *
   * @since 1.1
   */
  public static final byte OTHER_SYMBOL = 28;
1245: 
  /**
   * Undefined bidirectional character type. Undefined char values have
   * undefined directionality in the Unicode specification.
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_UNDEFINED = -1;

  /**
   * Strong bidirectional character type "L".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

  /**
   * Strong bidirectional character type "R".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

  /**
   * Strong bidirectional character type "AL".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

  /**
   * Weak bidirectional character type "EN".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

  /**
   * Weak bidirectional character type "ES".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

  /**
   * Weak bidirectional character type "ET".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

  /**
   * Weak bidirectional character type "AN".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

  /**
   * Weak bidirectional character type "CS".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

  /**
   * Weak bidirectional character type "NSM".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

  /**
   * Weak bidirectional character type "BN".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

  /**
   * Neutral bidirectional character type "B".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

  /**
   * Neutral bidirectional character type "S".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

  /**
   * Neutral bidirectional character type "WS".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_WHITESPACE = 12;

  /**
   * Neutral bidirectional character type "ON".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

  /**
   * Strong bidirectional character type "LRE".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

  /**
   * Strong bidirectional character type "LRO".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

  /**
   * Strong bidirectional character type "RLE".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

  /**
   * Strong bidirectional character type "RLO".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

  /**
   * Weak bidirectional character type "PDF".
   *
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
1386: 
  /**
   * Stores unicode block offset lookup table. Exploit package visibility of
   * String.value to avoid copying the array. It is indexed by
   * <code>ch &gt;&gt; CharData.SHIFT</code> in {@link #readChar(char)}.
   * @see #readChar(char)
   * @see CharData#BLOCKS
   */
  private static final char[] blocks = String.zeroBasedStringValue(CharData.BLOCKS);

  /**
   * Stores unicode attribute offset lookup table. Exploit package visibility
   * of String.value to avoid copying the array. Each entry is the packed
   * attribute value returned by {@link #readChar(char)}.
   * @see CharData#DATA
   */
  private static final char[] data = String.zeroBasedStringValue(CharData.DATA);

  /**
   * Stores unicode numeric value attribute table. Exploit package visibility
   * of String.value to avoid copying the array. It is indexed by the
   * attribute offset (<code>readChar(ch) &gt;&gt; 7</code>), as in
   * {@link #digit(char, int)}.
   * @see CharData#NUM_VALUE
   */
  private static final char[] numValue
      = String.zeroBasedStringValue(CharData.NUM_VALUE);

  /**
   * Stores unicode uppercase attribute table. Exploit package visibility
   * of String.value to avoid copying the array. Each entry is a delta that
   * {@link #toUpperCase(char)} adds to the character.
   * @see CharData#UPPER
   */
  private static final char[] upper = String.zeroBasedStringValue(CharData.UPPER);

  /**
   * Stores unicode lowercase attribute table. Exploit package visibility
   * of String.value to avoid copying the array. Each entry is a delta that
   * {@link #toLowerCase(char)} adds to the character.
   * @see CharData#LOWER
   */
  private static final char[] lower = String.zeroBasedStringValue(CharData.LOWER);

  /**
   * Stores unicode direction attribute table. Exploit package visibility
   * of String.value to avoid copying the array.
   * @see CharData#DIRECTION
   */
  // Package visible for use by String.
  static final char[] direction = String.zeroBasedStringValue(CharData.DIRECTION);

  /**
   * Stores unicode titlecase table. Exploit package visibility of
   * String.value to avoid copying the array. {@link #toTitleCase(char)}
   * reads it as consecutive pairs: a character at an even index followed by
   * its titlecase mapping.
   * @see CharData#TITLE
   */
  private static final char[] title = String.zeroBasedStringValue(CharData.TITLE);
1438: 
  /**
   * Mask for grabbing the type out of the contents of data. It selects the
   * low 5 bits of an attribute value.
   * @see CharData#DATA
   */
  private static final int TYPE_MASK = 0x1F;

  /**
   * Mask for grabbing the non-breaking space flag out of the contents of
   * data. It selects bit 5 of an attribute value.
   * @see CharData#DATA
   */
  private static final int NO_BREAK_MASK = 0x20;

  /**
   * Mask for grabbing the mirrored directionality flag out of the contents
   * of data. It selects bit 6 of an attribute value.
   * @see CharData#DATA
   */
  private static final int MIRROR_MASK = 0x40;
1458: 
1459:   /**
1460:    * Grabs an attribute offset from the Unicode attribute database. The lower
1461:    * 5 bits are the character type, the next 2 bits are flags, and the top
1462:    * 9 bits are the offset into the attribute tables.
1463:    *
1464:    * @param ch the character to look up
1465:    * @return the character's attribute offset and type
1466:    * @see #TYPE_MASK
1467:    * @see #NO_BREAK_MASK
1468:    * @see #MIRROR_MASK
1469:    * @see CharData#DATA
1470:    * @see CharData#SHIFT
1471:    */
1472:   // Package visible for use in String.
1473:   static char readChar(char ch)
1474:   {
1475:     // Perform 16-bit addition to find the correct entry in data.
1476:     return data[(char) (blocks[ch >> CharData.SHIFT] + ch)];
1477:   }
1478: 
1479:   /**
1480:    * Wraps up a character.
1481:    *
1482:    * @param value the character to wrap
1483:    */
1484:   public Character(char value)
1485:   {
1486:     this.value = value;
1487:   }
1488: 
1489:   /**
1490:    * Returns the character which has been wrapped by this class.
1491:    *
1492:    * @return the character wrapped
1493:    */
1494:   public char charValue()
1495:   {
1496:     return value;
1497:   }
1498: 
1499:   /**
1500:    * Returns the numerical value (unsigned) of the wrapped character.
1501:    * Range of returned values: 0x0000-0xFFFF.
1502:    *
1503:    * @return the value of the wrapped character
1504:    */
1505:   public int hashCode()
1506:   {
1507:     return value;
1508:   }
1509: 
1510:   /**
1511:    * Determines if an object is equal to this object. This is only true for
1512:    * another Character object wrapping the same value.
1513:    *
1514:    * @param o object to compare
1515:    * @return true if o is a Character with the same value
1516:    */
1517:   public boolean equals(Object o)
1518:   {
1519:     return o instanceof Character && value == ((Character) o).value;
1520:   }
1521: 
1522:   /**
1523:    * Converts the wrapped character into a String.
1524:    *
1525:    * @return a String containing one character -- the wrapped character
1526:    *         of this instance
1527:    */
1528:   public String toString()
1529:   {
1530:     // Package constructor avoids an array copy.
1531:     return new String(new char[] { value }, 0, 1, true);
1532:   }
1533: 
1534:   /**
1535:    * Returns a String of length 1 representing the specified character.
1536:    *
1537:    * @param ch the character to convert
1538:    * @return a String containing the character
1539:    * @since 1.4
1540:    */
1541:   public static String toString(char ch)
1542:   {
1543:     // Package constructor avoids an array copy.
1544:     return new String(new char[] { ch }, 0, 1, true);
1545:   }
1546: 
1547:   /**
1548:    * Determines if a character is a Unicode lowercase letter. For example,
1549:    * <code>'a'</code> is lowercase.
1550:    * <br>
1551:    * lowercase = [Ll]
1552:    *
1553:    * @param ch character to test
1554:    * @return true if ch is a Unicode lowercase letter, else false
1555:    * @see #isUpperCase(char)
1556:    * @see #isTitleCase(char)
1557:    * @see #toLowerCase(char)
1558:    * @see #getType(char)
1559:    */
1560:   public static boolean isLowerCase(char ch)
1561:   {
1562:     return getType(ch) == LOWERCASE_LETTER;
1563:   }
1564: 
1565:   /**
1566:    * Determines if a character is a Unicode uppercase letter. For example,
1567:    * <code>'A'</code> is uppercase.
1568:    * <br>
1569:    * uppercase = [Lu]
1570:    *
1571:    * @param ch character to test
1572:    * @return true if ch is a Unicode uppercase letter, else false
1573:    * @see #isLowerCase(char)
1574:    * @see #isTitleCase(char)
1575:    * @see #toUpperCase(char)
1576:    * @see #getType(char)
1577:    */
1578:   public static boolean isUpperCase(char ch)
1579:   {
1580:     return getType(ch) == UPPERCASE_LETTER;
1581:   }
1582: 
1583:   /**
1584:    * Determines if a character is a Unicode titlecase letter. For example,
1585:    * the character "Lj" (Latin capital L with small letter j) is titlecase.
1586:    * <br>
1587:    * titlecase = [Lt]
1588:    *
1589:    * @param ch character to test
1590:    * @return true if ch is a Unicode titlecase letter, else false
1591:    * @see #isLowerCase(char)
1592:    * @see #isUpperCase(char)
1593:    * @see #toTitleCase(char)
1594:    * @see #getType(char)
1595:    */
1596:   public static boolean isTitleCase(char ch)
1597:   {
1598:     return getType(ch) == TITLECASE_LETTER;
1599:   }
1600: 
1601:   /**
1602:    * Determines if a character is a Unicode decimal digit. For example,
1603:    * <code>'0'</code> is a digit.
1604:    * <br>
1605:    * Unicode decimal digit = [Nd]
1606:    *
1607:    * @param ch character to test
1608:    * @return true if ch is a Unicode decimal digit, else false
1609:    * @see #digit(char, int)
1610:    * @see #forDigit(int, int)
1611:    * @see #getType(char)
1612:    */
1613:   public static boolean isDigit(char ch)
1614:   {
1615:     return getType(ch) == DECIMAL_DIGIT_NUMBER;
1616:   }
1617: 
1618:   /**
1619:    * Determines if a character is part of the Unicode Standard. This is an
1620:    * evolving standard, but covers every character in the data file.
1621:    * <br>
1622:    * defined = not [Cn]
1623:    *
1624:    * @param ch character to test
1625:    * @return true if ch is a Unicode character, else false
1626:    * @see #isDigit(char)
1627:    * @see #isLetter(char)
1628:    * @see #isLetterOrDigit(char)
1629:    * @see #isLowerCase(char)
1630:    * @see #isTitleCase(char)
1631:    * @see #isUpperCase(char)
1632:    */
1633:   public static boolean isDefined(char ch)
1634:   {
1635:     return getType(ch) != UNASSIGNED;
1636:   }
1637: 
1638:   /**
1639:    * Determines if a character is a Unicode letter. Not all letters have case,
1640:    * so this may return true when isLowerCase and isUpperCase return false.
1641:    * <br>
1642:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
1643:    *
1644:    * @param ch character to test
1645:    * @return true if ch is a Unicode letter, else false
1646:    * @see #isDigit(char)
1647:    * @see #isJavaIdentifierStart(char)
1648:    * @see #isJavaLetter(char)
1649:    * @see #isJavaLetterOrDigit(char)
1650:    * @see #isLetterOrDigit(char)
1651:    * @see #isLowerCase(char)
1652:    * @see #isTitleCase(char)
1653:    * @see #isUnicodeIdentifierStart(char)
1654:    * @see #isUpperCase(char)
1655:    */
1656:   public static boolean isLetter(char ch)
1657:   {
1658:     return ((1 << getType(ch))
1659:             & ((1 << UPPERCASE_LETTER)
1660:                | (1 << LOWERCASE_LETTER)
1661:                | (1 << TITLECASE_LETTER)
1662:                | (1 << MODIFIER_LETTER)
1663:                | (1 << OTHER_LETTER))) != 0;
1664:   }
1665: 
1666:   /**
1667:    * Determines if a character is a Unicode letter or a Unicode digit. This
1668:    * is the combination of isLetter and isDigit.
1669:    * <br>
1670:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
1671:    *
1672:    * @param ch character to test
1673:    * @return true if ch is a Unicode letter or a Unicode digit, else false
1674:    * @see #isDigit(char)
1675:    * @see #isJavaIdentifierPart(char)
1676:    * @see #isJavaLetter(char)
1677:    * @see #isJavaLetterOrDigit(char)
1678:    * @see #isLetter(char)
1679:    * @see #isUnicodeIdentifierPart(char)
1680:    */
1681:   public static boolean isLetterOrDigit(char ch)
1682:   {
1683:     return ((1 << getType(ch))
1684:             & ((1 << UPPERCASE_LETTER)
1685:                | (1 << LOWERCASE_LETTER)
1686:                | (1 << TITLECASE_LETTER)
1687:                | (1 << MODIFIER_LETTER)
1688:                | (1 << OTHER_LETTER)
1689:                | (1 << DECIMAL_DIGIT_NUMBER))) != 0;
1690:   }
1691: 
1692:   /**
1693:    * Determines if a character can start a Java identifier. This is the
1694:    * combination of isLetter, any character where getType returns
1695:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
1696:    * (like '_').
1697:    *
1698:    * @param ch character to test
1699:    * @return true if ch can start a Java identifier, else false
1700:    * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
1701:    * @see #isJavaLetterOrDigit(char)
1702:    * @see #isJavaIdentifierStart(char)
1703:    * @see #isJavaIdentifierPart(char)
1704:    * @see #isLetter(char)
1705:    * @see #isLetterOrDigit(char)
1706:    * @see #isUnicodeIdentifierStart(char)
1707:    */
1708:   public static boolean isJavaLetter(char ch)
1709:   {
1710:     return isJavaIdentifierStart(ch);
1711:   }
1712: 
1713:   /**
1714:    * Determines if a character can follow the first letter in
1715:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
1716:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
1717:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
1718:    * or isIdentifierIgnorable.
1719:    *
1720:    * @param ch character to test
1721:    * @return true if ch can follow the first letter in a Java identifier
1722:    * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
1723:    * @see #isJavaLetter(char)
1724:    * @see #isJavaIdentifierStart(char)
1725:    * @see #isJavaIdentifierPart(char)
1726:    * @see #isLetter(char)
1727:    * @see #isLetterOrDigit(char)
1728:    * @see #isUnicodeIdentifierPart(char)
1729:    * @see #isIdentifierIgnorable(char)
1730:    */
1731:   public static boolean isJavaLetterOrDigit(char ch)
1732:   {
1733:     return isJavaIdentifierPart(ch);
1734:   }
1735: 
1736:   /**
1737:    * Determines if a character can start a Java identifier. This is the
1738:    * combination of isLetter, any character where getType returns
1739:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
1740:    * (like '_').
1741:    * <br>
1742:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
1743:    *
1744:    * @param ch character to test
1745:    * @return true if ch can start a Java identifier, else false
1746:    * @see #isJavaIdentifierPart(char)
1747:    * @see #isLetter(char)
1748:    * @see #isUnicodeIdentifierStart(char)
1749:    * @since 1.1
1750:    */
1751:   public static boolean isJavaIdentifierStart(char ch)
1752:   {
1753:     return ((1 << getType(ch))
1754:             & ((1 << UPPERCASE_LETTER)
1755:                | (1 << LOWERCASE_LETTER)
1756:                | (1 << TITLECASE_LETTER)
1757:                | (1 << MODIFIER_LETTER)
1758:                | (1 << OTHER_LETTER)
1759:                | (1 << LETTER_NUMBER)
1760:                | (1 << CURRENCY_SYMBOL)
1761:                | (1 << CONNECTOR_PUNCTUATION))) != 0;
1762:   }
1763: 
1764:   /**
1765:    * Determines if a character can follow the first letter in
1766:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
1767:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
1768:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
1769:    * or isIdentifierIgnorable.
1770:    * <br>
1771:    * Java identifier extender =
1772:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
1773:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
1774:    *
1775:    * @param ch character to test
1776:    * @return true if ch can follow the first letter in a Java identifier
1777:    * @see #isIdentifierIgnorable(char)
1778:    * @see #isJavaIdentifierStart(char)
1779:    * @see #isLetterOrDigit(char)
1780:    * @see #isUnicodeIdentifierPart(char)
1781:    * @since 1.1
1782:    */
1783:   public static boolean isJavaIdentifierPart(char ch)
1784:   {
1785:     int category = getType(ch);
1786:     return ((1 << category)
1787:             & ((1 << UPPERCASE_LETTER)
1788:                | (1 << LOWERCASE_LETTER)
1789:                | (1 << TITLECASE_LETTER)
1790:                | (1 << MODIFIER_LETTER)
1791:                | (1 << OTHER_LETTER)
1792:                | (1 << NON_SPACING_MARK)
1793:                | (1 << COMBINING_SPACING_MARK)
1794:                | (1 << DECIMAL_DIGIT_NUMBER)
1795:                | (1 << LETTER_NUMBER)
1796:                | (1 << CURRENCY_SYMBOL)
1797:                | (1 << CONNECTOR_PUNCTUATION)
1798:                | (1 << FORMAT))) != 0
1799:       || (category == CONTROL && isIdentifierIgnorable(ch));
1800:   }
1801: 
1802:   /**
1803:    * Determines if a character can start a Unicode identifier.  Only
1804:    * letters can start a Unicode identifier, but this includes characters
1805:    * in LETTER_NUMBER.
1806:    * <br>
1807:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
1808:    *
1809:    * @param ch character to test
1810:    * @return true if ch can start a Unicode identifier, else false
1811:    * @see #isJavaIdentifierStart(char)
1812:    * @see #isLetter(char)
1813:    * @see #isUnicodeIdentifierPart(char)
1814:    * @since 1.1
1815:    */
1816:   public static boolean isUnicodeIdentifierStart(char ch)
1817:   {
1818:     return ((1 << getType(ch))
1819:             & ((1 << UPPERCASE_LETTER)
1820:                | (1 << LOWERCASE_LETTER)
1821:                | (1 << TITLECASE_LETTER)
1822:                | (1 << MODIFIER_LETTER)
1823:                | (1 << OTHER_LETTER)
1824:                | (1 << LETTER_NUMBER))) != 0;
1825:   }
1826: 
1827:   /**
1828:    * Determines if a character can follow the first letter in
1829:    * a Unicode identifier. This includes letters, connecting punctuation,
1830:    * digits, numeric letters, combining marks, non-spacing marks, and
1831:    * isIdentifierIgnorable.
1832:    * <br>
1833:    * Unicode identifier extender =
1834:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
1835:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
1836:    *
1837:    * @param ch character to test
1838:    * @return true if ch can follow the first letter in a Unicode identifier
1839:    * @see #isIdentifierIgnorable(char)
1840:    * @see #isJavaIdentifierPart(char)
1841:    * @see #isLetterOrDigit(char)
1842:    * @see #isUnicodeIdentifierStart(char)
1843:    * @since 1.1
1844:    */
1845:   public static boolean isUnicodeIdentifierPart(char ch)
1846:   {
1847:     int category = getType(ch);
1848:     return ((1 << category)
1849:             & ((1 << UPPERCASE_LETTER)
1850:                | (1 << LOWERCASE_LETTER)
1851:                | (1 << TITLECASE_LETTER)
1852:                | (1 << MODIFIER_LETTER)
1853:                | (1 << OTHER_LETTER)
1854:                | (1 << NON_SPACING_MARK)
1855:                | (1 << COMBINING_SPACING_MARK)
1856:                | (1 << DECIMAL_DIGIT_NUMBER)
1857:                | (1 << LETTER_NUMBER)
1858:                | (1 << CONNECTOR_PUNCTUATION)
1859:                | (1 << FORMAT))) != 0
1860:       || (category == CONTROL && isIdentifierIgnorable(ch));
1861:   }
1862: 
1863:   /**
1864:    * Determines if a character is ignorable in a Unicode identifier. This
1865:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
1866:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
1867:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
1868:    * <code>'\u009F'</code>), and FORMAT characters.
1869:    * <br>
1870:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
1871:    *    |U+007F-U+009F
1872:    *
1873:    * @param ch character to test
1874:    * @return true if ch is ignorable in a Unicode or Java identifier
1875:    * @see #isJavaIdentifierPart(char)
1876:    * @see #isUnicodeIdentifierPart(char)
1877:    * @since 1.1
1878:    */
1879:   public static boolean isIdentifierIgnorable(char ch)
1880:   {
1881:     return (ch <= '\u009F' && (ch < '\t' || ch >= '\u007F'
1882:                                || (ch <= '\u001B' && ch >= '\u000E')))
1883:       || getType(ch) == FORMAT;
1884:   }
1885: 
1886:   /**
1887:    * Converts a Unicode character into its lowercase equivalent mapping.
1888:    * If a mapping does not exist, then the character passed is returned.
1889:    * Note that isLowerCase(toLowerCase(ch)) does not always return true.
1890:    *
1891:    * @param ch character to convert to lowercase
1892:    * @return lowercase mapping of ch, or ch if lowercase mapping does
1893:    *         not exist
1894:    * @see #isLowerCase(char)
1895:    * @see #isUpperCase(char)
1896:    * @see #toTitleCase(char)
1897:    * @see #toUpperCase(char)
1898:    */
1899:   public static char toLowerCase(char ch)
1900:   {
1901:     // Signedness doesn't matter, as result is cast back to char.
1902:     return (char) (ch + lower[readChar(ch) >> 7]);
1903:   }
1904: 
1905:   /**
1906:    * Converts a Unicode character into its uppercase equivalent mapping.
1907:    * If a mapping does not exist, then the character passed is returned.
1908:    * Note that isUpperCase(toUpperCase(ch)) does not always return true.
1909:    *
1910:    * @param ch character to convert to uppercase
1911:    * @return uppercase mapping of ch, or ch if uppercase mapping does
1912:    *         not exist
1913:    * @see #isLowerCase(char)
1914:    * @see #isUpperCase(char)
1915:    * @see #toLowerCase(char)
1916:    * @see #toTitleCase(char)
1917:    */
1918:   public static char toUpperCase(char ch)
1919:   {
1920:     // Signedness doesn't matter, as result is cast back to char.
1921:     return (char) (ch + upper[readChar(ch) >> 7]);
1922:   }
1923: 
1924:   /**
1925:    * Converts a Unicode character into its titlecase equivalent mapping.
1926:    * If a mapping does not exist, then the character passed is returned.
1927:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
1928:    *
1929:    * @param ch character to convert to titlecase
1930:    * @return titlecase mapping of ch, or ch if titlecase mapping does
1931:    *         not exist
1932:    * @see #isTitleCase(char)
1933:    * @see #toLowerCase(char)
1934:    * @see #toUpperCase(char)
1935:    */
1936:   public static char toTitleCase(char ch)
1937:   {
1938:     // As title is short, it doesn't hurt to exhaustively iterate over it.
1939:     for (int i = title.length - 2; i >= 0; i -= 2)
1940:       if (title[i] == ch)
1941:         return title[i + 1];
1942:     return toUpperCase(ch);
1943:   }
1944: 
1945:   /**
1946:    * Converts a character into a digit of the specified radix. If the radix
1947:    * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch)
1948:    * exceeds the radix, or if ch is not a decimal digit or in the case
1949:    * insensitive set of 'a'-'z', the result is -1.
1950:    * <br>
1951:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
1952:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
1953:    *
1954:    * @param ch character to convert into a digit
1955:    * @param radix radix in which ch is a digit
1956:    * @return digit which ch represents in radix, or -1 not a valid digit
1957:    * @see #MIN_RADIX
1958:    * @see #MAX_RADIX
1959:    * @see #forDigit(int, int)
1960:    * @see #isDigit(char)
1961:    * @see #getNumericValue(char)
1962:    */
1963:   public static int digit(char ch, int radix)
1964:   {
1965:     if (radix < MIN_RADIX || radix > MAX_RADIX)
1966:       return -1;
1967:     char attr = readChar(ch);
1968:     if (((1 << (attr & TYPE_MASK))
1969:          & ((1 << UPPERCASE_LETTER)
1970:             | (1 << LOWERCASE_LETTER)
1971:             | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
1972:       {
1973:         // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
1974:         int digit = numValue[attr >> 7];
1975:         return (digit < radix) ? digit : -1;
1976:       }
1977:     return -1;
1978:   }
1979: 
1980:   /**
1981:    * Returns the Unicode numeric value property of a character. For example,
1982:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
1983:    *
1984:    * <p>This method also returns values for the letters A through Z, (not
1985:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
1986:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
1987:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
1988:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
1989:    * <code>'\uFF5A'</code> (full width variants).
1990:    *
1991:    * <p>If the character lacks a numeric value property, -1 is returned.
1992:    * If the character has a numeric value property which is not representable
1993:    * as a nonnegative integer, such as a fraction, -2 is returned.
1994:    *
1995:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
1996:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
1997:    *
1998:    * @param ch character from which the numeric value property will
1999:    *        be retrieved
2000:    * @return the numeric value property of ch, or -1 if it does not exist, or
2001:    *         -2 if it is not representable as a nonnegative integer
2002:    * @see #forDigit(int, int)
2003:    * @see #digit(char, int)
2004:    * @see #isDigit(char)
2005:    * @since 1.1
2006:    */
2007:   public static int getNumericValue(char ch)
2008:   {
2009:     // Treat numValue as signed.
2010:     return (short) numValue[readChar(ch) >> 7];
2011:   }
2012: 
2013:   /**
2014:    * Determines if a character is a ISO-LATIN-1 space. This is only the five
2015:    * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
2016:    * <code>'\r'</code>, and <code>' '</code>.
2017:    * <br>
2018:    * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
2019:    *
2020:    * @param ch character to test
2021:    * @return true if ch is a space, else false
2022:    * @deprecated Replaced by {@link #isWhitespace(char)}
2023:    * @see #isSpaceChar(char)
2024:    * @see #isWhitespace(char)
2025:    */
2026:   public static boolean isSpace(char ch)
2027:   {
2028:     // Performing the subtraction up front alleviates need to compare longs.
2029:     return ch-- <= ' ' && ((1 << ch)
2030:                            & ((1 << (' ' - 1))
2031:                               | (1 << ('\t' - 1))
2032:                               | (1 << ('\n' - 1))
2033:                               | (1 << ('\r' - 1))
2034:                               | (1 << ('\f' - 1)))) != 0;
2035:   }
2036: 
2037:   /**
2038:    * Determines if a character is a Unicode space character. This includes
2039:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
2040:    * <br>
2041:    * Unicode space = [Zs]|[Zp]|[Zl]
2042:    *
2043:    * @param ch character to test
2044:    * @return true if ch is a Unicode space, else false
2045:    * @see #isWhitespace(char)
2046:    * @since 1.1
2047:    */
2048:   public static boolean isSpaceChar(char ch)
2049:   {
2050:     return ((1 << getType(ch))
2051:             & ((1 << SPACE_SEPARATOR)
2052:                | (1 << LINE_SEPARATOR)
2053:                | (1 << PARAGRAPH_SEPARATOR))) != 0;
2054:   }
2055: 
2056:   /**
2057:    * Determines if a character is Java whitespace. This includes Unicode
2058:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
2059:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
2060:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
2061:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
2062:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
2063:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
2064:    * and <code>'\u001F'</code>.
2065:    * <br>
2066:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
2067:    *
2068:    * @param ch character to test
2069:    * @return true if ch is Java whitespace, else false
2070:    * @see #isSpaceChar(char)
2071:    * @since 1.1
2072:    */
2073:   public static boolean isWhitespace(char ch)
2074:   {
2075:     int attr = readChar(ch);
2076:     return ((((1 << (attr & TYPE_MASK))
2077:               & ((1 << SPACE_SEPARATOR)
2078:                  | (1 << LINE_SEPARATOR)
2079:                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
2080:             && (attr & NO_BREAK_MASK) == 0)
2081:       || (ch <= '\u001F' && ((1 << ch)
2082:                              & ((1 << '\t')
2083:                                 | (1 << '\n')
2084:                                 | (1 << '\u000B')
2085:                                 | (1 << '\u000C')
2086:                                 | (1 << '\r')
2087:                                 | (1 << '\u001C')
2088:                                 | (1 << '\u001D')
2089:                                 | (1 << '\u001E')
2090:                                 | (1 << '\u001F'))) != 0);
2091:   }
2092: 
2093:   /**
2094:    * Determines if a character has the ISO Control property.
2095:    * <br>
2096:    * ISO Control = [Cc]
2097:    *
2098:    * @param ch character to test
2099:    * @return true if ch is an ISO Control character, else false
2100:    * @see #isSpaceChar(char)
2101:    * @see #isWhitespace(char)
2102:    * @since 1.1
2103:    */
2104:   public static boolean isISOControl(char ch)
2105:   {
2106:     return getType(ch) == CONTROL;
2107:   }
2108: 
2109:   /**
2110:    * Returns the Unicode general category property of a character.
2111:    *
2112:    * @param ch character from which the general category property will
2113:    *        be retrieved
2114:    * @return the character category property of ch as an integer
2115:    * @see #UNASSIGNED
2116:    * @see #UPPERCASE_LETTER
2117:    * @see #LOWERCASE_LETTER
2118:    * @see #TITLECASE_LETTER
2119:    * @see #MODIFIER_LETTER
2120:    * @see #OTHER_LETTER
2121:    * @see #NON_SPACING_MARK
2122:    * @see #ENCLOSING_MARK
2123:    * @see #COMBINING_SPACING_MARK
2124:    * @see #DECIMAL_DIGIT_NUMBER
2125:    * @see #LETTER_NUMBER
2126:    * @see #OTHER_NUMBER
2127:    * @see #SPACE_SEPARATOR
2128:    * @see #LINE_SEPARATOR
2129:    * @see #PARAGRAPH_SEPARATOR
2130:    * @see #CONTROL
2131:    * @see #FORMAT
2132:    * @see #PRIVATE_USE
2133:    * @see #SURROGATE
2134:    * @see #DASH_PUNCTUATION
2135:    * @see #START_PUNCTUATION
2136:    * @see #END_PUNCTUATION
2137:    * @see #CONNECTOR_PUNCTUATION
2138:    * @see #OTHER_PUNCTUATION
2139:    * @see #MATH_SYMBOL
2140:    * @see #CURRENCY_SYMBOL
2141:    * @see #MODIFIER_SYMBOL
2142:    * @see #INITIAL_QUOTE_PUNCTUATION
2143:    * @see #FINAL_QUOTE_PUNCTUATION
2144:    * @since 1.1
2145:    */
2146:   public static int getType(char ch)
2147:   {
2148:     return readChar(ch) & TYPE_MASK;
2149:   }
2150: 
2151:   /**
2152:    * Converts a digit into a character which represents that digit
2153:    * in a specified radix. If the radix exceeds MIN_RADIX or MAX_RADIX,
2154:    * or the digit exceeds the radix, then the null character <code>'\0'</code>
2155:    * is returned.  Otherwise the return value is in '0'-'9' and 'a'-'z'.
2156:    * <br>
2157:    * return value boundary = U+0030-U+0039|U+0061-U+007A
2158:    *
2159:    * @param digit digit to be converted into a character
2160:    * @param radix radix of digit
2161:    * @return character representing digit in radix, or '\0'
2162:    * @see #MIN_RADIX
2163:    * @see #MAX_RADIX
2164:    * @see #digit(char, int)
2165:    */
2166:   public static char forDigit(int digit, int radix)
2167:   {
2168:     if (radix < MIN_RADIX || radix > MAX_RADIX
2169:         || digit < 0 || digit >= radix)
2170:       return '\0';
2171:     return Number.digits[digit];
2172:   }
2173: 
2174:   /**
2175:    * Returns the Unicode directionality property of the character. This
2176:    * is used in the visual ordering of text.
2177:    *
2178:    * @param ch the character to look up
2179:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
2180:    * @see #DIRECTIONALITY_UNDEFINED
2181:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
2182:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
2183:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
2184:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
2185:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
2186:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
2187:    * @see #DIRECTIONALITY_ARABIC_NUMBER
2188:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
2189:    * @see #DIRECTIONALITY_NONSPACING_MARK
2190:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
2191:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
2192:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
2193:    * @see #DIRECTIONALITY_WHITESPACE
2194:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
2195:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
2196:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
2197:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
2198:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
2199:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
2200:    * @since 1.4
2201:    */
2202:   public static byte getDirectionality(char ch)
2203:   {
2204:     // The result will correctly be signed.
2205:     return (byte) (direction[readChar(ch) >> 7] >> 2);
2206:   }
2207: 
2208:   /**
2209:    * Determines whether the character is mirrored according to Unicode. For
2210:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
2211:    * left-to-right text, but ')' in right-to-left text.
2212:    *
2213:    * @param ch the character to look up
2214:    * @return true if the character is mirrored
2215:    * @since 1.4
2216:    */
2217:   public static boolean isMirrored(char ch)
2218:   {
2219:     return (readChar(ch) & MIRROR_MASK) != 0;
2220:   }
2221: 
2222:   /**
2223:    * Compares another Character to this Character, numerically.
2224:    *
2225:    * @param anotherCharacter Character to compare with this Character
2226:    * @return a negative integer if this Character is less than
2227:    *         anotherCharacter, zero if this Character is equal, and
2228:    *         a positive integer if this Character is greater
2229:    * @throws NullPointerException if anotherCharacter is null
2230:    * @since 1.2
2231:    */
2232:   public int compareTo(Character anotherCharacter)
2233:   {
2234:     return value - anotherCharacter.value;
2235:   }
2236: 
2237:   /**
2238:    * Compares an object to this Character.  Assuming the object is a
2239:    * Character object, this method performs the same comparison as
2240:    * compareTo(Character).
2241:    *
2242:    * @param o object to compare
2243:    * @return the comparison value
2244:    * @throws ClassCastException if o is not a Character object
2245:    * @throws NullPointerException if o is null
2246:    * @see #compareTo(Character)
2247:    * @since 1.2
2248:    */
2249:   public int compareTo(Object o)
2250:   {
2251:     return compareTo((Character) o);
2252:   }
2253: } // class Character