001 /* CollationElementIterator.java -- Walks through collation elements 002 Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004 Free Software Foundation 003 004 This file is part of GNU Classpath. 005 006 GNU Classpath is free software; you can redistribute it and/or modify 007 it under the terms of the GNU General Public License as published by 008 the Free Software Foundation; either version 2, or (at your option) 009 any later version. 010 011 GNU Classpath is distributed in the hope that it will be useful, but 012 WITHOUT ANY WARRANTY; without even the implied warranty of 013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 General Public License for more details. 015 016 You should have received a copy of the GNU General Public License 017 along with GNU Classpath; see the file COPYING. If not, write to the 018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 019 02110-1301 USA. 020 021 Linking this library statically or dynamically with other modules is 022 making a combined work based on this library. Thus, the terms and 023 conditions of the GNU General Public License cover the whole 024 combination. 025 026 As a special exception, the copyright holders of this library give you 027 permission to link this library with independent modules to produce an 028 executable, regardless of the license terms of these independent 029 modules, and to copy and distribute the resulting executable under 030 terms of your choice, provided that you also meet, for each linked 031 independent module, the terms and conditions of the license of that 032 module. An independent module is a module which is not derived from 033 or based on this library. If you modify this library, you may extend 034 this exception to your version of the library, but you are not 035 obligated to do so. If you do not wish to do so, delete this 036 exception statement from your version. */ 037 038 039 package java.text; 040 041 import java.util.ArrayList; 042 043 /* Written using "Java Class Libraries", 2nd edition, plus online 044 * API docs for JDK 1.2 from http://www.javasoft.com. 045 * Status: Believed complete and correct to JDK 1.1. 046 */ 047 048 /** 049 * This class walks through the character collation elements of a 050 * <code>String</code> as defined by the collation rules in an instance of 051 * <code>RuleBasedCollator</code>. There is no public constructor for 052 * this class. An instance is created by calling the 053 * <code>getCollationElementIterator</code> method on 054 * <code>RuleBasedCollator</code>. 055 * 056 * @author Aaron M. Renn (arenn@urbanophile.com) 057 * @author Tom Tromey (tromey@cygnus.com) 058 * @author Guilhem Lavaux (guilhem.lavaux@free.fr) 059 */ 060 public final class CollationElementIterator 061 { 062 /** 063 * This is a constant value that is returned to indicate that the end of 064 * the string was encountered. 065 */ 066 public static final int NULLORDER = -1; 067 068 /** 069 * This is the RuleBasedCollator this object was created from. 070 */ 071 RuleBasedCollator collator; 072 073 /** 074 * This is the String that is being iterated over. 075 */ 076 String text; 077 078 /** 079 * This is the index into the collation decomposition where we are currently scanning. 080 */ 081 int index; 082 083 /** 084 * This is the index into the String where we are currently scanning. 085 */ 086 int textIndex; 087 088 /** 089 * Array containing the collation decomposition of the 090 * text given to the constructor. 091 */ 092 private RuleBasedCollator.CollationElement[] text_decomposition; 093 094 /** 095 * Array containing the index of the specified block. 096 */ 097 private int[] text_indexes; 098 099 /** 100 * This method initializes a new instance of <code>CollationElementIterator</code> 101 * to iterate over the specified <code>String</code> using the rules in the 102 * specified <code>RuleBasedCollator</code>. 103 * 104 * @param collator The <code>RuleBasedCollation</code> used for calculating collation values 105 * @param text The <code>String</code> to iterate over. 106 */ 107 CollationElementIterator(RuleBasedCollator collator, String text) 108 { 109 this.collator = collator; 110 111 setText (text); 112 } 113 114 RuleBasedCollator.CollationElement nextBlock() 115 { 116 if (index >= text_decomposition.length) 117 return null; 118 119 RuleBasedCollator.CollationElement e = text_decomposition[index]; 120 121 textIndex = text_indexes[index+1]; 122 123 index++; 124 125 return e; 126 } 127 128 RuleBasedCollator.CollationElement previousBlock() 129 { 130 if (index == 0) 131 return null; 132 133 index--; 134 RuleBasedCollator.CollationElement e = text_decomposition[index]; 135 136 textIndex = text_indexes[index+1]; 137 138 return e; 139 } 140 141 /** 142 * This method returns the collation ordering value of the next character sequence 143 * in the string (it may be an extended character following collation rules). 144 * This method will return <code>NULLORDER</code> if the 145 * end of the string was reached. 146 * 147 * @return The collation ordering value. 148 */ 149 public int next() 150 { 151 RuleBasedCollator.CollationElement e = nextBlock(); 152 153 if (e == null) 154 return NULLORDER; 155 156 return e.getValue(); 157 } 158 159 /** 160 * This method returns the collation ordering value of the previous character 161 * in the string. This method will return <code>NULLORDER</code> if the 162 * beginning of the string was reached. 163 * 164 * @return The collation ordering value. 165 */ 166 public int previous() 167 { 168 RuleBasedCollator.CollationElement e = previousBlock(); 169 170 if (e == null) 171 return NULLORDER; 172 173 return e.getValue(); 174 } 175 176 /** 177 * This method returns the primary order value for the given collation 178 * value. 179 * 180 * @param order The collation value returned from <code>next()</code> or 181 * <code>previous()</code>. 182 * 183 * @return The primary order value of the specified collation value. This is 184 * the high 16 bits. 185 */ 186 public static int primaryOrder(int order) 187 { 188 // From the JDK 1.2 spec. 189 return order >>> 16; 190 } 191 192 /** 193 * This method resets the internal position pointer to read from the 194 * beginning of the <code>String</code> again. 195 */ 196 public void reset() 197 { 198 index = 0; 199 textIndex = 0; 200 } 201 202 /** 203 * This method returns the secondary order value for the given collation 204 * value. 205 * 206 * @param order The collation value returned from <code>next()</code> or 207 * <code>previous()</code>. 208 * 209 * @return The secondary order value of the specified collation value. This 210 * is the bits 8-15. 211 */ 212 public static short secondaryOrder(int order) 213 { 214 // From the JDK 1.2 spec. 215 return (short) ((order >>> 8) & 255); 216 } 217 218 /** 219 * This method returns the tertiary order value for the given collation 220 * value. 221 * 222 * @param order The collation value returned from <code>next()</code> or 223 * <code>previous()</code>. 224 * 225 * @return The tertiary order value of the specified collation value. This 226 * is the low eight bits. 227 */ 228 public static short tertiaryOrder(int order) 229 { 230 // From the JDK 1.2 spec. 231 return (short) (order & 255); 232 } 233 234 /** 235 * This method sets the <code>String</code> that it is iterating over 236 * to the specified <code>String</code>. 237 * 238 * @param text The new <code>String</code> to iterate over. 239 * 240 * @since 1.2 241 */ 242 public void setText(String text) 243 { 244 int idx = 0; 245 int idx_idx = 0; 246 int alreadyExpanded = 0; 247 int idxToMove = 0; 248 249 this.text = text; 250 this.index = 0; 251 252 String work_text = text.intern(); 253 254 ArrayList a_element = new ArrayList(); 255 ArrayList a_idx = new ArrayList(); 256 257 // Build element collection ordered as they come in "text". 258 while (idx < work_text.length()) 259 { 260 String key, key_old; 261 262 Object object = null; 263 int p = 1; 264 265 // IMPROVE: use a TreeMap with a prefix-ordering rule. 266 key_old = key = null; 267 do 268 { 269 if (object != null) 270 key_old = key; 271 key = work_text.substring (idx, idx+p); 272 object = collator.prefix_tree.get (key); 273 if (object != null && idx < alreadyExpanded) 274 { 275 RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object; 276 if (prefix.expansion != null && 277 prefix.expansion.startsWith(work_text.substring(0, idx))) 278 { 279 object = null; 280 key = key_old; 281 } 282 } 283 p++; 284 } 285 while (idx+p <= work_text.length()); 286 287 if (object == null) 288 key = key_old; 289 290 RuleBasedCollator.CollationElement prefix = 291 (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key); 292 293 /* 294 * First case: There is no such sequence in the database. 295 * We will have to build one from the context. 296 */ 297 if (prefix == null) 298 { 299 /* 300 * We are dealing with sequences in an expansion. They 301 * are treated as accented characters (tertiary order). 302 */ 303 if (alreadyExpanded > 0) 304 { 305 RuleBasedCollator.CollationElement e = 306 collator.getDefaultAccentedElement (work_text.charAt (idx)); 307 308 a_element.add (e); 309 a_idx.add (new Integer(idx_idx)); 310 idx++; 311 alreadyExpanded--; 312 if (alreadyExpanded == 0) 313 { 314 /* There is not any characters left in the expansion set. 315 * We can increase the pointer in the source string. 316 */ 317 idx_idx += idxToMove; 318 idxToMove = 0; 319 } 320 else 321 idx_idx++; 322 } 323 else 324 { 325 /* This is a normal character. */ 326 RuleBasedCollator.CollationElement e = 327 collator.getDefaultElement (work_text.charAt (idx)); 328 Integer i_ref = new Integer(idx_idx); 329 330 /* Don't forget to mark it as a special sequence so the 331 * string can be ordered. 332 */ 333 a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ); 334 a_idx.add (i_ref); 335 a_element.add (e); 336 a_idx.add (i_ref); 337 idx_idx++; 338 idx++; 339 } 340 continue; 341 } 342 343 /* 344 * Second case: Here we have found a matching sequence. 345 * Here we have an expansion string prepend it to the "work text" and 346 * add the corresponding sorting element. We must also mark 347 */ 348 if (prefix.expansion != null) 349 { 350 work_text = prefix.expansion 351 + work_text.substring (idx+prefix.key.length()); 352 idx = 0; 353 a_element.add (prefix); 354 a_idx.add (new Integer(idx_idx)); 355 if (alreadyExpanded == 0) 356 idxToMove = prefix.key.length(); 357 alreadyExpanded += prefix.expansion.length()-prefix.key.length(); 358 } 359 else 360 { 361 /* Third case: the simplest. We have got the prefix and it 362 * has not to be expanded. 363 */ 364 a_element.add (prefix); 365 a_idx.add (new Integer(idx_idx)); 366 idx += prefix.key.length(); 367 /* If the sequence is in an expansion, we must decrease the 368 * counter. 369 */ 370 if (alreadyExpanded > 0) 371 { 372 alreadyExpanded -= prefix.key.length(); 373 if (alreadyExpanded == 0) 374 { 375 idx_idx += idxToMove; 376 idxToMove = 0; 377 } 378 } 379 else 380 idx_idx += prefix.key.length(); 381 } 382 } 383 384 text_decomposition = (RuleBasedCollator.CollationElement[]) 385 a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]); 386 text_indexes = new int[a_idx.size()+1]; 387 for (int i = 0; i < a_idx.size(); i++) 388 { 389 text_indexes[i] = ((Integer)a_idx.get(i)).intValue(); 390 } 391 text_indexes[a_idx.size()] = text.length(); 392 } 393 394 /** 395 * This method sets the <code>String</code> that it is iterating over 396 * to the <code>String</code> represented by the specified 397 * <code>CharacterIterator</code>. 398 * 399 * @param source The <code>CharacterIterator</code> containing the new 400 * <code>String</code> to iterate over. 401 */ 402 public void setText(CharacterIterator source) 403 { 404 StringBuffer expand = new StringBuffer(); 405 406 // For now assume we read from the beginning of the string. 407 for (char c = source.first(); 408 c != CharacterIterator.DONE; 409 c = source.next()) 410 expand.append(c); 411 412 setText(expand.toString()); 413 } 414 415 /** 416 * This method returns the current offset into the <code>String</code> 417 * that is being iterated over. 418 * 419 * @return The iteration index position. 420 * 421 * @since 1.2 422 */ 423 public int getOffset() 424 { 425 return textIndex; 426 } 427 428 /** 429 * This method sets the iteration index position into the current 430 * <code>String</code> to the specified value. This value must not 431 * be negative and must not be greater than the last index position 432 * in the <code>String</code>. 433 * 434 * @param offset The new iteration index position. 435 * 436 * @exception IllegalArgumentException If the new offset is not valid. 437 */ 438 public void setOffset(int offset) 439 { 440 if (offset < 0) 441 throw new IllegalArgumentException("Negative offset: " + offset); 442 443 if (offset > (text.length() - 1)) 444 throw new IllegalArgumentException("Offset too large: " + offset); 445 446 for (index = 0; index < text_decomposition.length; index++) 447 { 448 if (offset <= text_indexes[index]) 449 break; 450 } 451 /* 452 * As text_indexes[0] == 0, we should not have to take care whether index is 453 * greater than 0. It is always. 454 */ 455 if (text_indexes[index] == offset) 456 textIndex = offset; 457 else 458 textIndex = text_indexes[index-1]; 459 } 460 461 /** 462 * This method returns the maximum length of any expansion sequence that 463 * ends with the specified collation order value. (Whatever that means). 464 * 465 * @param value The collation order value 466 * 467 * @return The maximum length of an expansion sequence. 468 */ 469 public int getMaxExpansion(int value) 470 { 471 return 1; 472 } 473 }