Class | REXML::Text |
In: |
lib/rexml/text.rb
|
Parent: | Child |
Represents text nodes in an XML document
SPECIALS | = | [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ] | The order in which the substitutions occur | |
SUBSTITUTES | = | ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;'] | ||
SLAICEPS | = | [ '<', '>', '"', "'", '&' ] | Characters which are substituted in written strings | |
SETUTITSBUS | = | [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ] | ||
ILLEGAL | = | /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um | ||
NUMERICENTITY | = | /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ | ||
REFERENCE | = | /#{Entity::REFERENCE}/ | ||
EREFERENCE | = | /&(?!#{Entity::NAME};)/ |
Constructor arg if a String, the content is set to the String. If a Text, the object is shallowly cloned.
respect_whitespace (boolean, false) if true, whitespace is respected
parent (nil) if this is a Parent object, the parent will be set to this.
raw (nil) This argument can be given three values. If true, then the value used to construct this object is expected to contain no unescaped XML markup, and REXML will not change the text. If this value is false, the string may contain any characters, and REXML will escape any and all defined entities whose values are contained in the text. If this value is nil (the default), then the raw value of the parent will be used as the raw value for this node. If there is no raw value for the parent, and no value is supplied, the default is false. Use this field if you have entities defined for some text, and you don't want REXML to escape that text in output.
Text.new( "<&", false, nil, false ) #-> "&lt;&amp;" Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;" Text.new( "<&", false, nil, true ) #-> Parse exception Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;" # Assume that the entity "s" is defined to be "sean" # and that the entity "r" is defined to be "russell" Text.new( "sean russell" ) #-> "&s; &r;" Text.new( "sean russell", false, nil, true ) #-> "sean russell"
entity_filter (nil) This can be an array of entities to match in the supplied text. This argument is only useful if raw is set to false.
Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell" Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
In the last example, the entity_filter argument is ignored.
pattern INTERNAL USE ONLY
# File lib/rexml/text.rb, line 60
# Builds a text node from a String or from another Text.
#
# arg::                a String (copied) or a Text (its stored string and
#                      raw flag are copied). Any other type raises.
# respect_whitespace:: when false, repeated spaces, newlines and tabs in a
#                      String +arg+ are squeezed to a single occurrence.
# parent::             when given, it is passed to +super+ and its +raw+
#                      value becomes the default raw flag of this node.
# raw::                a non-nil value overrides the inherited raw flag.
# entity_filter::      stored as-is for later use by #to_s.
# illegal::            pattern a raw string must not match. INTERNAL USE ONLY.
#
# Raises RuntimeError when +arg+ is neither a String nor a Text, or when the
# node is raw and its string matches +illegal+.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
  entity_filter=nil, illegal=ILLEGAL )

  @raw = false

  if parent
    super( parent )
    @raw = parent.raw
  else
    @parent = nil
  end

  @raw = raw unless raw.nil?
  @entity_filter = entity_filter
  @normalized = @unnormalized = nil

  if arg.kind_of? String
    # dup rather than clone: clone keeps the frozen flag, which would make
    # the in-place squeeze!/gsub! below raise for a frozen argument.
    @string = arg.dup
    @string.squeeze!(" \n\t") unless respect_whitespace
  elsif arg.kind_of? Text
    # Copy the stored string itself. Going through arg.to_s would hand back
    # the already-escaped form for a non-raw node (which #to_s would then
    # escape a second time), or the very same String object for a raw node
    # (which the gsub! below and #<< would then mutate in both nodes).
    @string = arg.instance_variable_get( :@string ).dup
    @raw = arg.raw
  else
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  # Normalize line endings to "\n".
  @string.gsub!( /\r\n?/, "\n" )

  # check for illegal characters
  if @raw
    if @string =~ illegal
      raise "Illegal character '#{$1}' in raw string \"#{@string}\""
    end
  end
end
Escapes all possible entities
# File lib/rexml/text.rb, line 288
# Escapes all possible entities.
#
# input::         the unescaped string; it is not modified.
# doctype::       when given, every entity of +doctype.entities+ that has a
#                 value is substituted; otherwise DocType::DEFAULT_ENTITIES
#                 is used.
# entity_filter:: optional list of entity names that must NOT be substituted
#                 (only consulted when +doctype+ is given).
#
# Returns a new, escaped String.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  # Doing it like this rather than in a loop improves the speed
  #copy = copy.gsub( EREFERENCE, '&amp;' )
  # Ampersands are escaped first so that the "&name;" references inserted
  # below are not escaped again.
  copy = input.gsub( "&", "&amp;" )
  if doctype
    doctype.entities.each_value do |entity|
      next unless entity.value
      # The filter holds entity names (as in Text::unnormalize), so compare
      # against the name rather than the Entity object.
      next if entity_filter and entity_filter.include?( entity.name )
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  end
  copy
end
Reads text, substituting entities
# File lib/rexml/text.rb, line 262
# Reads text, substituting entities.
#
# input::   the escaped string; it is not modified.
# illegal:: optional pattern; if the text matches it a ParseException is
#           raised.
#
# Returns a new String with line endings normalized to "\n" and, when the
# text contains an ampersand, the SETUTITSBUS patterns replaced by the
# matching SLAICEPS characters and numeric character references replaced by
# the UTF-8 encoding of their code point.
def Text::read_with_substitution( input, illegal=nil )
  # dup rather than clone so that a frozen input can still be edited in place.
  copy = input.dup

  if illegal and copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end

  copy.gsub!( /\r\n?/, "\n" )
  if copy.include? ?&
    copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
    copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
    copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
    copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
    copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
    # Numeric references: "&#65;" or "&#x41;". Upper-case hex digits are
    # accepted too, matching NUMERICENTITY.
    copy.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
      code = $1
      # Turn "x41" into "0x41" so Integer() parses it as hexadecimal.
      code = "0#{code}" if code[0] == ?x
      [Integer(code)].pack('U*')
    }
  end
  copy
end
Unescapes all possible entities
# File lib/rexml/text.rb, line 310
# Unescapes all possible entities.
#
# string::  the escaped string; it is not modified.
# doctype:: when given, entity values are looked up with +doctype.entity+;
#           otherwise DocType::DEFAULT_ENTITIES is used.
# filter::  optional list of entity names that must be left untouched.
# illegal:: accepted for interface compatibility; not used by this method.
#
# Returns a new String with line endings normalized to "\n", numeric
# character references decoded, known entity references expanded and
# "&amp;" turned back into "&".
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  # dup rather than clone so that a frozen input can still be edited in place.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE )
  return rv if matches.size == 0
  rv.gsub!( NUMERICENTITY ) {
    code = $1
    # Turn "x41" into "0x41" so Integer() parses it as hexadecimal.
    code = "0#{code}" if code[0] == ?x
    [Integer(code)].pack('U*')
  }
  # Keep only the named references; entries without a captured name are nil.
  names = matches.collect { |m| m[0] }.compact
  if names.size > 0
    names.each do |entity_reference|
      next if filter and filter.include?( entity_reference )
      if doctype
        entity_value = doctype.entity( entity_reference )
      else
        entity = DocType::DEFAULT_ENTITIES[ entity_reference ]
        entity_value = entity && entity.value
      end
      # Match the reference literally (a name may contain regex
      # metacharacters such as ".") and use the block form so the value is
      # inserted verbatim, without backslash interpretation.
      rv.gsub!( "&#{entity_reference};" ) { entity_value } if entity_value
    end
    # Done last so that "&amp;lt;" becomes "&lt;" rather than "<".
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
Appends text to this text node. The text is appended in the raw mode of this text node.
# File lib/rexml/text.rb, line 112
# Appends +to_append+ to the stored string after normalizing its line
# endings to "\n". The text is appended in the raw mode of this text node.
#
# The cached results of #to_s and #value are discarded, because they were
# computed from the string before the append.
#
# Returns the stored string.
def <<( to_append )
  appended = @string << to_append.gsub( /\r\n?/, "\n" )
  @normalized = @unnormalized = nil
  appended
end
# File lib/rexml/text.rb, line 202
# Prefixes every line of +string+ with +style+ repeated +level+ times.
#
# string::          the text to indent.
# level::           number of repetitions of +style+; a negative level
#                   returns +string+ unchanged.
# style::           the indentation unit.
# indentfirstline:: when false, leading and trailing whitespace is stripped
#                   from the result, which removes the first line's indent.
#
# Trailing whitespace of each line, including its newline, is removed
# before the line is appended to the result.
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0
  new_string = ''
  indent_string = style * level
  # String#each is gone since Ruby 1.9; each_line is the equivalent that
  # exists in every Ruby version.
  string.each_line { |line|
    new_line = (indent_string + line).sub(/[\s]+$/,'')
    new_string << new_line
  }
  new_string.strip! unless indentfirstline
  return new_string
end
Returns the string value of this text node. This string is always escaped, meaning that it is a valid XML text node string, and all entities that can be escaped, have been inserted. This method respects the entity filter set in the constructor.
# Assume that the entity "s" is defined to be "sean", and that the # entity "r" is defined to be "russell" t = Text.new( "< & sean russell", false, nil, false, ['s'] ) t.to_s #-> "&lt; &amp; &s; russell" t = Text.new( "< & &s; russell", false, nil, false ) t.to_s #-> "&lt; &amp; &s; russell" u = Text.new( "sean russell", false, nil, true ) u.to_s #-> "sean russell"
# File lib/rexml/text.rb, line 137
# Returns the escaped string value of this node.
#
# A raw node returns its stored string untouched. Otherwise the stored
# string is escaped with Text::normalize, using the doctype of the parent's
# document (when there is one) and the entity filter given to the
# constructor. The escaped result is cached.
def to_s
  if @raw
    @string
  elsif @normalized
    @normalized
  else
    document = @parent ? @parent.document : nil
    doctype = document ? document.doctype : nil
    @normalized = Text::normalize( @string, doctype, @entity_filter )
  end
end
Returns the string value of this text. This is the text without entities, as it might be used programmatically, or printed to the console. This ignores the ‘raw’ attribute setting, and any entity_filter.
# Assume that the entity "s" is defined to be "sean", and that the # entity "r" is defined to be "russell" t = Text.new( "< & sean russell", false, nil, false, ['s'] ) t.value #-> "< & sean russell" t = Text.new( "< & &s; russell", false, nil, false ) t.value #-> "< & sean russell" u = Text.new( "sean russell", false, nil, true ) u.value #-> "sean russell"
# File lib/rexml/text.rb, line 167
# Returns the unescaped string value of this node.
#
# The stored string is unescaped with Text::unnormalize, using the doctype
# of the parent's document when there is one. The result is cached and
# returned directly on later calls.
def value
  # Without the explicit return the cached value was evaluated and thrown
  # away, so the text was unnormalized again on every call.
  return @unnormalized if @unnormalized
  doctype = nil
  if @parent
    doc = @parent.document
    doctype = doc.doctype if doc
  end
  @unnormalized = Text::unnormalize( @string, doctype )
end
Sets the contents of this text node. This expects the text to be unnormalized. It returns self.
e = Element.new( "a" ) e.add_text( "foo" ) # <a>foo</a> e[0].value = "bar" # <a>bar</a> e[0].value = "<a>" # <a>&lt;a&gt;</a>
# File lib/rexml/text.rb, line 184
# Replaces the content of this node with +val+, which is expected to be
# unnormalized text. Line endings are normalized to "\n", both cached
# string forms are dropped, and the node is switched to non-raw mode.
def value=( val )
  with_unix_newlines = val.gsub( /\r\n?/, "\n" )
  @string = with_unix_newlines
  @normalized = @unnormalized = nil
  @raw = false
end
# File lib/rexml/text.rb, line 191
# Recursively wraps +string+ so that lines break at a space at or before
# +width+ where possible.
#
# string::     the text to wrap.
# width::      the preferred maximum line length.
# addnewline:: when true and the string has to be broken, the result starts
#              with an extra "\n". It is not passed on to the recursive
#              calls.
#
# A word longer than +width+ cannot be broken: the line is then cut at the
# first space after +width+, or left as it is when no space remains.
def wrap(string, width, addnewline=false)
  return string if string.length <= width
  # Last space at or before the cutoff; if there is none, fall back to the
  # first space after it instead of failing on a nil position.
  place = string.rindex(' ', width) || string.index(' ', width)
  return string unless place
  wrapped = string[0,place] + "\n" + wrap(string[place+1..-1], width)
  addnewline ? "\n" + wrapped : wrapped
end
# File lib/rexml/text.rb, line 217
# Deprecated: emits a warning and delegates to a formatter.
#
# A REXML::Formatters::Pretty built with +indent+ is used when +indent+ is
# greater than -1, a REXML::Formatters::Default otherwise. The +transitive+
# and +ie_hack+ arguments are accepted but not used here.
def write( writer, indent=-1, transitive=false, ie_hack=false )
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
  if indent > -1
    formatter = REXML::Formatters::Pretty.new( indent )
  else
    formatter = REXML::Formatters::Default.new
  end
  formatter.write( self, writer )
end
Writes out text, substituting special characters beforehand. out A String, IO, or any other object supporting <<( String ) input the text to substitute and then write out
z=utf8.unpack("U*") ascOut="" z.each{|r| if r < 0x100 ascOut.concat(r.chr) else ascOut.concat(sprintf("&#x%x;", r)) end } puts ascOut
# File lib/rexml/text.rb, line 249
# Writes +input+ to +out+ after replacing each SPECIALS pattern with the
# SUBSTITUTES entry at the same index, in index order.
#
# out::   any object supporting <<( String ).
# input:: the text to substitute; it is not modified.
#
# Returns the result of +out << copy+.
def write_with_substitution out, input
  # dup rather than clone: clone keeps the frozen flag, which would make the
  # in-place substitutions below raise for a frozen input.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
  copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
  copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
  copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
  copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
  copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
  out << copy
end