Class REXML::Text
In: lib/rexml/text.rb
Parent: Child

Represents text nodes in an XML document

Methods

Included Modules

Comparable

Constants

SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]   The order in which the substitutions occur
SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
SLAICEPS = [ '<', '>', '"', "'", '&' ]   Characters which are substituted in written strings
SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
REFERENCE = /#{Entity::REFERENCE}/
EREFERENCE = /&(?!#{Entity::NAME};)/

Attributes

raw  [RW]  If raw is true, then REXML leaves the value alone

Public Class methods

Constructor arg if a String, the content is set to the String. If a Text, the object is shallowly cloned.

respect_whitespace (boolean, false) if true, whitespace is respected

parent (nil) if this is a Parent object, the parent will be set to this.

raw (nil) This argument can be given three values. If true, then the value used to construct this object is expected to contain no unescaped XML markup, and REXML will not change the text. If this value is false, the string may contain any characters, and REXML will escape any and all defined entities whose values are contained in the text. If this value is nil (the default), then the raw value of the parent will be used as the raw value for this node. If there is no raw value for the parent, and no value is supplied, the default is false. Use this field if you have entities defined for some text, and you don't want REXML to escape that text in output.

  Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
  Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
  Text.new( "<&", false, nil, true )  #-> Parse exception
  Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
  # Assume that the entity "s" is defined to be "sean"
  # and that the entity    "r" is defined to be "russell"
  Text.new( "sean russell" )          #-> "&s; &r;"
  Text.new( "sean russell", false, nil, true ) #-> "sean russell"

entity_filter (nil) This can be an array of entities to match in the supplied text. This argument is only useful if raw is set to false.

  Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
  Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"

In the last example, the entity_filter argument is ignored.

illegal (ILLEGAL) INTERNAL USE ONLY. The pattern that a raw string is checked against; a match raises an exception.

[Source]

    # File lib/rexml/text.rb, line 60
60:     def initialize(arg, respect_whitespace=false, parent=nil, raw=nil, 
61:       entity_filter=nil, illegal=ILLEGAL )
62: 
63:       @raw = false
64: 
65:       if parent
66:         super( parent )
67:         @raw = parent.raw 
68:       else
69:         @parent = nil
70:       end
71: 
72:       @raw = raw unless raw.nil?
73:       @entity_filter = entity_filter
74:       @normalized = @unnormalized = nil
75: 
76:       if arg.kind_of? String
77:         @string = arg.clone
78:         @string.squeeze!(" \n\t") unless respect_whitespace
79:       elsif arg.kind_of? Text
80:         @string = arg.to_s
81:         @raw = arg.raw
82:       elsif
83:         raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})"
84:       end
85: 
86:       @string.gsub!( /\r\n?/, "\n" )
87: 
88:       # check for illegal characters
89:       if @raw
90:         if @string =~ illegal
91:           raise "Illegal character '#{$1}' in raw string \"#{@string}\""
92:         end
93:       end
94:     end

Escapes all possible entities

[Source]

     # File lib/rexml/text.rb, line 288
288:     def Text::normalize( input, doctype=nil, entity_filter=nil )
289:       copy = input
290:       # Doing it like this rather than in a loop improves the speed
291:       #copy = copy.gsub( EREFERENCE, '&amp;' )
292:       copy = copy.gsub( "&", "&amp;" )
293:       if doctype
294:         # Replace all ampersands that aren't part of an entity
295:         doctype.entities.each_value do |entity|
296:           copy = copy.gsub( entity.value, 
297:             "&#{entity.name};" ) if entity.value and 
298:               not( entity_filter and entity_filter.include?(entity) )
299:         end
300:       else
301:         # Replace all ampersands that aren't part of an entity
302:         DocType::DEFAULT_ENTITIES.each_value do |entity|
303:           copy = copy.gsub(entity.value, "&#{entity.name};" )
304:         end
305:       end
306:       copy
307:     end

Reads text, substituting entities

[Source]

     # File lib/rexml/text.rb, line 262
262:     def Text::read_with_substitution( input, illegal=nil )
263:       copy = input.clone
264: 
265:       if copy =~ illegal
266:         raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
267:       end if illegal
268:       
269:       copy.gsub!( /\r\n?/, "\n" )
270:       if copy.include? ?&
271:         copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
272:         copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
273:         copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
274:         copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
275:         copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
276:         copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {|m|
277:           m=$1
278:           #m='0' if m==''
279:           m = "0#{m}" if m[0] == ?x
280:           [Integer(m)].pack('U*')
281:         }
282:       end
283:       copy
284:     end

Unescapes all possible entities

[Source]

     # File lib/rexml/text.rb, line 310
310:     def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
311:       rv = string.clone
312:       rv.gsub!( /\r\n?/, "\n" )
313:       matches = rv.scan( REFERENCE )
314:       return rv if matches.size == 0
315:       rv.gsub!( NUMERICENTITY ) {|m|
316:         m=$1
317:         m = "0#{m}" if m[0] == ?x
318:         [Integer(m)].pack('U*')
319:       }
320:       matches.collect!{|x|x[0]}.compact!
321:       if matches.size > 0
322:         if doctype
323:           matches.each do |entity_reference|
324:             unless filter and filter.include?(entity_reference)
325:               entity_value = doctype.entity( entity_reference )
326:               re = /&#{entity_reference};/
327:               rv.gsub!( re, entity_value ) if entity_value
328:             end
329:           end
330:         else
331:           matches.each do |entity_reference|
332:             unless filter and filter.include?(entity_reference)
333:               entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
334:               re = /&#{entity_reference};/
335:               rv.gsub!( re, entity_value.value ) if entity_value
336:             end
337:           end
338:         end
339:         rv.gsub!( /&amp;/, '&' )
340:       end
341:       rv
342:     end

Public Instance methods

Appends text to this text node. The text is appended in the raw mode of this text node.

[Source]

     # File lib/rexml/text.rb, line 112
112:     def <<( to_append )
113:       @string << to_append.gsub( /\r\n?/, "\n" )
114:     end

other: a String or a Text. Returns the result of (to_s <=> other.to_s).

[Source]

     # File lib/rexml/text.rb, line 119
119:     def <=>( other )
120:       to_s() <=> other.to_s
121:     end

[Source]

     # File lib/rexml/text.rb, line 105
105:     def clone
106:       return Text.new(self)
107:     end

[Source]

     # File lib/rexml/text.rb, line 100
100:     def empty?
101:       @string.size==0
102:     end

[Source]

     # File lib/rexml/text.rb, line 202
202:     def indent_text(string, level=1, style="\t", indentfirstline=true)
203:       return string if level < 0
204:       new_string = ''
205:       string.each { |line|
206:         indent_string = style * level
207:         new_line = (indent_string + line).sub(/[\s]+$/,'')
208:         new_string << new_line
209:       }
210:       new_string.strip! unless indentfirstline
211:       return new_string
212:     end

[Source]

     # File lib/rexml/text.rb, line 150
150:     def inspect
151:       @string.inspect
152:     end

[Source]

    # File lib/rexml/text.rb, line 96
96:     def node_type
97:       :text
98:     end

Returns the string value of this text node. This string is always escaped, meaning that it is a valid XML text node string, and all entities that can be escaped, have been inserted. This method respects the entity filter set in the constructor.

  # Assume that the entity "s" is defined to be "sean", and that the
  # entity "r" is defined to be "russell"
  t = Text.new( "< & sean russell", false, nil, false, ['s'] )
  t.to_s   #-> "&lt; &amp; &s; russell"
  t = Text.new( "< & &s; russell", false, nil, false )
  t.to_s   #-> "&lt; &amp; &s; russell"
  u = Text.new( "sean russell", false, nil, true )
  u.to_s   #-> "sean russell"

[Source]

     # File lib/rexml/text.rb, line 137
137:     def to_s
138:       return @string if @raw
139:       return @normalized if @normalized
140: 
141:       doctype = nil
142:       if @parent
143:         doc = @parent.document
144:         doctype = doc.doctype if doc
145:       end
146: 
147:       @normalized = Text::normalize( @string, doctype, @entity_filter )
148:     end

Returns the string value of this text. This is the text without entities, as it might be used programmatically, or printed to the console. This ignores the ‘raw’ attribute setting, and any entity_filter.

  # Assume that the entity "s" is defined to be "sean", and that the
  # entity "r" is defined to be "russell"
  t = Text.new( "< & sean russell", false, nil, false, ['s'] )
  t.value   #-> "< & sean russell"
  t = Text.new( "< & &s; russell", false, nil, false )
  t.value   #-> "< & sean russell"
  u = Text.new( "sean russell", false, nil, true )
  u.value   #-> "sean russell"

[Source]

     # File lib/rexml/text.rb, line 167
167:     def value
168:       @unnormalized if @unnormalized
169:       doctype = nil
170:       if @parent
171:         doc = @parent.document
172:         doctype = doc.doctype if doc
173:       end
174:       @unnormalized = Text::unnormalize( @string, doctype )
175:     end

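Sets the contents of this text node. This expects the text to be unnormalized. Setting the value also resets the raw flag to false and discards the cached normalized and unnormalized strings. Note that the listing below ends with @raw = false, so the method body does not return self; as with any Ruby assignment, an expression such as e[0].value = "bar" evaluates to the assigned string.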

  e = Element.new( "a" )
  e.add_text( "foo" )   # <a>foo</a>
  e[0].value = "bar"    # <a>bar</a>
  e[0].value = "<a>"    # <a>&lt;a&gt;</a>

[Source]

     # File lib/rexml/text.rb, line 184
184:     def value=( val )
185:       @string = val.gsub( /\r\n?/, "\n" )
186:       @unnormalized = nil
187:       @normalized = nil
188:       @raw = false
189:     end

[Source]

     # File lib/rexml/text.rb, line 191
191:      def wrap(string, width, addnewline=false)
192:        # Recursively wrap string at width.
193:        return string if string.length <= width
194:        place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
195:        if addnewline then
196:          return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
197:        else
198:          return string[0,place] + "\n" + wrap(string[place+1..-1], width)
199:        end
200:      end

DEPRECATED

See REXML::Formatters

[Source]

     # File lib/rexml/text.rb, line 217
217:     def write( writer, indent=-1, transitive=false, ie_hack=false ) 
218:       Kernel.warn("#{self.class.name}.write is deprecated.  See REXML::Formatters")
219:       formatter = if indent > -1
220:           REXML::Formatters::Pretty.new( indent )
221:         else
222:           REXML::Formatters::Default.new
223:         end
224:       formatter.write( self, writer )
225:     end

Writes out text, substituting special characters beforehand. out: a String, IO, or any other object supporting <<( String ). input: the text to substitute and then write out.

  z=utf8.unpack("U*")
  ascOut=""
  z.each{|r|
    if r <  0x100
      ascOut.concat(r.chr)
    else
      ascOut.concat(sprintf("&#x%x;", r))
    end
  }
  puts ascOut

[Source]

     # File lib/rexml/text.rb, line 249
249:     def write_with_substitution out, input
250:       copy = input.clone
251:       # Doing it like this rather than in a loop improves the speed
252:       copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
253:       copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
254:       copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
255:       copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
256:       copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
257:       copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
258:       out << copy
259:     end

FIXME This probably won't work properly

[Source]

     # File lib/rexml/text.rb, line 229
229:     def xpath
230:       path = @parent.xpath
231:       path += "/text()"
232:       return path
233:     end

[Validate]