1. URI::
  2. RFC2396_Parser

class URI::RFC2396_Parser

Class that parses Strings into URIs.

It contains a Hash of patterns and a Hash of Regexps that match and validate URI components.

Attributes

pattern[R]

The Hash of patterns.

See also initialize_pattern.

regexp[R]

The Hash of Regexps.

See also initialize_regexp.

Public Class Methods

Source
# File lib/uri/rfc2396_parser.rb, line 99
#
# Builds the parser's two lookup tables and makes them immutable.
#
# opts:: Hash of pattern overrides, handed to initialize_pattern.
#
# After construction @pattern holds the Hash returned by
# initialize_pattern and @regexp the Hash returned by initialize_regexp;
# both Hashes and every value inside them are frozen.
def initialize(opts = {})
  # Hash#each_value returns its receiver, so each table can be built,
  # have its values frozen and then be frozen itself in a single chain.
  @pattern = initialize_pattern(opts).each_value(&:freeze).freeze
  @regexp = initialize_regexp(@pattern).each_value(&:freeze).freeze
end

Synopsis

URI::RFC2396_Parser.new([opts])

Args

The constructor accepts a hash of options for the parser. Keys of the options are pattern names of URI components and values are pattern strings. The constructor generates a set of regexps for parsing URIs.

You can use the following keys:

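* :ESCAPED (URI::PATTERN::ESCAPED by default)
* :UNRESERVED (URI::PATTERN::UNRESERVED by default)
* :RESERVED (URI::PATTERN::RESERVED by default)
* :DOMLABEL (URI::PATTERN::DOMLABEL by default)
* :TOPLABEL (URI::PATTERN::TOPLABEL by default)
* :HOSTNAME (when omitted, initialize_pattern falls back to its built-in pattern (?:[a-zA-Z0-9\-.]|%\h\h)+ rather than URI::PATTERN::HOSTNAME)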

Examples

p = URI::RFC2396_Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP http://example.jp/%uABCD>
URI.parse(u.to_s) #=> raises URI::InvalidURIError

s = "http://example.com/ABCD"
u1 = p.parse(s) #=> #<URI::HTTP http://example.com/ABCD>
u2 = URI.parse(s) #=> #<URI::HTTP http://example.com/ABCD>
u1 == u2 #=> true
u1.eql?(u2) #=> false

Public Instance Methods

Source
# File lib/uri/rfc2396_parser.rb, line 286
#
# Percent-encodes every part of +str+ that +unsafe+ matches.
#
# str::    String to make safe.
# unsafe:: Regexp selecting the text to encode. Anything that is not a
#          Regexp is quoted and wrapped in a character class, so a
#          String is treated as a set of characters to encode.
#
# Returns a new String tagged as US-ASCII in which each byte of every
# match is written as "%XX" with uppercase hex digits.
def escape(str, unsafe = @regexp[:UNSAFE])
  unless unsafe.kind_of?(Regexp)
    # perhaps unsafe is String object
    unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false)
  end

  encoded = str.gsub(unsafe) do |match|
    # Work byte-wise so a multibyte character expands to one %XX per byte.
    # Joining mapped pieces avoids appending to a string literal, which
    # fails when string literals are frozen.
    match.each_byte.map { |byte| format('%%%02X', byte) }.join
  end
  encoded.force_encoding(Encoding::US_ASCII)
end

Args

str

String to make safe

unsafe

Regexp to apply. Defaults to self.regexp[:UNSAFE]

Description

Constructs a safe String from str by replacing each unsafe character with its percent-encoded byte codes.

Source
# File lib/uri/rfc2396_parser.rb, line 248
#
# Scans +str+ with the Regexp returned by make_regexp(schemes).
#
# str::     String to search.
# schemes:: passed through unchanged to make_regexp.
#
# Without a block, returns an Array of the matched substrings.
# With a block, yields each matched substring and returns nil.
def extract(str, schemes = nil)
  matcher = make_regexp(schemes)

  unless block_given?
    found = []
    # $& is the whole match; it is collected from the block form of scan
    # rather than taken from scan's return value.
    str.scan(matcher) { found << $& }
    return found
  end

  str.scan(matcher) { yield $& }
  nil
end

Args

str

String to search

schemes

Patterns to apply to str

Description

Extracts URIs from str. If no block is given, returns an Array of the matched substrings; otherwise calls the block for each match and returns nil.

See also make_regexp.

Source
# File lib/uri/rfc2396_parser.rb, line 222
#
# Folds +uris+ into a single result by repeated +merge+ calls.
#
# uris:: the pieces to combine. The first is converted with
#        convert_to_uri; each later one is passed as the argument to
#        +merge+ on the running result.
#
# Returns the converted first element when it is the only one.
def join(*uris)
  first, *others = uris
  others.inject(convert_to_uri(first)) { |merged, uri| merged.merge(uri) }
end

Args

uris

an Array of Strings

Description

Attempts to parse and merge a set of URIs.

Source
# File lib/uri/rfc2396_parser.rb, line 261
#
# Returns the Regexp used to find absolute URIs.
#
# schemes:: optional scheme name(s); splatted into Regexp.union.
#
# With no +schemes+ the prebuilt @regexp[:ABS_URI_REF] is returned.
# Otherwise a new extended-mode Regexp is built from @pattern[:X_ABS_URI],
# prefixed with a lookahead that only admits the given schemes
# (case-insensitively) followed by ":".
def make_regexp(schemes = nil)
  return @regexp[:ABS_URI_REF] unless schemes

  /(?=(?i:#{Regexp.union(*schemes).source}):)#{@pattern[:X_ABS_URI]}/x
end

Returns the Regexp self.regexp[:ABS_URI_REF] by default. If schemes is provided, returns instead a Regexp that combines a Regexp.union of the schemes with self.pattern[:X_ABS_URI].

Source
# File lib/uri/rfc2396_parser.rb, line 208
#
# Splits +uri+ into its components with split and passes them, followed
# by this parser, to URI.for; returns whatever URI.for returns.
#
# uri:: String to parse.
def parse(uri)
  components = split(uri)
  URI.for(*components, self)
end

Args

uri

String

Description

Parses uri and constructs either a matching URI scheme object (File, FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic.

Usage

URI::RFC2396_PARSER.parse("ldap://ldap.example.com/dc=example?user=john") #=> #<URI::LDAP ldap://ldap.example.com/dc=example?user=john>

Source
# File lib/uri/rfc2396_parser.rb, line 120
#
# Splits +uri+ into its nine components.
#
# uri:: the text to split. It is tried against @regexp[:ABS_URI] first
#       and @regexp[:REL_URI] second; the empty String is accepted
#       as-is.
#
# Returns the Array
#   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
# where components that are absent are nil. +path+ is set to '' when both
# path and opaque are missing.
#
# Raises InvalidURIError when +uri+ matches neither Regexp, or when an
# absolute match lacks a scheme or lacks all of opaque/path/host/registry.
def split(uri)
  case uri
  when ''
    # null uri: every component keeps its nil default

  when @regexp[:ABS_URI]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = Regexp.last_match.captures

    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

    # absoluteURI   = scheme ":" ( hier_part | opaque_part )
    # hier_part     = ( net_path | abs_path ) [ "?" query ]
    # opaque_part   = uric_no_slash *uric

    # abs_path      = "/"  path_segments
    # net_path      = "//" authority [ abs_path ]

    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]

    unless scheme
      raise InvalidURIError,
        "bad URI (absolute but no scheme): #{uri}"
    end
    unless opaque || path || host || registry
      raise InvalidURIError,
        "bad URI (absolute but no path): #{uri}"
    end

  when @regexp[:REL_URI]
    scheme = nil
    opaque = nil

    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = Regexp.last_match.captures

    # The relative pattern captures the path in two pieces; glue them
    # together when both are present, otherwise use whichever exists.
    path = if rel_segment && abs_path
             rel_segment + abs_path
           else
             rel_segment || abs_path
           end

    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

    # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]

    # net_path      = "//" authority [ abs_path ]
    # abs_path      = "/"  path_segments
    # rel_path      = rel_segment [ abs_path ]

    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]

  else
    raise InvalidURIError, "bad URI (is not URI?): #{uri}"
  end

  path = '' unless path || opaque # (see RFC2396 Section 5.2)

  [
    scheme,
    userinfo, host, port,         # X
    registry,                     # X
    path,                         # Y
    opaque,                       # Y
    query,
    fragment
  ]
end

Returns a split URI against regexp[:ABS_URI].

Source
# File lib/uri/rfc2396_parser.rb, line 317
#
# Replaces every part of +str+ matched by +escaped+ with the single byte
# named by the two characters following the match's first character.
#
# str::     String to remove escapes from.
# escaped:: Regexp selecting the escape sequences.
#
# Each decoded byte is tagged with the encoding of +str+, except that
# UTF-8 is used when +str+ is US-ASCII. Returns a new String.
def unescape(str, escaped = @regexp[:ESCAPED])
  enc = str.encoding
  enc = Encoding::UTF_8 if enc == Encoding::US_ASCII
  # match[1, 2] skips the leading "%" and takes the two hex digits.
  str.gsub(escaped) { |match| [match[1, 2]].pack('H2').force_encoding(enc) }
end

Args

str

String to remove escapes from

escaped

Regexp to apply. Defaults to self.regexp[:ESCAPED]

Description

Removes escapes from str.

Private Instance Methods

Source
# File lib/uri/rfc2396_parser.rb, line 528
#
# Coerces +uri+ into a URI object.
#
# uri:: a URI::Generic, which is returned untouched, or anything
#       String.try_convert accepts, which is handed to parse.
#
# Raises ArgumentError when +uri+ is neither.
def convert_to_uri(uri)
  return uri if uri.is_a?(URI::Generic)

  string = String.try_convert(uri)
  unless string
    raise ArgumentError,
      "bad argument (expected URI object or URI string)"
  end

  parse(string)
end

Returns uri as-is if it is a URI, or converts it to a URI if it is a String.

Source
# File lib/uri/rfc2396_parser.rb, line 337
#
# Builds the Hash of pattern source strings, keyed by component name.
#
# opts:: Hash of overrides. The keys read are :ESCAPED, :UNRESERVED,
#        :RESERVED, :DOMLABEL, :TOPLABEL and :HOSTNAME. +opts+ is only
#        read, never modified, so the caller's Hash is left intact and a
#        frozen Hash is accepted.
#
# Every key except :HOSTNAME falls back to the PATTERN constant of the
# same name; :HOSTNAME falls back to a literal defined below.
#
# Returns the new (unfrozen) Hash. :X_ABS_URI and :X_REL_URI are written
# in extended-regexp layout with numbered capture groups.
def initialize_pattern(opts = {})
  ret = {}
  ret[:ESCAPED] = escaped = opts[:ESCAPED] || PATTERN::ESCAPED
  ret[:UNRESERVED] = unreserved = opts[:UNRESERVED] || PATTERN::UNRESERVED
  ret[:RESERVED] = reserved = opts[:RESERVED] || PATTERN::RESERVED
  ret[:DOMLABEL] = opts[:DOMLABEL] || PATTERN::DOMLABEL
  ret[:TOPLABEL] = opts[:TOPLABEL] || PATTERN::TOPLABEL
  ret[:HOSTNAME] = hostname = opts[:HOSTNAME]

  # RFC 2396 (URI Generic Syntax)
  # RFC 2732 (IPv6 Literal Addresses in URL's)
  # RFC 2373 (IPv6 Addressing Architecture)

  # uric          = reserved | unreserved | escaped
  ret[:URIC] = uric = "(?:[#{unreserved}#{reserved}]|#{escaped})"
  # uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
  #                 "&" | "=" | "+" | "$" | ","
  ret[:URIC_NO_SLASH] = uric_no_slash = "(?:[#{unreserved};?:@&=+$,]|#{escaped})"
  # query         = *uric
  ret[:QUERY] = query = "#{uric}*"
  # fragment      = *uric
  ret[:FRAGMENT] = fragment = "#{uric}*"

  # hostname      = *( domainlabel "." ) toplabel [ "." ]
  # reg-name      = *( unreserved / pct-encoded / sub-delims ) # RFC3986
  unless hostname
    ret[:HOSTNAME] = hostname = "(?:[a-zA-Z0-9\\-.]|%\\h\\h)+"
  end

  # RFC 2373, APPENDIX B:
  # IPv6address = hexpart [ ":" IPv4address ]
  # IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  # hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
  # hexseq  = hex4 *( ":" hex4)
  # hex4    = 1*4HEXDIG
  #
  # XXX: This definition has a flaw. "::" + IPv4address must be
  # allowed too.  Here is a replacement.
  #
  # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  ret[:IPV4ADDR] = ipv4addr = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
  # hex4     = 1*4HEXDIG
  hex4 = "[#{PATTERN::HEX}]{1,4}"
  # lastpart = hex4 | IPv4address
  lastpart = "(?:#{hex4}|#{ipv4addr})"
  # hexseq1  = *( hex4 ":" ) hex4
  hexseq1 = "(?:#{hex4}:)*#{hex4}"
  # hexseq2  = *( hex4 ":" ) lastpart
  hexseq2 = "(?:#{hex4}:)*#{lastpart}"
  # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
  ret[:IPV6ADDR] = ipv6addr = "(?:#{hexseq2}|(?:#{hexseq1})?::(?:#{hexseq2})?)"

  # IPv6prefix  = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
  # unused

  # ipv6reference = "[" IPv6address "]" (RFC 2732)
  ret[:IPV6REF] = ipv6ref = "\\[#{ipv6addr}\\]"

  # host          = hostname | IPv4address
  # host          = hostname | IPv4address | IPv6reference (RFC 2732)
  ret[:HOST] = host = "(?:#{hostname}|#{ipv4addr}|#{ipv6ref})"
  # port          = *digit
  ret[:PORT] = port = '\d*'
  # hostport      = host [ ":" port ]
  ret[:HOSTPORT] = hostport = "#{host}(?::#{port})?"

  # userinfo      = *( unreserved | escaped |
  #                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
  ret[:USERINFO] = userinfo = "(?:[#{unreserved};:&=+$,]|#{escaped})*"

  # pchar         = unreserved | escaped |
  #                 ":" | "@" | "&" | "=" | "+" | "$" | ","
  pchar = "(?:[#{unreserved}:@&=+$,]|#{escaped})"
  # param         = *pchar
  param = "#{pchar}*"
  # segment       = *pchar *( ";" param )
  segment = "#{pchar}*(?:;#{param})*"
  # path_segments = segment *( "/" segment )
  ret[:PATH_SEGMENTS] = path_segments = "#{segment}(?:/#{segment})*"

  # server        = [ [ userinfo "@" ] hostport ]
  server = "(?:#{userinfo}@)?#{hostport}"
  # reg_name      = 1*( unreserved | escaped | "$" | "," |
  #                     ";" | ":" | "@" | "&" | "=" | "+" )
  ret[:REG_NAME] = reg_name = "(?:[#{unreserved}$,;:@&=+]|#{escaped})+"
  # authority     = server | reg_name
  authority = "(?:#{server}|#{reg_name})"

  # rel_segment   = 1*( unreserved | escaped |
  #                     ";" | "@" | "&" | "=" | "+" | "$" | "," )
  ret[:REL_SEGMENT] = rel_segment = "(?:[#{unreserved};@&=+$,]|#{escaped})+"

  # scheme        = alpha *( alpha | digit | "+" | "-" | "." )
  ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][\\-+.#{PATTERN::ALPHA}\\d]*"

  # abs_path      = "/"  path_segments
  ret[:ABS_PATH] = abs_path = "/#{path_segments}"
  # rel_path      = rel_segment [ abs_path ]
  ret[:REL_PATH] = rel_path = "#{rel_segment}(?:#{abs_path})?"
  # net_path      = "//" authority [ abs_path ]
  ret[:NET_PATH] = net_path = "//#{authority}(?:#{abs_path})?"

  # hier_part     = ( net_path | abs_path ) [ "?" query ]
  ret[:HIER_PART] = hier_part = "(?:#{net_path}|#{abs_path})(?:\\?(?:#{query}))?"
  # opaque_part   = uric_no_slash *uric
  ret[:OPAQUE_PART] = opaque_part = "#{uric_no_slash}#{uric}*"

  # absoluteURI   = scheme ":" ( hier_part | opaque_part )
  ret[:ABS_URI] = abs_uri = "#{scheme}:(?:#{hier_part}|#{opaque_part})"
  # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
  ret[:REL_URI] = rel_uri = "(?:#{net_path}|#{abs_path}|#{rel_path})(?:\\?#{query})?"

  # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  ret[:URI_REF] = "(?:#{abs_uri}|#{rel_uri})?(?:##{fragment})?"

  ret[:X_ABS_URI] = "
    (#{scheme}):                           (?# 1: scheme)
    (?:
       (#{opaque_part})                    (?# 2: opaque)
    |
       (?:(?:
         //(?:
             (?:(?:(#{userinfo})@)?        (?# 3: userinfo)
               (?:(#{host})(?::(\\d*))?))? (?# 4: host, 5: port)
           |
             (#{reg_name})                 (?# 6: registry)
           )
         |
         (?!//))                           (?# XXX: '//' is the mark for hostport)
         (#{abs_path})?                    (?# 7: path)
       )(?:\\?(#{query}))?                 (?# 8: query)
    )
    (?:\\#(#{fragment}))?                  (?# 9: fragment)
  "

  ret[:X_REL_URI] = "
    (?:
      (?:
        //
        (?:
          (?:(#{userinfo})@)?       (?# 1: userinfo)
            (#{host})?(?::(\\d*))?  (?# 2: host, 3: port)
        |
          (#{reg_name})             (?# 4: registry)
        )
      )
    |
      (#{rel_segment})              (?# 5: rel_segment)
    )?
    (#{abs_path})?                  (?# 6: abs_path)
    (?:\\?(#{query}))?              (?# 7: query)
    (?:\\#(#{fragment}))?           (?# 8: fragment)
  "

  ret
end

Constructs the default Hash of patterns.

Source
# File lib/uri/rfc2396_parser.rb, line 495
#
# Compiles the pattern strings in +pattern+ into a Hash of Regexps.
#
# pattern:: Hash of pattern source strings keyed by Symbol; the keys
#           read are listed in the body below.
#
# Returns a new (unfrozen) Hash keyed by Symbol.
def initialize_regexp(pattern)
  ret = {}

  # for URI::split
  ret[:ABS_URI] = Regexp.new('\A\s*+' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED)
  ret[:REL_URI] = Regexp.new('\A\s*+' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED)

  # for URI::extract
  ret[:URI_REF]     = Regexp.new(pattern[:URI_REF])
  ret[:ABS_URI_REF] = Regexp.new(pattern[:X_ABS_URI], Regexp::EXTENDED)
  ret[:REL_URI_REF] = Regexp.new(pattern[:X_REL_URI], Regexp::EXTENDED)

  # for URI::escape/unescape
  ret[:ESCAPED] = Regexp.new(pattern[:ESCAPED])
  ret[:UNSAFE]  = Regexp.new("[^#{pattern[:UNRESERVED]}#{pattern[:RESERVED]}]")

  # for Generic#initialize
  # Each entry maps the regexp key to the pattern key it is compiled
  # from; the pattern is anchored so it must match the whole string.
  {
    SCHEME:   :SCHEME,
    USERINFO: :USERINFO,
    HOST:     :HOST,
    PORT:     :PORT,
    OPAQUE:   :OPAQUE_PART,
    REGISTRY: :REG_NAME,
    ABS_PATH: :ABS_PATH,
    REL_PATH: :REL_PATH,
    QUERY:    :QUERY,
    FRAGMENT: :FRAGMENT
  }.each do |name, source_key|
    ret[name] = Regexp.new("\\A#{pattern[source_key]}\\z")
  end

  ret
end

Constructs the default Hash of Regexps.