class URI::RFC2396_Parser
Class that parses Strings into URIs.
It contains a Hash of patterns and a Hash of Regexps that are used to match and validate URIs.
Attributes
pattern: the Hash of patterns.
See also initialize_pattern.
regexp: the Hash of Regexps.
See also initialize_regexp.
Public Class Methods
Source
# File lib/uri/rfc2396_parser.rb, line 99
#
# Builds the two lookup tables the parser works from and stores them in
# @pattern and @regexp.
#
# +opts+ is forwarded unchanged to initialize_pattern; the Hash that
# call returns is then handed to initialize_regexp. Both tables, and every
# value inside them, are frozen before the constructor returns.
def initialize(opts = {})
  @pattern = initialize_pattern(opts)
  @pattern.each_value { |pattern_source| pattern_source.freeze }
  @pattern.freeze

  @regexp = initialize_regexp(@pattern)
  @regexp.each_value { |compiled| compiled.freeze }
  @regexp.freeze
end
Synopsis¶↑
URI::RFC2396_Parser.new([opts])
Args
The constructor accepts a hash of options for the parser. The keys are pattern names of URI components and the values are pattern strings. The constructor generates a set of regexps for parsing URIs.
You can use the following keys:
* :ESCAPED (URI::PATTERN::ESCAPED by default)
* :UNRESERVED (URI::PATTERN::UNRESERVED by default)
* :RESERVED (URI::PATTERN::RESERVED by default)
* :DOMLABEL (URI::PATTERN::DOMLABEL by default)
* :TOPLABEL (URI::PATTERN::TOPLABEL by default)
* :HOSTNAME (the built-in pattern "(?:[a-zA-Z0-9\-.]|%\h\h)+" by default; see initialize_pattern)
Examples
p = URI::RFC2396_Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP http://example.jp/%uABCD>
URI.parse(u.to_s) #=> raises URI::InvalidURIError

s = "http://example.com/ABCD"
u1 = p.parse(s) #=> #<URI::HTTP http://example.com/ABCD>
u2 = URI.parse(s) #=> #<URI::HTTP http://example.com/ABCD>
u1 == u2 #=> true
u1.eql?(u2) #=> false
Public Instance Methods
Source
# File lib/uri/rfc2396_parser.rb, line 286
#
# Returns a copy of +str+ in which every substring matched by +unsafe+ is
# replaced by the percent-encoded form (<tt>%XX</tt>, upper-case hex) of
# each of its bytes.
#
# +str+::    the String to escape; it is not modified.
# +unsafe+:: a Regexp selecting what to escape, or any other object (in
#            practice a String) whose characters are turned into a
#            character class. Defaults to <tt>@regexp[:UNSAFE]</tt>.
#
# The returned String is tagged as US-ASCII.
def escape(str, unsafe = @regexp[:UNSAFE])
  unless unsafe.kind_of?(Regexp)
    # perhaps unsafe is String object
    unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false)
  end

  # gsub yields the matched text, so no need for the $& global. A match
  # may be a multibyte character, hence the per-byte encoding.
  escaped = str.gsub(unsafe) do |match|
    match.each_byte.map { |byte| format('%%%02X', byte) }.join
  end
  escaped.force_encoding(Encoding::US_ASCII)
end
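Args
str: String to make safe
unsafe: Regexp (or String of characters) selecting what to escape; defaults to self.regexp[:UNSAFE]
Description
Constructs a safe String from str by replacing each unsafe character with the %XX codes of its bytes.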
Source
# File lib/uri/rfc2396_parser.rb, line 248
#
# Scans +str+ with the Regexp returned by <tt>make_regexp(schemes)</tt>
# and reports the full text of every match.
#
# +str+::     the String to search.
# +schemes+:: passed straight through to make_regexp.
#
# With a block, yields each matched substring and returns +nil+.
# Without a block, returns an Array of the matched substrings (empty when
# nothing matches).
def extract(str, schemes = nil)
  uri_regexp = make_regexp(schemes)

  if block_given?
    # The whole match is needed, not the capture groups that scan would
    # otherwise hand to the block.
    str.scan(uri_regexp) { yield Regexp.last_match(0) }
    return nil
  end

  matches = []
  str.scan(uri_regexp) { matches << Regexp.last_match(0) }
  matches
end
Args
str: String to search
schemes: Patterns to apply to str
Description
Scans str for URIs matching make_regexp(schemes). If no block is given, returns an Array of the matches; otherwise calls the block for each match and returns nil.
See also make_regexp.
Source
Source
# File lib/uri/rfc2396_parser.rb, line 261
#
# Returns the Regexp used to find absolute URIs.
#
# +schemes+:: when +nil+ or +false+ (the default), the prebuilt
#             <tt>@regexp[:ABS_URI_REF]</tt> is returned as is. Otherwise
#             the values are combined with Regexp.union and a new
#             extended-mode Regexp is built from
#             <tt>@pattern[:X_ABS_URI]</tt>, preceded by a lookahead that
#             requires one of those schemes (matched case-insensitively)
#             followed by a colon.
def make_regexp(schemes = nil)
  return @regexp[:ABS_URI_REF] unless schemes

  scheme_alternation = Regexp.union(*schemes).source
  /(?=(?i:#{scheme_alternation}):)#{@pattern[:X_ABS_URI]}/x
end
Returns the Regexp self.regexp[:ABS_URI_REF] by default. If schemes is provided, returns instead a Regexp that combines Regexp.union of the schemes with self.pattern[:X_ABS_URI].
Source
# File lib/uri/rfc2396_parser.rb, line 208
#
# Splits +uri+ into its components with #split and passes them, followed
# by this parser, to URI.for; returns whatever URI.for returns.
#
# +uri+:: the value to parse; it is handed to #split unchanged.
def parse(uri)
  components = split(uri)
  URI.for(*components, self)
end
Args
uri: the URI String to parse
Description
Parses uri and constructs either the matching URI scheme object (File, FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic.
Usage
URI::RFC2396_PARSER.parse("ldap://ldap.example.com/dc=example?user=john")
#=> #<URI::LDAP ldap://ldap.example.com/dc=example?user=john>
Source
# File lib/uri/rfc2396_parser.rb, line 120
#
# Splits +uri+ into its nine components and returns them as an Array in
# this order:
#
#   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
#
# +uri+ is matched first against <tt>@regexp[:ABS_URI]</tt> and then
# against <tt>@regexp[:REL_URI]</tt>; the empty String is accepted as a
# "null" URI. Components that are absent are +nil+, except that +path+
# becomes <tt>''</tt> when both +path+ and +opaque+ are missing.
#
# Raises InvalidURIError when +uri+ matches neither regexp, or when an
# absolute URI has no scheme or has none of opaque/path/host/registry.
def split(uri)
  scheme = userinfo = host = port = registry = nil
  path = opaque = query = fragment = nil

  case uri
  when ''
    # null uri

  when @regexp[:ABS_URI]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = Regexp.last_match.captures

    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

    # absoluteURI   = scheme ":" ( hier_part | opaque_part )
    # hier_part     = ( net_path | abs_path ) [ "?" query ]
    # opaque_part   = uric_no_slash *uric

    # abs_path      = "/"  path_segments
    # net_path      = "//" authority [ abs_path ]

    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]

    unless scheme
      raise InvalidURIError,
        "bad URI (absolute but no scheme): #{uri}"
    end
    unless opaque || path || host || registry
      raise InvalidURIError,
        "bad URI (absolute but no path): #{uri}"
    end

  when @regexp[:REL_URI]
    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = Regexp.last_match.captures

    # rel_path = rel_segment [ abs_path ]; either half may be missing,
    # and path stays nil when both are.
    path = if rel_segment && abs_path
             rel_segment + abs_path
           else
             rel_segment || abs_path
           end

    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

    # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]

    # net_path      = "//" authority [ abs_path ]
    # abs_path      = "/"  path_segments
    # rel_path      = rel_segment [ abs_path ]

    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]

  else
    raise InvalidURIError, "bad URI (is not URI?): #{uri}"
  end

  # (see RFC2396 Section 5.2)
  path = '' unless path || opaque

  [
    scheme,
    userinfo, host, port,         # X
    registry,                     # X
    path,                         # Y
    opaque,                       # Y
    query,
    fragment
  ]
end
Returns uri split into its components, matching it against regexp[:ABS_URI] and then regexp[:REL_URI].
Private Instance Methods
Source
Source
# File lib/uri/rfc2396_parser.rb, line 337
#
# Constructs the Hash of pattern source strings (not compiled Regexps)
# that describe each URI component.
#
# +opts+:: optional overrides, keyed by :ESCAPED, :UNRESERVED, :RESERVED,
#          :DOMLABEL, :TOPLABEL and :HOSTNAME. A missing or falsy entry
#          falls back to the matching PATTERN constant, except :HOSTNAME,
#          which falls back to the inline pattern below. +opts+ is only
#          read, never modified, so a frozen or shared Hash can be passed.
#
# Returns the Hash of patterns. The :X_ABS_URI and :X_REL_URI entries
# contain whitespace and <tt>(?# ...)</tt> comments and are meant to be
# compiled with Regexp::EXTENDED.
def initialize_pattern(opts = {})
  ret = {}
  # Read the overrides without deleting them: the caller's Hash must come
  # back unchanged.
  ret[:ESCAPED] = escaped = opts[:ESCAPED] || PATTERN::ESCAPED
  ret[:UNRESERVED] = unreserved = opts[:UNRESERVED] || PATTERN::UNRESERVED
  ret[:RESERVED] = reserved = opts[:RESERVED] || PATTERN::RESERVED
  ret[:DOMLABEL] = opts[:DOMLABEL] || PATTERN::DOMLABEL
  ret[:TOPLABEL] = opts[:TOPLABEL] || PATTERN::TOPLABEL
  ret[:HOSTNAME] = hostname = opts[:HOSTNAME]

  # RFC 2396 (URI Generic Syntax)
  # RFC 2732 (IPv6 Literal Addresses in URL's)
  # RFC 2373 (IPv6 Addressing Architecture)

  # uric          = reserved | unreserved | escaped
  ret[:URIC] = uric = "(?:[#{unreserved}#{reserved}]|#{escaped})"
  # uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
  #                 "&" | "=" | "+" | "$" | ","
  ret[:URIC_NO_SLASH] = uric_no_slash = "(?:[#{unreserved};?:@&=+$,]|#{escaped})"
  # query         = *uric
  ret[:QUERY] = query = "#{uric}*"
  # fragment      = *uric
  ret[:FRAGMENT] = fragment = "#{uric}*"

  # hostname      = *( domainlabel "." ) toplabel [ "." ]
  # reg-name      = *( unreserved / pct-encoded / sub-delims ) # RFC3986
  unless hostname
    ret[:HOSTNAME] = hostname = "(?:[a-zA-Z0-9\\-.]|%\\h\\h)+"
  end

  # RFC 2373, APPENDIX B:
  # IPv6address = hexpart [ ":" IPv4address ]
  # IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  # hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
  # hexseq  = hex4 *( ":" hex4)
  # hex4    = 1*4HEXDIG
  #
  # XXX: This definition has a flaw. "::" + IPv4address must be
  # allowed too.  Here is a replacement.
  #
  # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  ret[:IPV4ADDR] = ipv4addr = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
  # hex4     = 1*4HEXDIG
  hex4 = "[#{PATTERN::HEX}]{1,4}"
  # lastpart = hex4 | IPv4address
  lastpart = "(?:#{hex4}|#{ipv4addr})"
  # hexseq1  = *( hex4 ":" ) hex4
  hexseq1 = "(?:#{hex4}:)*#{hex4}"
  # hexseq2  = *( hex4 ":" ) lastpart
  hexseq2 = "(?:#{hex4}:)*#{lastpart}"
  # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
  ret[:IPV6ADDR] = ipv6addr = "(?:#{hexseq2}|(?:#{hexseq1})?::(?:#{hexseq2})?)"

  # IPv6prefix  = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
  # unused

  # ipv6reference = "[" IPv6address "]" (RFC 2732)
  ret[:IPV6REF] = ipv6ref = "\\[#{ipv6addr}\\]"

  # host          = hostname | IPv4address
  # host          = hostname | IPv4address | IPv6reference (RFC 2732)
  ret[:HOST] = host = "(?:#{hostname}|#{ipv4addr}|#{ipv6ref})"
  # port          = *digit
  ret[:PORT] = port = '\d*'
  # hostport      = host [ ":" port ]
  ret[:HOSTPORT] = hostport = "#{host}(?::#{port})?"

  # userinfo      = *( unreserved | escaped |
  #                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
  ret[:USERINFO] = userinfo = "(?:[#{unreserved};:&=+$,]|#{escaped})*"

  # pchar         = unreserved | escaped |
  #                 ":" | "@" | "&" | "=" | "+" | "$" | ","
  pchar = "(?:[#{unreserved}:@&=+$,]|#{escaped})"
  # param         = *pchar
  param = "#{pchar}*"
  # segment       = *pchar *( ";" param )
  segment = "#{pchar}*(?:;#{param})*"
  # path_segments = segment *( "/" segment )
  ret[:PATH_SEGMENTS] = path_segments = "#{segment}(?:/#{segment})*"

  # server        = [ [ userinfo "@" ] hostport ]
  server = "(?:#{userinfo}@)?#{hostport}"
  # reg_name      = 1*( unreserved | escaped | "$" | "," |
  #                     ";" | ":" | "@" | "&" | "=" | "+" )
  ret[:REG_NAME] = reg_name = "(?:[#{unreserved}$,;:@&=+]|#{escaped})+"
  # authority     = server | reg_name
  authority = "(?:#{server}|#{reg_name})"

  # rel_segment   = 1*( unreserved | escaped |
  #                     ";" | "@" | "&" | "=" | "+" | "$" | "," )
  ret[:REL_SEGMENT] = rel_segment = "(?:[#{unreserved};@&=+$,]|#{escaped})+"

  # scheme        = alpha *( alpha | digit | "+" | "-" | "." )
  ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][\\-+.#{PATTERN::ALPHA}\\d]*"

  # abs_path      = "/"  path_segments
  ret[:ABS_PATH] = abs_path = "/#{path_segments}"
  # rel_path      = rel_segment [ abs_path ]
  ret[:REL_PATH] = rel_path = "#{rel_segment}(?:#{abs_path})?"
  # net_path      = "//" authority [ abs_path ]
  ret[:NET_PATH] = net_path = "//#{authority}(?:#{abs_path})?"

  # hier_part     = ( net_path | abs_path ) [ "?" query ]
  ret[:HIER_PART] = hier_part = "(?:#{net_path}|#{abs_path})(?:\\?(?:#{query}))?"
  # opaque_part   = uric_no_slash *uric
  ret[:OPAQUE_PART] = opaque_part = "#{uric_no_slash}#{uric}*"

  # absoluteURI   = scheme ":" ( hier_part | opaque_part )
  ret[:ABS_URI] = abs_uri = "#{scheme}:(?:#{hier_part}|#{opaque_part})"
  # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
  ret[:REL_URI] = rel_uri = "(?:#{net_path}|#{abs_path}|#{rel_path})(?:\\?#{query})?"

  # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  ret[:URI_REF] = "(?:#{abs_uri}|#{rel_uri})?(?:##{fragment})?"

  # Capturing variants used for splitting. The layout whitespace inside
  # these strings is only ignored in extended mode.
  ret[:X_ABS_URI] = "
    (#{scheme}):                           (?# 1: scheme)
    (?:
       (#{opaque_part})                    (?# 2: opaque)
    |
       (?:(?:
         //(?:
             (?:(?:(#{userinfo})@)?        (?# 3: userinfo)
               (?:(#{host})(?::(\\d*))?))? (?# 4: host, 5: port)
           |
             (#{reg_name})                 (?# 6: registry)
           )
         |
         (?!//))                           (?# XXX: '//' is the mark for hostport)
         (#{abs_path})?                    (?# 7: path)
       )(?:\\?(#{query}))?                 (?# 8: query)
    )
    (?:\\#(#{fragment}))?                  (?# 9: fragment)
  "

  ret[:X_REL_URI] = "
    (?:
      (?:
        //
        (?:
          (?:(#{userinfo})@)?       (?# 1: userinfo)
            (#{host})?(?::(\\d*))?  (?# 2: host, 3: port)
        |
          (#{reg_name})             (?# 4: registry)
        )
      )
    |
      (#{rel_segment})              (?# 5: rel_segment)
    )?
    (#{abs_path})?                  (?# 6: abs_path)
    (?:\\?(#{query}))?              (?# 7: query)
    (?:\\#(#{fragment}))?           (?# 8: fragment)
  "

  ret
end
Constructs the default Hash of patterns.
Source
# File lib/uri/rfc2396_parser.rb, line 495
#
# Compiles the pattern strings in +pattern+ into the Hash of Regexps the
# parser uses.
#
# +pattern+:: a Hash of pattern source strings. The keys read here are
#             :X_ABS_URI, :X_REL_URI, :URI_REF, :ESCAPED, :UNRESERVED,
#             :RESERVED, :SCHEME, :USERINFO, :HOST, :PORT, :OPAQUE_PART,
#             :REG_NAME, :ABS_PATH, :REL_PATH, :QUERY and :FRAGMENT.
#
# Returns a new Hash of Regexps; +pattern+ itself is not modified.
def initialize_regexp(pattern)
  ret = {}

  # for URI::split
  # Whole-string matchers that tolerate surrounding whitespace.
  ret[:ABS_URI] = Regexp.new("\\A\\s*+#{pattern[:X_ABS_URI]}\\s*\\z", Regexp::EXTENDED)
  ret[:REL_URI] = Regexp.new("\\A\\s*+#{pattern[:X_REL_URI]}\\s*\\z", Regexp::EXTENDED)

  # for URI::extract
  ret[:URI_REF]     = Regexp.new(pattern[:URI_REF])
  ret[:ABS_URI_REF] = Regexp.new(pattern[:X_ABS_URI], Regexp::EXTENDED)
  ret[:REL_URI_REF] = Regexp.new(pattern[:X_REL_URI], Regexp::EXTENDED)

  # for URI::escape/unescape
  ret[:ESCAPED] = Regexp.new(pattern[:ESCAPED])
  ret[:UNSAFE]  = Regexp.new("[^#{pattern[:UNRESERVED]}#{pattern[:RESERVED]}]")

  # for Generic#initialize
  # Each component validator is its pattern anchored to the whole string.
  # Maps the regexp key to the pattern key it is built from.
  {
    SCHEME:   :SCHEME,
    USERINFO: :USERINFO,
    HOST:     :HOST,
    PORT:     :PORT,
    OPAQUE:   :OPAQUE_PART,
    REGISTRY: :REG_NAME,
    ABS_PATH: :ABS_PATH,
    REL_PATH: :REL_PATH,
    QUERY:    :QUERY,
    FRAGMENT: :FRAGMENT
  }.each do |regexp_key, pattern_key|
    ret[regexp_key] = Regexp.new("\\A#{pattern[pattern_key]}\\z")
  end

  ret
end
Constructs the default Hash of Regexps.