A String object holds and manipulates an arbitrary sequence of bytes, typically representing characters. String objects may be created using ::new or as literals.
Because of aliasing issues, users of strings should be aware of the methods that modify the contents of a String object. Typically, methods with names ending in “!” modify their receiver, while those without a “!” return a new String. However, there are exceptions, such as #[]=.
Returns a new String that is a copy ofstring
.
With no arguments, returns the empty string with theEncodingASCII-8BIT
:
s =String.news# => ""s.encoding# => #<Encoding:ASCII-8BIT>
With the single String argumentstring
, returns a copy ofstring
with the same encoding asstring
:
s =String.new("Que veut dire \u{e7}a?")s# => "Que veut dire \u{e7}a?"s.encoding# => #<Encoding:UTF-8>
Literal strings like""
or here-documents always usescriptencoding, unlike::new.
With keywordencoding
, returns a copy ofstr
withthe specified encoding:
s =String.new(encoding:'ASCII')s.encoding# => #<Encoding:US-ASCII>s =String.new('foo',encoding:'ASCII')s.encoding# => #<Encoding:US-ASCII>
Note that these are equivalent:
s0 =String.new('foo',encoding:'ASCII')s1 ='foo'.force_encoding('ASCII')s0.encoding==s1.encoding# => true
With keywordcapacity
, returns a copy ofstr
; thegivencapacity
may set the size of the internal buffer, whichmay affect performance:
String.new(capacity:1)# => ""String.new(capacity:4096)# => ""
Thestring
,encoding
, andcapacity
arguments may all be used together:
String.new('hello',encoding:'UTF-8',capacity:25)
/*
 * Implements String#initialize.
 *
 * Accepts an optional source string plus the optional keywords
 * +encoding+ and +capacity+ (scanned with the "01:" format).
 *
 * - With +capacity+, the receiver is always converted to a heap
 *   (non-embedded) string whose buffer holds at least
 *   max(capacity, STR_BUF_MIN_SIZE, source length) bytes plus the
 *   terminator, and the source bytes (if any) are copied in.
 * - Without +capacity+, a given source string is copied with
 *   rb_str_replace().
 * - With +encoding+, the encoding is associated afterwards and the
 *   cached code range is cleared.
 *
 * Returns +str+ (the receiver).
 */
static VALUE
rb_str_init(int argc, VALUE *argv, VALUE str)
{
    static ID keyword_ids[2];
    VALUE orig, opt, venc, vcapa;
    VALUE kwargs[2];
    rb_encoding *enc = 0;
    int n;

    /* Keyword IDs are resolved once and cached in static storage. */
    if (!keyword_ids[0]) {
        keyword_ids[0] = rb_id_encoding();
        CONST_ID(keyword_ids[1], "capacity");
    }

    n = rb_scan_args(argc, argv, "01:", &orig, &opt);
    if (!NIL_P(opt)) {
        rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
        venc = kwargs[0];
        vcapa = kwargs[1];
        if (venc != Qundef && !NIL_P(venc)) {
            enc = rb_to_encoding(venc);
        }
        if (vcapa != Qundef && !NIL_P(vcapa)) {
            long capa = NUM2LONG(vcapa);
            long len = 0;
            int termlen = enc ? rb_enc_mbminlen(enc) : 1;

            if (capa < STR_BUF_MIN_SIZE) {
                capa = STR_BUF_MIN_SIZE;
            }
            if (n == 1) {
                StringValue(orig);
                len = RSTRING_LEN(orig);
                if (capa < len) {
                    capa = len;
                }
                /* Re-initializing with itself: keep the length but skip the copy. */
                if (orig == str) n = 0;
            }
            str_modifiable(str);
            if (STR_EMBED_P(str)) { /* make noembed always */
                char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
                memcpy(new_ptr, RSTRING(str)->as.ary, RSTRING_EMBED_LEN_MAX + 1);
                RSTRING(str)->as.heap.ptr = new_ptr;
            }
            else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
                const size_t size = (size_t)capa + termlen;
                const char *const old_ptr = RSTRING_PTR(str);
                const size_t osize = RSTRING(str)->as.heap.len + TERM_LEN(str);
                char *new_ptr = ALLOC_N(char, size);
                memcpy(new_ptr, old_ptr, osize < size ? osize : size);
                /*
                 * The string now owns a freshly allocated buffer, so both
                 * "shared" and "do not free" must be dropped; leaving
                 * STR_NOFREE set would leak new_ptr.
                 */
                FL_UNSET_RAW(str, STR_SHARED|STR_NOFREE);
                RSTRING(str)->as.heap.ptr = new_ptr;
            }
            else if (STR_HEAP_SIZE(str) != (size_t)capa + termlen) {
                SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char,
                                (size_t)capa + termlen, STR_HEAP_SIZE(str));
            }
            RSTRING(str)->as.heap.len = len;
            TERM_FILL(&RSTRING(str)->as.heap.ptr[len], termlen);
            if (n == 1) {
                memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), len);
                rb_enc_cr_str_exact_copy(str, orig);
            }
            FL_SET(str, STR_NOEMBED);
            RSTRING(str)->as.heap.aux.capa = capa;
        }
        else if (n == 1) {
            rb_str_replace(str, orig);
        }
        if (enc) {
            rb_enc_associate(str, enc);
            ENC_CODERANGE_CLEAR(str);
        }
    }
    else if (n == 1) {
        rb_str_replace(str, orig);
    }
    return str;
}
Ifobject
is a String object, returnsobject
.
Otherwise ifobject
responds to:to_str
, callsobject.to_str
and returns the result.
Returnsnil
ifobject
does not respond to:to_str
Raises an exception unlessobject.to_str
returns a Stringobject.
/*
 * Implements String.try_convert: delegates to rb_check_string_type()
 * and returns whatever it yields. The first argument (the receiver
 * class) is unused.
 */
static VALUE
rb_str_s_try_convert(VALUE dummy, VALUE str)
{
    VALUE converted = rb_check_string_type(str);

    return converted;
}
Returns the result of formattingobject
into the formatspecificationself
(seeKernel#sprintf for formattingdetails):
"%05d"%123# => "00123"
Ifself
contains multiple substitutions,object
must be an Array or Hash containing the values to be substituted:
"%-5s: %016x"% ["ID",self.object_id ]# => "ID : 00002b054ec93168""foo = %{foo}"% {foo:'bar'}# => "foo = bar""foo = %{foo}, baz = %{baz}"% {foo:'bar',baz:'bat'}# => "foo = bar, baz = bat"
/*
 * Implements String#%: formats +arg+ using +str+ as the format string.
 * When +arg+ converts to an Array, its elements are passed as the
 * individual format arguments; otherwise +arg+ is the sole argument.
 */
static VALUE
rb_str_format_m(VALUE str, VALUE arg)
{
    VALUE values = rb_check_array_type(arg);

    if (NIL_P(values)) {
        return rb_str_format(1, &arg, str);
    }
    return rb_str_format(RARRAY_LENINT(values), RARRAY_CONST_PTR(values), str);
}
Returns a new String containinginteger
copies ofself
:
"Ho! "*3# => "Ho! Ho! Ho! ""Ho! "*0# => ""
/*
 * Implements String#*: returns a new String made of +times+ copies of +str+.
 *
 * Raises ArgumentError when +times+ is negative or when the resulting
 * byte length would exceed LONG_MAX.
 */
VALUE
rb_str_times(VALUE str, VALUE times)
{
    VALUE result;
    long count, total, unit, filled;
    char *dst;
    int termlen;

    /* Fixnum 1 and 0 are handled without any numeric conversion. */
    if (times == INT2FIX(1)) {
        return str_duplicate(rb_cString, str);
    }
    if (times == INT2FIX(0)) {
        result = str_alloc(rb_cString);
        rb_enc_copy(result, str);
        return result;
    }

    count = NUM2LONG(times);
    if (count < 0) {
        rb_raise(rb_eArgError, "negative argument");
    }

    /* Special case: the receiver is a single NUL byte. */
    if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
        result = str_alloc(rb_cString);
        if (!STR_EMBEDDABLE_P(count, 1)) {
            /* Zero-allocated heap buffer; no copying is needed. */
            RSTRING(result)->as.heap.aux.capa = count;
            RSTRING(result)->as.heap.ptr = ZALLOC_N(char, (size_t)count + 1);
            STR_SET_NOEMBED(result);
        }
        STR_SET_LEN(result, count);
        rb_enc_copy(result, str);
        return result;
    }

    if (count && LONG_MAX/count < RSTRING_LEN(str)) {
        rb_raise(rb_eArgError, "argument too big");
    }
    total = count * RSTRING_LEN(str);
    termlen = TERM_LEN(str);
    result = str_new0(rb_cString, 0, total, termlen);
    dst = RSTRING_PTR(result);
    if (total) {
        /* Copy one unit, then keep doubling the filled prefix. */
        unit = RSTRING_LEN(str);
        memcpy(dst, RSTRING_PTR(str), unit);
        for (filled = unit; filled <= total/2; filled *= 2) {
            memcpy(dst + filled, dst, filled);
        }
        memcpy(dst + filled, dst, total - filled);
    }
    STR_SET_LEN(result, total);
    TERM_FILL(&dst[total], termlen);
    rb_enc_cr_str_copy_for_substr(result, str);

    return result;
}
Returns a new String containingother_string
concatenated toself
:
"Hello from "+self.to_s# => "Hello from main"
/*
 * Implements String#+: returns a new String holding the bytes of +str1+
 * followed by the bytes of +str2+ (converted with StringValue).
 *
 * The encoding is the one chosen by rb_enc_check_str(); raises
 * ArgumentError when the combined length would overflow a long.
 */
VALUE
rb_str_plus(VALUE str1, VALUE str2)
{
    VALUE joined;
    rb_encoding *enc;
    char *lhs, *rhs, *out;
    long lhs_len, rhs_len, total;
    int termlen;

    StringValue(str2);
    enc = rb_enc_check_str(str1, str2);
    RSTRING_GETMEM(str1, lhs, lhs_len);
    RSTRING_GETMEM(str2, rhs, rhs_len);
    termlen = rb_enc_mbminlen(enc);
    if (lhs_len > LONG_MAX - rhs_len) {
        rb_raise(rb_eArgError, "string size too big");
    }
    total = lhs_len + rhs_len;

    joined = str_new0(rb_cString, 0, total, termlen);
    out = RSTRING_PTR(joined);
    memcpy(out, lhs, lhs_len);
    memcpy(out + lhs_len, rhs, rhs_len);
    TERM_FILL(&out[total], termlen);

    ENCODING_CODERANGE_SET(joined, rb_enc_to_index(enc),
                           ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2)));
    /* Keep both operands alive while their raw buffers are in use. */
    RB_GC_GUARD(str1);
    RB_GC_GUARD(str2);
    return joined;
}
Returnsself
ifself
is not frozen.
Otherwise, returns self.dup
, which is not frozen.
/*
 * Implements String#+@: returns +str+ itself when it is not frozen,
 * otherwise a duplicate made with rb_str_dup().
 */
static VALUE
str_uplus(VALUE str)
{
    return OBJ_FROZEN(str) ? rb_str_dup(str) : str;
}
Returns a frozen, possibly pre-existing copy of the string.
The returned String will be deduplicated as long as it does not have anyinstance variables set on it.
/*
 * Implements String#-@: returns rb_fstring() of the string.
 * A string that is neither "bare" (per BARE_STRING_P) nor frozen is
 * duplicated first, so the receiver itself is not handed to rb_fstring().
 */
static VALUE
str_uminus(VALUE str)
{
    if (BARE_STRING_P(str) || rb_obj_frozen_p(str)) {
        return rb_fstring(str);
    }
    return rb_fstring(rb_str_dup(str));
}
Concatenates object to self and returns self (self is modified in place):
s ='foo's<<'bar'# => "foobar"
Ifobject
is an Integer, the value is considered a codepointand converted to a character before concatenation:
s ='foo's<<33# => "foo!"
Related:#concat, which takesmultiple arguments.
/*
 * Implements String#<<: appends +str2+ to +str1+ in place and returns +str1+.
 *
 * A non-Integer argument is delegated to rb_str_append(). An Integer is
 * treated as a codepoint in the encoding of +str1+ and appended as one
 * character; RangeError is raised when it cannot be represented.
 */
VALUE
rb_str_concat(VALUE str1, VALUE str2)
{
    unsigned int code;
    rb_encoding *enc = STR_ENC_GET(str1);
    int encidx;

    if (!RB_INTEGER_TYPE_P(str2)) {
        return rb_str_append(str1, str2);
    }

    if (rb_num_to_uint(str2, &code) != 0) {
        if (FIXNUM_P(str2)) {
            rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
        }
        rb_raise(rb_eRangeError, "bignum out of char range");
    }

    encidx = rb_enc_to_index(enc);
    if (encidx == ENCINDEX_ASCII || encidx == ENCINDEX_US_ASCII) {
        /* US-ASCII automatically extended to ASCII-8BIT */
        char byte = (char)code;

        if (code > 0xFF) {
            rb_raise(rb_eRangeError, "%u out of char range", code);
        }
        rb_str_cat(str1, &byte, 1);
        if (encidx == ENCINDEX_US_ASCII && code > 127) {
            rb_enc_associate_index(str1, ENCINDEX_ASCII);
            ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
        }
    }
    else {
        long old_len = RSTRING_LEN(str1);
        int cr = ENC_CODERANGE(str1);
        int clen = rb_enc_codelen(code, enc);
        char *mbc;

        if (clen == ONIGERR_INVALID_CODE_POINT_VALUE) {
            rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
        }
        if (clen == ONIGERR_TOO_BIG_WIDE_CHAR_VALUE || clen == 0) {
            rb_raise(rb_eRangeError, "%u out of char range", code);
        }

        /* Encode into a scratch buffer and validate before touching str1. */
        mbc = ALLOCA_N(char, clen + 1);
        rb_enc_mbcput(code, mbc, enc);
        if (rb_enc_precise_mbclen(mbc, mbc + clen + 1, enc) != clen) {
            rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
        }
        rb_str_resize(str1, old_len + clen);
        memcpy(RSTRING_PTR(str1) + old_len, mbc, clen);
        if (cr == ENC_CODERANGE_7BIT && code > 127) {
            cr = ENC_CODERANGE_VALID;
        }
        ENC_CODERANGE_SET(str1, cr);
    }
    return str1;
}
Compares self and other_string, returning:
-1 if other_string is larger.
0 if the two are equal.
1 if other_string is smaller.
nil
if the two are incomparable.
Examples:
'foo'<=>'foo'# => 0'foo'<=>'food'# => -1'food'<=>'foo'# => 1'FOO'<=>'foo'# => -1'foo'<=>'FOO'# => 1'foo'<=>1# => nil
/*
 * Implements String#<=>: when +str2+ converts to a String, returns the
 * rb_str_cmp() result as a Fixnum; otherwise defers to rb_invcmp().
 */
static VALUE
rb_str_cmp_m(VALUE str1, VALUE str2)
{
    VALUE other = rb_check_string_type(str2);

    if (!NIL_P(other)) {
        int order = rb_str_cmp(str1, other);
        return INT2FIX(order);
    }
    return rb_invcmp(str1, str2);
}
Returns true if object has the same length and content as self; false otherwise:
s ='foo's=='foo'# => trues=='food'# => falses=='FOO'# => false
Returnsfalse
if the two strings' encodings are notcompatible:
"\u{e4 f6 fc}".encode("ISO-8859-1")== ("\u{c4 d6 dc}")# => false
Ifobject
is not an instance of String but responds toto_str
, then the two strings are compared usingobject.==
.
/*
 * Implements String#==.
 *
 * Identical objects are equal. A String argument is compared with
 * rb_str_eql_internal(). A non-String that responds to to_str is asked
 * via rb_equal(str2, str1); anything else is not equal.
 */
VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) {
        return Qtrue;
    }
    if (RB_TYPE_P(str2, T_STRING)) {
        return rb_str_eql_internal(str1, str2);
    }
    if (rb_respond_to(str2, idTo_str)) {
        return rb_equal(str2, str1);
    }
    return Qfalse;
}
Returns true if object has the same length and content as self; false otherwise:
s ='foo's=='foo'# => trues=='food'# => falses=='FOO'# => false
Returnsfalse
if the two strings' encodings are notcompatible:
"\u{e4 f6 fc}".encode("ISO-8859-1")== ("\u{c4 d6 dc}")# => false
Ifobject
is not an instance of String but responds toto_str
, then the two strings are compared usingobject.==
.
/*
 * Equality test shared by the String equality operators.
 *
 * Identical objects are equal. A String argument is compared with
 * rb_str_eql_internal(). A non-String that responds to to_str is asked
 * via rb_equal(str2, str1); anything else is not equal.
 */
VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) {
        return Qtrue;
    }
    if (RB_TYPE_P(str2, T_STRING)) {
        return rb_str_eql_internal(str1, str2);
    }
    if (rb_respond_to(str2, idTo_str)) {
        return rb_equal(str2, str1);
    }
    return Qfalse;
}
Returns the Integer index of the first substring that matches the givenregexp
, ornil
if no match found:
'foo'=~/f/# => 0'foo'=~/o/# => 1'foo'=~/x/# => nil
Note: also updatesRegexp-relatedglobal variables.
If the givenobject
is not a Regexp, returns the valuereturned byobject =~ self
.
Note thatstring =~ regexp
is different fromregexp =~string
(seeRegexp#=~):
number=nil"no. 9"=~/(?<number>\d+)/number# => nil (not assigned)/(?<number>\d+)/=~"no. 9"number#=> "9"
/*
 * Implements String#=~.
 *
 * A String argument raises TypeError, a Regexp is matched with
 * rb_reg_match(y, x), and any other object has its own =~ called
 * with the receiver as argument.
 */
static VALUE
rb_str_match(VALUE x, VALUE y)
{
    int type = OBJ_BUILTIN_TYPE(y);

    if (type == T_STRING) {
        rb_raise(rb_eTypeError, "type mismatch: String given");
    }
    if (type == T_REGEXP) {
        return rb_reg_match(y, x);
    }
    return rb_funcall(y, idEqTilde, 1, x);
}
Returns the substring ofself
specified by the arguments.
When the single Integer argumentindex
is given, returns the1-character substring found inself
at offsetindex
:
'bar'[2]# => "r"
Counts backward from the end ofself
ifindex
isnegative:
'foo'[-3]# => "f"
Returnsnil
ifindex
is out of range:
'foo'[3]# => nil'foo'[-4]# => nil
When the two Integer argumentsstart
andlength
are given, returns the substring of the givenlength
found inself
at offsetstart
:
'foo'[0,2]# => "fo"'foo'[0,0]# => ""
Counts backward from the end ofself
ifstart
isnegative:
'foo'[-2,2]# => "oo"
Special case: returns a new empty String ifstart
is equal tothe length ofself
:
'foo'[3,2]# => ""
Returnsnil
ifstart
is out of range:
'foo'[4,2]# => nil'foo'[-4,2]# => nil
Returns the trailing substring ofself
iflength
is large:
'foo'[1,50]# => "oo"
Returnsnil
iflength
is negative:
'foo'[0,-1]# => nil
When the single Range argumentrange
is given, derivesstart
andlength
values from the givenrange
, and returns values as above:
'foo'[0..1]
is equivalent to'foo'[0,2]
.
'foo'[0...1]
is equivalent to'foo'[0,1]
.
When the Regexp argumentregexp
is given, and thecapture
argument is0
, returns the first matchingsubstring found inself
, ornil
if none found:
'foo'[/o/]# => "o"'foo'[/x/]# => nils ='hello there's[/[aeiou](.)\1/]# => "ell"s[/[aeiou](.)\1/,0]# => "ell"
If argumentcapture
is given and not0
, it shouldbe either an Integer capture group index or a String or Symbol capturegroup name; the method call returns only the specified capture (seeRegexp Capturing):
s ='hello there's[/[aeiou](.)\1/,1]# => "l"s[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/,"non_vowel"]# => "l"s[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, :vowel]# => "e"
If an invalid capture group index is given,nil
is returned. If an invalid capture group name is given,IndexError
israised.
When the single String argumentsubstring
is given, returnsthe substring fromself
if found, otherwisenil
:
'foo'['oo']# => "oo"'foo'['xx']# => nil
/*
 * Implements String#[] / String#slice.
 *
 * Two arguments: (regexp, capture) goes to rb_str_subpat(); otherwise
 * both are converted to longs and passed to rb_str_substr() as
 * (start, length). One argument is dispatched through rb_str_aref().
 * Any other argument count raises via rb_check_arity().
 */
static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    long beg, len;

    if (argc != 2) {
        rb_check_arity(argc, 1, 2);
        return rb_str_aref(str, argv[0]);
    }
    if (RB_TYPE_P(argv[0], T_REGEXP)) {
        return rb_str_subpat(str, argv[0], argv[1]);
    }
    beg = NUM2LONG(argv[0]);
    len = NUM2LONG(argv[1]);
    return rb_str_substr(str, beg, len);
}
Element Assignment—Replaces some or all of the content of str. The portion of the string affected is determined using the same criteria as #[]. If the replacement string is not the same length as the text it is replacing, the string will be adjusted accordingly. If the regular expression or string used as the index doesn't match a position in the string, IndexError is raised. If the regular expression form is used, the optional second Integer allows you to specify which portion of the match to replace (effectively using the MatchData indexing rules). The forms that take an Integer will raise an IndexError if the value is out of range; the Range form will raise a RangeError, and the Regexp and String forms will raise an IndexError on negative match.
/*
 * Implements String#[]=.
 *
 * Three arguments: (regexp, capture, value) goes to rb_str_subpat_set();
 * otherwise the first two are converted to longs and the range is
 * replaced with rb_str_splice(). In both cases the assigned value
 * (argv[2]) is returned. Two arguments are dispatched through
 * rb_str_aset(). Any other argument count raises via rb_check_arity().
 */
static VALUE
rb_str_aset_m(int argc, VALUE *argv, VALUE str)
{
    if (argc != 3) {
        rb_check_arity(argc, 2, 3);
        return rb_str_aset(str, argv[0], argv[1]);
    }

    if (RB_TYPE_P(argv[0], T_REGEXP)) {
        rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
    }
    else {
        rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
    }
    return argv[2];
}
Returns true for a string which has only ASCII characters.
"abc".force_encoding("UTF-8").ascii_only?#=> true"abc\u{6666}".force_encoding("UTF-8").ascii_only?#=> false
/*
 * Implements String#ascii_only?: true exactly when the string's code
 * range, as reported by rb_enc_str_coderange(), is ENC_CODERANGE_7BIT.
 */
static VALUE
rb_str_is_ascii_only_p(VALUE str)
{
    if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
        return Qtrue;
    }
    return Qfalse;
}
Returns a copied string whose encoding is ASCII-8BIT.
/*
 * Implements String#b: allocates a fresh String, fills it from +str+
 * with str_replace_shared_without_enc() (so the source encoding is not
 * carried over), clears the cached code range, and returns it.
 */
static VALUE
rb_str_b(VALUE str)
{
    VALUE binary = str_alloc(rb_cString);

    str_replace_shared_without_enc(binary, str);
    ENC_CODERANGE_CLEAR(binary);

    return binary;
}
Returns an array of bytes instr. This is a shorthand forstr.each_byte.to_a
.
If a block is given, which is a deprecated form, works the same aseach_byte
.
/*
 * Implements String#bytes: enumerates the bytes of +str+ through
 * rb_str_enumerate_bytes(), using the collector produced by
 * WANTARRAY("bytes", <byte length>).
 */
static VALUE
rb_str_bytes(VALUE str)
{
    long nbytes = RSTRING_LEN(str);
    VALUE collector = WANTARRAY("bytes", nbytes);

    return rb_str_enumerate_bytes(str, collector);
}
Returns the count of bytes inself
:
"\x80\u3042".bytesize# => 4"hello".bytesize# => 5
Related:#length.
/*
 * Implements String#bytesize: returns the byte length of +str+
 * (RSTRING_LEN) as an Integer.
 */
static VALUE
rb_str_bytesize(VALUE str)
{
    long nbytes = RSTRING_LEN(str);

    return LONG2NUM(nbytes);
}
Byte Reference—If passed a single Integer, returns a substring of one byte at that position. If passed two Integer objects, returns a substring starting at the offset given by the first, and a length given by the second. If given a Range, a substring containing bytes at offsets given by the range is returned. In all three cases, if an offset is negative, it is counted from the end of str. Returns nil if the initial offset falls outside the string, the length is negative, or the beginning of the range is greater than the end. The resulting string keeps the encoding of str.
"hello".byteslice(1)#=> "e""hello".byteslice(-1)#=> "o""hello".byteslice(1,2)#=> "el""\x80\u3042".byteslice(1,3)#=> "\u3042""\x03\u3042\xff".byteslice(1..3)#=> "\u3042"
/*
 * Implements String#byteslice.
 *
 * Two arguments are converted to longs and passed to str_byte_substr()
 * as (start, length). One argument is dispatched through
 * str_byte_aref(). Any other argument count raises via rb_check_arity().
 */
static VALUE
rb_str_byteslice(int argc, VALUE *argv, VALUE str)
{
    long beg, len;

    if (argc != 2) {
        rb_check_arity(argc, 1, 2);
        return str_byte_aref(str, argv[0]);
    }
    beg = NUM2LONG(argv[0]);
    len = NUM2LONG(argv[1]);
    return str_byte_substr(str, beg, len, TRUE);
}
Returns a copy ofstr with the first character converted touppercase and the remainder to lowercase.
See#downcase for meaning ofoptions
and use with different encodings.
"hello".capitalize#=> "Hello""HELLO".capitalize#=> "Hello""123ABC".capitalize#=> "123abc"
/*
 * Implements String#capitalize: returns a new String with the first
 * character upcased/titlecased and the rest downcased, according to
 * the case-mapping options in argv.
 *
 * With the ASCII-only option the mapping is done by
 * rb_str_ascii_casemap() into a new buffer of the same byte length;
 * otherwise rb_str_casemap() builds the result.
 *
 * An empty receiver yields a duplicate rather than the receiver itself,
 * so the documented "returns a copy" contract holds and the result never
 * aliases (or inherits the frozen state of) the receiver.
 */
static VALUE
rb_str_capitalize(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) {
        return str_duplicate(rb_cString, str);
    }
    if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }
    return ret;
}
Modifiesstr by converting the first character to uppercase andthe remainder to lowercase. Returnsnil
if no changes aremade. There is an exception for modern Georgian (mkhedruli/MTAVRULI), wherethe result is the same as for#downcase, to avoid mixed case.
See#downcase for meaning ofoptions
and use with different encodings.
a ="hello"a.capitalize!#=> "Hello"a#=> "Hello"a.capitalize!#=> nil
/*
 * Implements String#capitalize!: capitalizes +str+ in place.
 *
 * Returns +str+ when the case mapping reported a modification
 * (ONIGENC_CASE_MODIFIED set in the flags), otherwise nil. An empty
 * string is never modified and yields nil.
 */
static VALUE
rb_str_capitalize_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) {
        return Qnil;
    }

    if (flags&ONIGENC_CASE_ASCII_ONLY) {
        /* ASCII-only mapping writes straight back into the receiver. */
        rb_str_ascii_casemap(str, str, &flags, enc);
    }
    else {
        VALUE mapped = rb_str_casemap(str, &flags, enc);
        str_shared_replace(str, mapped);
    }

    return (ONIGENC_CASE_MODIFIED&flags) ? str : Qnil;
}
Compares self and other_string, ignoring case, and returning:
-1 if other_string is larger.
0 if the two are equal.
1 if other_string is smaller.
nil
if the two are incomparable.
Examples:
'foo'.casecmp('foo')# => 0'foo'.casecmp('food')# => -1'food'.casecmp('foo')# => 1'FOO'.casecmp('foo')# => 0'foo'.casecmp('FOO')# => 0'foo'.casecmp(1)# => nil
/*
 * Implements String#casecmp: returns nil when +str2+ does not convert
 * to a String, otherwise the result of str_casecmp().
 */
static VALUE
rb_str_casecmp(VALUE str1, VALUE str2)
{
    VALUE other = rb_check_string_type(str2);

    if (!NIL_P(other)) {
        return str_casecmp(str1, other);
    }
    return Qnil;
}
Returnstrue
ifself
andother_string
are equal after Unicode case folding, otherwisefalse
:
'foo'.casecmp?('foo')  # => true
'foo'.casecmp?('food') # => false
'food'.casecmp?('foo') # => false
'FOO'.casecmp?('foo')  # => true
'foo'.casecmp?('FOO')  # => true
Returnsnil
if the two values are incomparable:
'foo'.casecmp?(1)# => nil
/*
 * Implements String#casecmp?: returns nil when +str2+ does not convert
 * to a String, otherwise the result of str_casecmp_p().
 */
static VALUE
rb_str_casecmp_p(VALUE str1, VALUE str2)
{
    VALUE other = rb_check_string_type(str2);

    if (!NIL_P(other)) {
        return str_casecmp_p(str1, other);
    }
    return Qnil;
}
Centersstr
inwidth
. Ifwidth
isgreater than the length ofstr
, returns a newString of lengthwidth
withstr
centered and padded withpadstr
; otherwise,returnsstr
.
"hello".center(4)#=> "hello""hello".center(20)#=> " hello ""hello".center(20,'123')#=> "1231231hello12312312"
/*
 * Implements String#center: forwards to rb_str_justify() with the
 * justification mode 'c'.
 */
static VALUE
rb_str_center(int argc, VALUE *argv, VALUE str)
{
    VALUE centered = rb_str_justify(argc, argv, str, 'c');

    return centered;
}
Returns an array of characters instr. This is a shorthand forstr.each_char.to_a
.
If a block is given, which is a deprecated form, works the same aseach_char
.
/*
 * Implements String#chars: enumerates the characters of +str+ through
 * rb_str_enumerate_chars(), using the collector produced by
 * WANTARRAY("chars", <character count>).
 */
static VALUE
rb_str_chars(VALUE str)
{
    VALUE collector = WANTARRAY("chars", rb_str_strlen(str));

    return rb_str_enumerate_chars(str, collector);
}
Returns a newString with the given recordseparator removed from the end ofstr (if present). If$/
has not been changed from the default Ruby recordseparator, thenchomp
also removes carriage return characters(that is it will remove\n
,\r
, and\r\n
). If$/
is an empty string, it will removeall trailing newlines from the string.
"hello".chomp#=> "hello""hello\n".chomp#=> "hello""hello\r\n".chomp#=> "hello""hello\n\r".chomp#=> "hello\n""hello\r".chomp#=> "hello""hello \n there".chomp#=> "hello \n there""hello".chomp("llo")#=> "he""hello\r\n\r\n".chomp('')#=> "hello""hello\r\n\r\r\n".chomp('')#=> "hello\r\n\r"
/*
 * Implements String#chomp: returns a new String without the trailing
 * record separator.
 *
 * The separator comes from chomp_rs(); when it is nil the result is a
 * plain duplicate, otherwise the leading chompped_length() bytes.
 */
static VALUE
rb_str_chomp(int argc, VALUE *argv, VALUE str)
{
    VALUE rs = chomp_rs(argc, argv);

    if (!NIL_P(rs)) {
        long keep = chompped_length(str, rs);
        return rb_str_subseq(str, 0, keep);
    }
    return str_duplicate(rb_cString, str);
}
Modifiesstr in place as described for#chomp, returningstr, ornil
if no modifications were made.
/*
 * Implements String#chomp!: chomps +str+ in place.
 *
 * Checks modifiability first, then returns nil for an empty string or
 * a nil record separator; otherwise returns the result of
 * rb_str_chomp_string().
 */
static VALUE
rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE rs;

    str_modifiable(str);
    if (RSTRING_LEN(str) != 0) {
        rs = chomp_rs(argc, argv);
        if (!NIL_P(rs)) {
            return rb_str_chomp_string(str, rs);
        }
    }
    return Qnil;
}
Returns a newString with the last characterremoved. If the string ends with\r\n
, both characters areremoved. Applyingchop
to an empty string returns an emptystring.#chomp is often a saferalternative, as it leaves the string unchanged if it doesn't end in arecord separator.
"string\r\n".chop#=> "string""string\n\r".chop#=> "string\n""string\n".chop#=> "string""string".chop#=> "strin""x".chop.chop#=> ""
/*
 * Implements String#chop: returns the leading chopped_length() bytes
 * of +str+ as a new String.
 */
static VALUE
rb_str_chop(VALUE str)
{
    long keep = chopped_length(str);

    return rb_str_subseq(str, 0, keep);
}
Processesstr as for#chop, returningstr, ornil
ifstr is the empty string. See also#chomp!.
/*
 * Implements String#chop!: truncates +str+ in place to
 * chopped_length() bytes and returns it, or returns nil when the
 * string is empty.
 */
static VALUE
rb_str_chop_bang(VALUE str)
{
    long keep;

    str_modify_keep_cr(str);
    if (RSTRING_LEN(str) <= 0) {
        return Qnil;
    }

    keep = chopped_length(str);
    STR_SET_LEN(str, keep);
    TERM_FILL(&RSTRING_PTR(str)[keep], TERM_LEN(str));
    /* A 7-bit string stays 7-bit; any other cached range is dropped. */
    if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
        ENC_CODERANGE_CLEAR(str);
    }
    return str;
}
Returns a one-character string at the beginning of the string.
a ="abcde"a.chr#=> "a"
/*
 * Implements String#chr: returns rb_str_substr(str, 0, 1), i.e. the
 * substring of length one starting at offset zero.
 */
static VALUE
rb_str_chr(VALUE str)
{
    VALUE first = rb_str_substr(str, 0, 1);

    return first;
}
Makes string empty.
a ="abcde"a.clear#=> ""
/*
 * Implements String#clear: discards the current contents, switches
 * +str+ to an embedded string of length zero, and returns it.
 *
 * The code range is set to 7BIT for ASCII-compatible encodings and to
 * VALID otherwise.
 */
static VALUE
rb_str_clear(VALUE str)
{
    int cr;

    str_discard(str);
    STR_SET_EMBED(str);
    STR_SET_EMBED_LEN(str, 0);
    RSTRING_PTR(str)[0] = 0;

    cr = rb_enc_asciicompat(STR_ENC_GET(str)) ? ENC_CODERANGE_7BIT
                                              : ENC_CODERANGE_VALID;
    ENC_CODERANGE_SET(str, cr);
    return str;
}
Returns an array of theInteger ordinals of thecharacters instr. This is a shorthand forstr.each_codepoint.to_a
.
If a block is given, which is a deprecated form, works the same aseach_codepoint
.
/*
 * Implements String#codepoints: enumerates the codepoints of +str+
 * through rb_str_enumerate_codepoints(), using the collector produced
 * by WANTARRAY("codepoints", <character count>).
 */
static VALUE
rb_str_codepoints(VALUE str)
{
    VALUE collector = WANTARRAY("codepoints", rb_str_strlen(str));

    return rb_str_enumerate_codepoints(str, collector);
}
Concatenates each object in objects to self and returns self (self is modified in place):
s ='foo's.concat('bar','baz')# => "foobarbaz"
For each given objectobject
that is an Integer, the value isconsidered a codepoint and converted to a character before concatenation:
s ='foo's.concat(32,'bar',32,'baz')# => "foo bar baz"
Related:#<<, which takes a singleargument.
/*
 * Implements String#concat: appends every argument to +str+ in place
 * and returns +str+.
 *
 * A single argument goes straight to rb_str_concat(). Several
 * arguments are first gathered into a temporary string (with the
 * receiver's encoding) and appended in one step, so arguments that
 * refer to the receiver see its original contents.
 */
static VALUE
rb_str_concat_multi(int argc, VALUE *argv, VALUE str)
{
    VALUE gathered;
    VALUE *arg, *arg_end;

    str_modifiable(str);

    if (argc == 1) {
        return rb_str_concat(str, argv[0]);
    }
    if (argc < 1) {
        return str;
    }

    gathered = rb_str_tmp_new(0);
    rb_enc_copy(gathered, str);
    for (arg = argv, arg_end = argv + argc; arg < arg_end; arg++) {
        rb_str_concat(gathered, *arg);
    }
    rb_str_buf_append(str, gathered);

    return str;
}
Eachother_str
parameter defines a set of characters to count.The intersection of these sets defines the characters to count instr
. Anyother_str
that starts with a caret^
is negated. The sequencec1-c2
means allcharacters between c1 and c2. The backslash character\
canbe used to escape^
or-
and is otherwise ignoredunless it appears at the end of a sequence or the end of aother_str
.
a ="hello world"a.count"lo"#=> 5a.count"lo","o"#=> 2a.count"hello","^l"#=> 4a.count"ej-m"#=> 4"hello^world".count"\\^aeiou"#=> 4"hello-world".count"a\\-eo"#=> 4c ="hello world\\r\\n"c.count"\\"#=> 2c.count"\\A"#=> 0c.count"X-\\w"#=> 3
/*
 * Implements String#count: counts the characters of +str+ that belong
 * to the intersection of the character sets described by the arguments.
 *
 * At least one argument is required. Each argument is converted with
 * StringValue and must be encoding-compatible with +str+.
 *
 * Matches are accumulated in a long (not an int) because the number of
 * matches is bounded only by the string length, which is itself a long.
 */
static VALUE
rb_str_count(int argc, VALUE *argv, VALUE str)
{
    char table[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    VALUE del = 0, nodel = 0, tstr;
    char *s, *send;
    long count = 0;
    int i;
    int ascompat;

    rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);

    tstr = argv[0];
    StringValue(tstr);
    enc = rb_enc_check(str, tstr);
    if (argc == 1) {
        const char *ptstr;

        /*
         * Fast path: a single one-byte selector in an ASCII-compatible
         * encoding, on a string that is not broken, is counted by a
         * plain byte scan without building the lookup table.
         */
        if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
            (ptstr = RSTRING_PTR(tstr),
             ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (const unsigned char *)ptstr,
                                              (const unsigned char *)ptstr+1)) &&
            !is_broken_string(str)) {
            int clen;
            unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);

            s = RSTRING_PTR(str);
            if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
            send = RSTRING_END(str);
            while (s < send) {
                if (*(unsigned char*)s++ == c) count++;
            }
            return LONG2NUM(count);
        }
    }

    /* The first selector initializes the table; later ones refine it. */
    tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
    for (i = 1; i < argc; i++) {
        tstr = argv[i];
        StringValue(tstr);
        enc = rb_enc_check(str, tstr);
        tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
    }

    s = RSTRING_PTR(str);
    if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
    send = RSTRING_END(str);
    ascompat = rb_enc_asciicompat(enc);
    while (s < send) {
        unsigned int c;

        if (ascompat && (c = *(unsigned char*)s) < 0x80) {
            /* Single-byte ASCII: direct table lookup. */
            if (table[c]) {
                count++;
            }
            s++;
        }
        else {
            int clen;

            c = rb_enc_codepoint_len(s, send, &clen, enc);
            if (tr_find(c, table, del, nodel)) {
                count++;
            }
            s += clen;
        }
    }

    return LONG2NUM(count);
}
Returns the string generated by callingcrypt(3)
standardlibrary function withstr
andsalt_str
, in thisorder, as its arguments. Please do not use this method any longer. It islegacy; provided only for backward compatibility with ruby scripts inearlier days. It is bad to use in contemporary programs for severalreasons:
Behaviour of C'scrypt(3)
depends on the OS it is run. The generated string lacks data portability.
On some OSes such as Mac OS,crypt(3)
never fails (i.e.silently ends up in unexpected results).
On some OSes such as Mac OS,crypt(3)
is not thread safe.
So-called “traditional” usage ofcrypt(3)
is very very veryweak. According to its manpage, Linux's traditionalcrypt(3)
output has only 2**56 variations; too easy to bruteforce today. And this is the default behaviour.
In order to make things robust some OSes implement so-called “modular”usage. To go through, you have to do a complex build-up of thesalt_str
parameter, by hand. Failure in generation of a propersalt string tends not to yield any errors; typos in parameters are normallynot detectable.
For instance, in the following example, the second invocation of#crypt is wrong; it has a typo in“round=” (lacks “s”). However the call does not fail and somethingunexpected is generated.
"foo".crypt("$5$rounds=1000$salt$")# OK, proper usage"foo".crypt("$5$round=1000$salt$")# Typo not detected
Even in the “modular” mode, some hash functions are considered archaic andno longer recommended at all; for instance module$1$
isofficially abandoned by its author: seephk.freebsd.dk/sagas/md5crypt_eol.html. For another instance module$3$
is considered completelybroken: see the manpage of FreeBSD.
On some OS such as Mac OS, there is no modular mode. Yet, as written above,crypt(3)
on Mac OS never fails. This means even if you buildup a proper salt string it generates a traditional DES hash anyways, andthere is no way for you to be aware of.
"foo".crypt("$5$rounds=1000$salt$")# => "$5fNPQMxC5j6."
If for some reason you cannot migrate to other secure contemporary passwordhashing algorithms, install the string-crypt gem andrequire'string/crypt'
to continue using it.
/*
 * Implements String#crypt: hashes +str+ with crypt(3) (or crypt_r(3)
 * when HAVE_CRYPT_R is defined) using +salt+, and returns the result
 * as a new String.
 *
 * Raises ArgumentError when the salt is shorter than two bytes or
 * either of its first two bytes is NUL, and a system-call error when
 * the library function returns NULL.
 */
static VALUE
rb_str_crypt(VALUE str, VALUE salt)
{
#ifdef HAVE_CRYPT_R
    VALUE databuf;
    struct crypt_data *data;
#   define CRYPT_END() ALLOCV_END(databuf)
#else
    extern char *crypt(const char *, const char *);
#   define CRYPT_END() (void)0
#endif
    VALUE hashed_str;
    const char *key, *saltp;
    char *hashed;
#ifdef BROKEN_CRYPT
    char salt_8bit_clean[3];
#endif

    StringValue(salt);
    mustnot_wchar(str);
    mustnot_wchar(salt);
    if (RSTRING_LEN(salt) < 2) {
        goto short_salt;
    }

    key = StringValueCStr(str);
    saltp = RSTRING_PTR(salt);
    if (!saltp[0] || !saltp[1]) {
        goto short_salt;
    }
#ifdef BROKEN_CRYPT
    /* Strip the high bit from non-ASCII salt bytes before calling crypt. */
    if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
        salt_8bit_clean[0] = saltp[0] & 0x7f;
        salt_8bit_clean[1] = saltp[1] & 0x7f;
        salt_8bit_clean[2] = '\0';
        saltp = salt_8bit_clean;
    }
#endif
#ifdef HAVE_CRYPT_R
    data = ALLOCV(databuf, sizeof(struct crypt_data));
# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
    data->initialized = 0;
# endif
    hashed = crypt_r(key, saltp, data);
#else
    hashed = crypt(key, saltp);
#endif
    if (!hashed) {
        /* Save errno before cleanup, which may overwrite it. */
        int err = errno;
        CRYPT_END();
        rb_syserr_fail(err, "crypt");
    }
    hashed_str = rb_str_new_cstr(hashed);
    CRYPT_END();
    return hashed_str;

  short_salt:
    rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
    UNREACHABLE_RETURN(Qundef);
}
Returns a copy ofstr with all characters in the intersection ofits arguments deleted. Uses the same rules for building the set ofcharacters as#count.
"hello".delete"l","lo"#=> "heo""hello".delete"lo"#=> "he""hello".delete"aeiou","^e"#=> "hell""hello".delete"ej-m"#=> "ho"
/*
 * Implements String#delete: duplicates +str+, applies
 * rb_str_delete_bang() to the duplicate, and returns the duplicate
 * (whether or not anything was removed).
 */
static VALUE
rb_str_delete(int argc, VALUE *argv, VALUE str)
{
    VALUE copy = str_duplicate(rb_cString, str);

    rb_str_delete_bang(argc, argv, copy);
    return copy;
}
Performs adelete
operation in place, returningstr,ornil
ifstr was not modified.
/*
 * Implements String#delete!: removes, in place, every character of
 * +str+ that belongs to the intersection of the character sets
 * described by the arguments.
 *
 * Returns +str+ when at least one character was removed, otherwise nil.
 * An empty string returns nil before the arguments are examined.
 */
static VALUE
rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
{
    char squeez[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    char *src, *src_end, *dst;
    VALUE del = 0, nodel = 0;
    int changed = 0;
    int i, ascompat, cr;

    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) {
        return Qnil;
    }
    rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);

    /* The first selector initializes the table; later ones refine it. */
    for (i = 0; i < argc; i++) {
        VALUE selector = argv[i];

        StringValue(selector);
        enc = rb_enc_check(str, selector);
        tr_setup_table(selector, squeez, i==0, &del, &nodel, enc);
    }

    str_modify_keep_cr(str);
    ascompat = rb_enc_asciicompat(enc);
    src = dst = RSTRING_PTR(str);
    src_end = RSTRING_END(str);
    cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;

    /* Compact kept characters towards the front of the buffer. */
    while (src < src_end) {
        unsigned int c;
        int clen;

        if (ascompat && (c = *(unsigned char*)src) < 0x80) {
            if (squeez[c]) {
                changed = 1;
            }
            else {
                if (dst != src) *dst = c;
                dst++;
            }
            src++;
        }
        else {
            c = rb_enc_codepoint_len(src, src_end, &clen, enc);

            if (tr_find(c, squeez, del, nodel)) {
                changed = 1;
            }
            else {
                if (dst != src) rb_enc_mbcput(c, dst, enc);
                dst += clen;
                /* A kept non-ASCII character rules out the 7-bit range. */
                if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
            }
            src += clen;
        }
    }
    TERM_FILL(dst, TERM_LEN(str));
    STR_SET_LEN(str, dst - RSTRING_PTR(str));
    ENC_CODERANGE_SET(str, cr);

    return changed ? str : Qnil;
}
Returns a copy ofstr with leadingprefix
deleted.
"hello".delete_prefix("hel")#=> "lo""hello".delete_prefix("llo")#=> "hello"
/*
 * Implements String#delete_prefix: returns a new String without the
 * leading +prefix+.
 *
 * When deleted_prefix_length() reports nothing to remove (<= 0), a
 * plain duplicate of +str+ is returned.
 */
static VALUE
rb_str_delete_prefix(VALUE str, VALUE prefix)
{
    long prefixlen = deleted_prefix_length(str, prefix);

    if (prefixlen > 0) {
        return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
    }
    return str_duplicate(rb_cString, str);
}
Deletes leadingprefix
fromstr, returningnil
if no change was made.
"hello".delete_prefix!("hel")#=> "lo""hello".delete_prefix!("llo")#=> nil
/*
 * Implements String#delete_prefix!: drops the leading +prefix+ from
 * +str+ in place via rb_str_drop_bytes().
 *
 * Returns nil when deleted_prefix_length() reports nothing to remove.
 */
static VALUE
rb_str_delete_prefix_bang(VALUE str, VALUE prefix)
{
    long prefixlen;

    str_modify_keep_cr(str);

    prefixlen = deleted_prefix_length(str, prefix);
    if (prefixlen > 0) {
        return rb_str_drop_bytes(str, prefixlen);
    }
    return Qnil;
}
Returns a copy ofstr with trailingsuffix
deleted.
"hello".delete_suffix("llo")#=> "he""hello".delete_suffix("hel")#=> "hello"
/*
 * Implements String#delete_suffix: returns a new String without the
 * trailing +suffix+.
 *
 * When deleted_suffix_length() reports nothing to remove (<= 0), a
 * plain duplicate of +str+ is returned.
 */
static VALUE
rb_str_delete_suffix(VALUE str, VALUE suffix)
{
    long suffixlen = deleted_suffix_length(str, suffix);

    if (suffixlen > 0) {
        return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
    }
    return str_duplicate(rb_cString, str);
}
Deletes trailingsuffix
fromstr, returningnil
if no change was made.
"hello".delete_suffix!("llo")#=> "he""hello".delete_suffix!("hel")#=> nil
/*
 * Truncates the trailing suffix off str in place.
 *
 * Returns str when the suffix was removed, Qnil otherwise.  The string
 * is only checked with str_modifiable up front; str_modify_keep_cr is
 * deferred until a removal is known to be needed.
 */
static VALUE
rb_str_delete_suffix_bang(VALUE str, VALUE suffix)
{
    long before, cut, after;

    str_modifiable(str);
    cut = deleted_suffix_length(str, suffix);
    if (cut <= 0) {
        return Qnil;
    }

    before = RSTRING_LEN(str);
    str_modify_keep_cr(str);
    after = before - cut;
    STR_SET_LEN(str, after);
    TERM_FILL(&RSTRING_PTR(str)[after], TERM_LEN(str));

    /* Only a 7-bit code range is kept across the truncation. */
    if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
        ENC_CODERANGE_CLEAR(str);
    }
    return str;
}
Returns a copy ofstr with all uppercase letters replaced withtheir lowercase counterparts. Which letters exactly are replaced, and bywhich other letters, depends on the presence or absence of options, and ontheencoding
of the string.
The meaning of theoptions
is as follows:
Full Unicode case mapping, suitable for most languages (see :turkic and:lithuanian options below for exceptions). Context-dependent case mappingas described in Table 3-14 of the Unicode standard is currently notsupported.
Only the ASCII region, i.e. the characters “A'' to “Z'' and“a'' to “z'', are affected. This option cannot be combinedwith any other option.
Full Unicode case mapping, adapted for Turkic languages (Turkish,Azerbaijani, …). This means that upper case I is mapped to lower casedotless i, and so on.
Currently, just full Unicode case mapping. In the future, full Unicode casemapping adapted for Lithuanian (keeping the dot on the lower case i even ifthere is an accent on top).
Only available ondowncase
anddowncase!
. Unicodecasefolding, which is more far-reaching than Unicode casemapping. This option currently cannot be combined with any other option(i.e. there is currently no variant for turkic languages).
Please note that several assumptions that are valid for ASCII-only caseconversions do not hold for more general case conversions. For example, thelength of the result may not be the same as the length of the input(neither in characters nor in bytes), some roundtrip assumptions (e.g.str.downcase == str.upcase.downcase) may not apply, and Unicodenormalization (i.e.#unicode_normalize) isnot necessarily maintained by case mapping operations.
Non-ASCII case mapping/folding is currently supported for UTF-8,UTF-16BE/LE, UTF-32BE/LE, and ISO-8859-1~16 Strings/Symbols. This supportwill be extended to other encodings.
"hEllO".downcase#=> "hello"
/*
 * Returns a downcased copy of str.
 *
 * Depending on the parsed case options, one of three strategies is used:
 * the single-byte path (downcase_single on a byte copy), ASCII-only
 * mapping into a fresh buffer, or the general rb_str_casemap.
 */
static VALUE
rb_str_downcase(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = check_case_options(argc, argv, ONIGENC_CASE_DOWNCASE);
    rb_encoding *enc = str_true_enc(str);
    VALUE lowered;

    if (case_option_single_p(flags, enc, str)) {
        lowered = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy(lowered, str);
        downcase_single(lowered);
        return lowered;
    }
    if (flags & ONIGENC_CASE_ASCII_ONLY) {
        lowered = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, lowered, &flags, enc);
        return lowered;
    }
    return rb_str_casemap(str, &flags, enc);
}
Downcases the contents ofstr, returningnil
if nochanges were made.
See#downcase for meaning ofoptions
and use with different encodings.
/*
 * Downcases str in place.
 *
 * Returns str when ONIGENC_CASE_MODIFIED ended up set in the flags,
 * Qnil otherwise.  Options are parsed before the receiver is touched.
 */
static VALUE
rb_str_downcase_bang(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
    rb_encoding *enc;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);

    if (case_option_single_p(flags, enc, str)) {
        if (downcase_single(str)) {
            flags |= ONIGENC_CASE_MODIFIED;
        }
    }
    else if (flags & ONIGENC_CASE_ASCII_ONLY) {
        rb_str_ascii_casemap(str, str, &flags, enc);
    }
    else {
        /* general mapping builds a new string; adopt its contents */
        str_shared_replace(str, rb_str_casemap(str, &flags, enc));
    }

    return (flags & ONIGENC_CASE_MODIFIED) ? str : Qnil;
}
Returns a quoted version of the string with all non-printing charactersreplaced by\xHH
notation and all special characters escaped.
This method can be used for round-trip: if the resultingnew_str
is eval'ed, it will produce the original string.
"hello \n ''".dump#=> "\"hello \\n ''\"""\f\x00\xff\\\"".dump#=> "\"\\f\\x00\\xFF\\\\\\\"\""
See also#undump.
/*
 * Returns a new string holding a double-quoted, escaped rendering of str.
 *
 * Works in two passes over the bytes of str:
 *   1. compute the exact output length `len` (starting at 2 for the
 *      surrounding quotes, plus the ".dup.force_encoding(...)" suffix when
 *      the encoding is not ASCII-compatible); raises RuntimeError
 *      "string size too big" if the length would exceed LONG_MAX;
 *   2. allocate the result with that length and emit the escapes.
 *
 * Escaping rules visible below: `"` and `\` get a backslash; `#` gets one
 * only when IS_EVSTR says it starts an interpolation; \n \r \t \f \v \b
 * \a \e use their mnemonic escapes; other printable bytes are copied.
 * For a UTF-8 string, a valid multibyte character is written as \uXXXX
 * (code point <= 0xFFFF) or \u{...}; every remaining byte is written as
 * \xNN.  Both passes must classify each byte identically, otherwise the
 * precomputed length would not match what is written.
 *
 * The result keeps the original encoding index when that encoding is
 * ASCII-compatible and is associated with ASCII-8BIT otherwise; its code
 * range is set to 7-bit.
 */
VALUErb_str_dump(VALUE str){ int encidx = rb_enc_get_index(str); rb_encoding *enc = rb_enc_from_index(encidx); long len; const char *p, *pend; char *q, *qend; VALUE result; int u8 = (encidx == rb_utf8_encindex()); static const char nonascii_suffix[] = ".dup.force_encoding(\"%s\")"; len = 2; /* "" */ if (!rb_enc_asciicompat(enc)) { len += strlen(nonascii_suffix) - rb_strlen_lit("%s"); len += strlen(enc->name); } p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); while (p < pend) { int clen; unsigned char c = *p++; switch (c) { case '"': case '\\': case '\n': case '\r': case '\t': case '\f': case '\013': case '\010': case '\007': case '\033': clen = 2; break; case '#': clen = IS_EVSTR(p, pend) ? 2 : 1; break; default: if (ISPRINT(c)) { clen = 1; } else { if (u8 && c > 0x7F) { /* \u notation */ int n = rb_enc_precise_mbclen(p-1, pend, enc); if (MBCLEN_CHARFOUND_P(n)) { unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc); if (cc <= 0xFFFF) clen = 6; /* \uXXXX */ else if (cc <= 0xFFFFF) clen = 9; /* \u{XXXXX} */ else clen = 10; /* \u{XXXXXX} */ p += MBCLEN_CHARFOUND_LEN(n)-1; break; } } clen = 4; /* \xNN */ } break; } if (clen > LONG_MAX - len) { rb_raise(rb_eRuntimeError, "string size too big"); } len += clen; } result = rb_str_new(0, len); p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); q = RSTRING_PTR(result); qend = q + len + 1; *q++ = '"'; while (p < pend) { unsigned char c = *p++; if (c == '"' || c == '\\') { *q++ = '\\'; *q++ = c; } else if (c == '#') { if (IS_EVSTR(p, pend)) *q++ = '\\'; *q++ = '#'; } else if (c == '\n') { *q++ = '\\'; *q++ = 'n'; } else if (c == '\r') { *q++ = '\\'; *q++ = 'r'; } else if (c == '\t') { *q++ = '\\'; *q++ = 't'; } else if (c == '\f') { *q++ = '\\'; *q++ = 'f'; } else if (c == '\013') { *q++ = '\\'; *q++ = 'v'; } else if (c == '\010') { *q++ = '\\'; *q++ = 'b'; } else if (c == '\007') { *q++ = '\\'; *q++ = 'a'; } else if (c == '\033') { *q++ = '\\'; *q++ = 'e'; } else if (ISPRINT(c)) { *q++ = c; } else { *q++ = '\\'; if 
(u8) { int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1; if (MBCLEN_CHARFOUND_P(n)) { int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc); p += n; if (cc <= 0xFFFF) snprintf(q, qend-q, "u%04X", cc); /* \uXXXX */ else snprintf(q, qend-q, "u{%X}", cc); /* \u{XXXXX} or \u{XXXXXX} */ q += strlen(q); continue; } } snprintf(q, qend-q, "x%02X", c); q += 3; } } *q++ = '"'; *q = '\0'; if (!rb_enc_asciicompat(enc)) { snprintf(q, qend-q, nonascii_suffix, enc->name); encidx = rb_ascii8bit_encindex(); } /* result from dump is ASCII */ rb_enc_associate_index(result, encidx); ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT); return result;}
Passes each byte instr to the given block, or returns anenumerator if no block is given.
"hello".each_byte {|c|printc,' ' }
produces:
104 101 108 108 111
/*
 * String#each_byte: yields each byte of str to the block.  Without a
 * block, returns an enumerator sized by rb_str_each_byte_size.
 */
static VALUE
rb_str_each_byte(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size);

    /* 0 as the second argument: no result array is collected */
    return rb_str_enumerate_bytes(str, 0);
}
Passes each character instr to the given block, or returns anenumerator if no block is given.
"hello".each_char {|c|printc,' ' }
produces:
hello
/*
 * String#each_char: yields each character of str to the block.  Without
 * a block, returns an enumerator sized by rb_str_each_char_size.
 */
static VALUE
rb_str_each_char(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);

    /* 0 as the second argument: no result array is collected */
    return rb_str_enumerate_chars(str, 0);
}
Passes the Integer ordinal of each character in str, also known as a codepoint when applied to Unicode strings, to the given block. For encodings other than UTF-8/UTF-16(BE|LE)/UTF-32(BE|LE), values are directly derived from the binary representation of each character.
If no block is given, an enumerator is returned instead.
"hello\u0639".each_codepoint {|c|printc,' ' }
produces:
104 101 108 108 111 1593
/*
 * String#each_codepoint: yields the Integer ordinal of each character.
 * Without a block, returns an enumerator; its size callback is the
 * character-count one (rb_str_each_char_size).
 */
static VALUE
rb_str_each_codepoint(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);

    /* 0 as the second argument: no result array is collected */
    return rb_str_enumerate_codepoints(str, 0);
}
Passes each grapheme cluster instr to the given block, or returnsan enumerator if no block is given. Unlike#each_char, this enumerates bygrapheme clusters defined by Unicode Standard Annex #29unicode.org/reports/tr29/
"a\u0300".each_char.to_a.size#=> 2"a\u0300".each_grapheme_cluster.to_a.size#=> 1
/*
 * String#each_grapheme_cluster: yields each grapheme cluster of str.
 * Without a block, returns an enumerator sized by
 * rb_str_each_grapheme_cluster_size.
 */
static VALUE
rb_str_each_grapheme_cluster(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size);

    /* 0 as the second argument: no result array is collected */
    return rb_str_enumerate_grapheme_clusters(str, 0);
}
Splitsstr using the supplied parameter as the record separator($/
by default), passing each substring in turn to thesupplied block. If a zero-length record separator is supplied, the stringis split into paragraphs delimited by multiple successive newlines.
Ifchomp
istrue
,separator
will beremoved from the end of each line.
If no block is given, an enumerator is returned instead.
"hello\nworld".each_line {|s|ps}# prints:# "hello\n"# "world""hello\nworld".each_line('l') {|s|ps}# prints:# "hel"# "l"# "o\nworl"# "d""hello\n\n\nworld".each_line('') {|s|ps}# prints# "hello\n\n"# "world""hello\nworld".each_line(chomp:true) {|s|ps}# prints:# "hello"# "world""hello\nworld".each_line('l',chomp:true) {|s|ps}# prints:# "he"# ""# "o\nwor"# "d"
/*
 * String#each_line: yields each line of str, forwarding the separator
 * and keyword arguments untouched.  Without a block, returns an
 * enumerator that has no size function (0 is passed for it).
 */
static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, argc, argv, 0);

    /* trailing 0: no result array is collected */
    return rb_str_enumerate_lines(argc, argv, str, 0);
}
Returnstrue
if the length ofself
is zero,false
otherwise:
"hello".empty?# => false" ".empty?# => false"".empty?# => true
/*
 * String#empty?: Qtrue when the byte length of str is zero, Qfalse
 * otherwise.
 */
static VALUE
rb_str_empty(VALUE str)
{
    return (RSTRING_LEN(str) == 0) ? Qtrue : Qfalse;
}
The first form returns a copy ofstr
transcoded to encodingencoding
. The second form returns a copy ofstr
transcoded from src_encoding to dst_encoding. The last form returns a copyofstr
transcoded toEncoding.default_internal
.
By default, the first and second form raiseEncoding::UndefinedConversionErrorfor characters that are undefined in the destination encoding, andEncoding::InvalidByteSequenceErrorfor invalid byte sequences in the source encoding. The last form by defaultdoes not raise exceptions but uses replacement strings.
Theoptions
keyword arguments give details for conversion. Thearguments are:
If the value is:replace
,encode replaces invalid bytesequences instr
with the replacement character. The defaultis to raise theEncoding::InvalidByteSequenceErrorexception
If the value is:replace
,encode replaces characters which areundefined in the destination encoding with the replacement character. Thedefault is to raise theEncoding::UndefinedConversionError.
Sets the replacement string to the given value. The default replacement string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
Sets the replacement string by the given object for undefined character. The object should be aHash, aProc, aMethod, or an objectwhich has [] method. Its key is an undefined character encoded in thesource encoding of current transcoder. Its value can be any encoding untilit can be converted into the destination encoding of the transcoder.
The value must be :text or :attr. If the value is :text, encode replaces undefined characters with their (upper-case hexadecimal) numeric character references. '&', '<', and '>' are converted to "&amp;", "&lt;", and "&gt;", respectively. If the value is :attr, encode also quotes the replacement result (using '"'), and replaces '"' with "&quot;".
Replaces LF ("\n") with CR ("\r") if value is true.
Replaces LF ("\n") with CRLF ("\r\n") if value is true.
Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true.
/*
 * String#encode: transcodes str according to the arguments and returns
 * the result as produced by encoded_dup; the receiver itself is only
 * read here.
 */
static VALUE
str_encode(int argc, VALUE *argv, VALUE str)
{
    VALUE converted = str;
    const int idx = str_transcode(argc, argv, &converted);

    return encoded_dup(converted, str, idx);
}
The first form transcodes the contents ofstr from str.encoding toencoding
. The second form transcodes the contents ofstr from src_encoding to dst_encoding. Theoptions
keyword arguments give details for conversion. See#encode for details. Returns thestring even if no changes were made.
/*
 * String#encode!: transcodes str in place and always returns str.
 *
 * Raises for a frozen receiver before any conversion is attempted.  A
 * negative index from str_transcode leaves str untouched; when the
 * transcoder hands back the very same object, only the encoding index
 * is re-associated.
 */
static VALUE
str_encode_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE converted;
    int idx;

    rb_check_frozen(str);

    converted = str;
    idx = str_transcode(argc, argv, &converted);
    if (idx < 0) {
        return str;
    }
    if (converted != str) {
        rb_str_shared_replace(str, converted);
        return str_encode_associate(str, idx);
    }
    rb_enc_associate_index(str, idx);
    return str;
}
Returns theEncoding object that represents theencoding of obj.
/*
 * Returns the Encoding object for obj.  Raises TypeError
 * ("unknown encoding") when no encoding index can be obtained.
 */
VALUE
rb_obj_encoding(VALUE obj)
{
    const int idx = rb_enc_get_index(obj);

    if (idx < 0) {
        rb_raise(rb_eTypeError, "unknown encoding");
    }
    return rb_enc_from_encoding_index(idx & ENC_INDEX_MASK);
}
Returns true ifstr
ends with one of thesuffixes
given.
"hello".end_with?("ello")#=> true# returns true if one of the +suffixes+ matches."hello".end_with?("heaven","ello")#=> true"hello".end_with?("heaven","paradise")#=> false
/*
 * String#end_with?: Qtrue as soon as one of the given suffixes matches
 * the tail of str, Qfalse when none does (including when no suffix is
 * given).  Each argument is converted with StringValue and checked for
 * encoding compatibility before its length is even considered.
 */
static VALUE
rb_str_end_with(int argc, VALUE *argv, VALUE str)
{
    int idx;

    for (idx = 0; idx < argc; idx++) {
        VALUE suffix = argv[idx];
        rb_encoding *enc;
        char *head, *tail, *start;

        StringValue(suffix);
        enc = rb_enc_check(str, suffix);
        if (RSTRING_LEN(str) < RSTRING_LEN(suffix)) {
            continue;
        }

        head = RSTRING_PTR(str);
        tail = head + RSTRING_LEN(str);
        start = tail - RSTRING_LEN(suffix);

        /* the candidate must begin on a character boundary */
        if (rb_enc_left_char_head(head, start, tail, enc) != start) {
            continue;
        }
        if (memcmp(start, RSTRING_PTR(suffix), RSTRING_LEN(suffix)) == 0) {
            return Qtrue;
        }
    }
    return Qfalse;
}
Returns true if object has the same length and content as self; false otherwise:
s ='foo's.eql?('foo')# => trues.eql?('food')# => falses.eql?('FOO')# => false
Returnsfalse
if the two strings' encodings are notcompatible:
"\u{e4 f6 fc}".encode("ISO-8859-1").eql?("\u{c4 d6 dc}")# => false
MJIT_FUNC_EXPORTED VALUErb_str_eql(VALUE str1, VALUE str2){ if (str1 == str2) return Qtrue; if (!RB_TYPE_P(str2, T_STRING)) return Qfalse; return rb_str_eql_internal(str1, str2);}
Changes the encoding toencoding
and returns self.
/*
 * String#force_encoding: re-labels str with the given encoding, clears
 * its cached code range, and returns str.  The modifiability check
 * happens before the encoding argument is resolved.
 */
static VALUE
rb_str_force_encoding(VALUE str, VALUE enc)
{
    rb_encoding *target;

    str_modifiable(str);
    target = rb_to_encoding(enc);
    rb_enc_associate(str, target);
    ENC_CODERANGE_CLEAR(str);
    return str;
}
/*
 * Freezes str and returns it.  An already frozen string is returned
 * as is; otherwise the string is first resized to its current length
 * (presumably to release spare capacity -- confirm in rb_str_resize).
 */
VALUE
rb_str_freeze(VALUE str)
{
    if (!OBJ_FROZEN(str)) {
        rb_str_resize(str, RSTRING_LEN(str));
        return rb_obj_freeze(str);
    }
    return str;
}
returns theindexth byte as an integer.
/*
 * String#getbyte: returns the byte at `index` as a Fixnum in 0..255.
 * A negative index counts from the end; any index outside the string
 * yields Qnil.
 */
static VALUE
rb_str_getbyte(VALUE str, VALUE index)
{
    long at = NUM2LONG(index);
    const long nbytes = RSTRING_LEN(str);

    if (at < 0) {
        at += nbytes;
    }
    if (at < 0 || nbytes <= at) {
        return Qnil;
    }
    return INT2FIX((unsigned char)RSTRING_PTR(str)[at]);
}
Returns an array of grapheme clusters instr. This is a shorthandforstr.each_grapheme_cluster.to_a
.
If a block is given, which is a deprecated form, works the same aseach_grapheme_cluster
.
/*
 * String#grapheme_clusters: collects the grapheme clusters of str into
 * the array prepared by WANTARRAY, which receives the character count
 * of str as its second argument.
 */
static VALUE
rb_str_grapheme_clusters(VALUE str)
{
    VALUE clusters = WANTARRAY("grapheme_clusters", rb_str_strlen(str));

    return rb_str_enumerate_grapheme_clusters(str, clusters);
}
Returns a copy ofstr withall occurrences ofpattern substituted for the second argument. Thepatternis typically aRegexp; if given as aString, any regular expression metacharacters itcontains will be interpreted literally, e.g.\d
will match abackslash followed by 'd', instead of a digit.
If replacement is a String it will be substituted for the matched text. It may contain back-references to the pattern's capture groups of the form \d, where d is a group number, or \k<n>, where n is a group name. Similarly, \&, \', \`, and \+ correspond to special variables, $&, $', $`, and $+, respectively. (See regexp.rdoc for details.) \0 is the same as \&. \\ is interpreted as an escape, i.e., a single backslash. Note that, within replacement the special match variables, such as $&, will not refer to the current match.
If the second argument is aHash, and the matchedtext is one of its keys, the corresponding value is the replacement string.
In the block form, the current match string is passed in as a parameter,and variables such as$1
,$2
,$`
,$&
, and$'
will be set appropriately.(See regexp.rdoc for details.) The value returned by the block will besubstituted for the match on each call.
When neither a block nor a second argument is supplied, anEnumerator is returned.
"hello".gsub(/[aeiou]/,'*')#=> "h*ll*""hello".gsub(/([aeiou])/,'<\1>')#=> "h<e>ll<o>""hello".gsub(/./) {|s|s.ord.to_s+' '}#=> "104 101 108 108 111 ""hello".gsub(/(?<foo>[aeiou])/,'{\k<foo>}')#=> "h{e}ll{o}"'hello'.gsub(/[eo]/,'e' =>3,'o' =>'*')#=> "h3ll*"
Note that a string literal consumes backslashes. (See syntax/literals.rdocfor details on string literals.) Back-references are typically preceded byan additional backslash. For example, if you want to write a back-reference\&
inreplacement
with a double-quoted stringliteral, you need to write:"..\\&.."
. If youwant to write a non-back-reference string\&
inreplacement
, you need first to escape the backslash to preventthis method from interpreting it as a back-reference, and then you need toescape the backslashes again to prevent a string literal from consumingthem:"..\\\\&.."
. You may want to use the blockform to avoid a lot of backslashes.
/*
 * String#gsub: non-destructive global substitution.  Delegates to
 * str_gsub with its last argument (the in-place flag) set to 0.
 */
static VALUE
rb_str_gsub(int argc, VALUE *argv, VALUE str)
{
    const int in_place = 0;

    return str_gsub(argc, argv, str, in_place);
}
Performs the substitutions of#gsubin place, returningstr, ornil
if no substitutionswere performed. If no block and noreplacement is given, anenumerator is returned instead.
/*
 * String#gsub!: in-place global substitution.  Calls str_modify_keep_cr
 * on the receiver first, then delegates to str_gsub with its last
 * argument (the in-place flag) set to 1.
 */
static VALUE
rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
{
    const int in_place = 1;

    str_modify_keep_cr(str);
    return str_gsub(argc, argv, str, in_place);
}
Returns the integer hash value forself
. The value is based onthe length, content and encoding ofself
.
/*
 * String#hash: converts the st_index_t produced by rb_str_hash into a
 * Fixnum.
 */
static VALUE
rb_str_hash_m(VALUE str)
{
    const st_index_t digest = rb_str_hash(str);

    return ST2FIX(digest);
}
Treats leading characters fromstr as a string of hexadecimaldigits (with an optional sign and an optional0x
) and returnsthe corresponding number. Zero is returned on error.
"0x0a".hex#=> 10"-1234".hex#=> -4660"0".hex#=> 0"wombat".hex#=> 0
/*
 * String#hex: parses the leading characters of str as a base-16 number
 * via rb_str_to_inum, with its last argument (badcheck) FALSE.
 */
static VALUE
rb_str_hex(VALUE str)
{
    const int base = 16;

    return rb_str_to_inum(str, base, FALSE);
}
Returnstrue
ifstr contains the given string orcharacter.
"hello".include?"lo"#=> true"hello".include?"ol"#=> false"hello".include??h#=> true
/*
 * String#include?: Qtrue when arg (converted with StringValue) occurs
 * somewhere in str, i.e. rb_str_index does not return -1.
 */
static VALUE
rb_str_include(VALUE str, VALUE arg)
{
    long found_at;

    StringValue(arg);
    found_at = rb_str_index(str, arg, 0);

    return (found_at == -1) ? Qfalse : Qtrue;
}
Returns the Integer index of the first occurrence of the givensubstring
, ornil
if none found:
'foo'.index('f')# => 0'foo'.index('o')# => 1'foo'.index('oo')# => 1'foo'.index('ooo')# => nil
Returns the Integer index of the first match for the given Regexpregexp
, ornil
if none found:
'foo'.index(/f/)# => 0'foo'.index(/o/)# => 1'foo'.index(/oo/)# => 1'foo'.index(/ooo/)# => nil
Integer argumentoffset
, if given, specifies the position inthe string to begin the search:
'foo'.index('o',1)# => 1'foo'.index('o',2)# => 2'foo'.index('o',3)# => nil
Ifoffset
is negative, counts backward from the end ofself
:
'foo'.index('o',-1)# => 2'foo'.index('o',-2)# => 1'foo'.index('o',-3)# => 1'foo'.index('o',-4)# => nil
Related:#rindex
/*
 * String#index(substring_or_regexp, offset = 0)
 *
 * Returns the character index of the first occurrence of `sub` at or
 * after `offset`, or Qnil when there is none.
 *
 *   argv[0]  String (anything else is converted with StringValue) or
 *            Regexp to look for
 *   argv[1]  optional Integer offset in characters; a negative value
 *            counts from the end of str
 *
 * For a Regexp argument the back-reference is cleared on every failure
 * path, so a failed search never leaves a stale match from an earlier
 * operation visible.  This now includes the case where the offset lies
 * past the end of the string, which previously returned nil without
 * touching the back-reference, unlike the negative out-of-range case.
 */
static VALUE
rb_str_index_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    long pos;

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
        pos = NUM2LONG(initpos);
    }
    else {
        pos = 0;
    }

    if (pos < 0) {
        /* negative offsets are relative to the end of the string */
        pos += str_strlen(str, NULL);
        if (pos < 0) {
            if (RB_TYPE_P(sub, T_REGEXP)) {
                rb_backref_set(Qnil);
            }
            return Qnil;
        }
    }

    if (RB_TYPE_P(sub, T_REGEXP)) {
        VALUE match;
        struct re_registers *regs;

        if (pos > str_strlen(str, NULL)) {
            /* offset beyond the end: no match is possible */
            rb_backref_set(Qnil);
            return Qnil;
        }
        /* convert the character offset into a byte offset for the search */
        pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
                         rb_enc_check(str, sub), single_byte_optimizable(str));

        if (rb_reg_search(sub, str, pos, 0) < 0) {
            return Qnil;
        }
        match = rb_backref_get();
        regs = RMATCH_REGS(match);  /* read by the BEG() macro */
        pos = rb_str_sublen(str, BEG(0));
        return LONG2NUM(pos);
    }

    StringValue(sub);
    pos = rb_str_index(str, sub, pos);
    /* byte offset -> character index; the -1 "not found" result is tested right after */
    pos = rb_str_sublen(str, pos);

    if (pos == -1) return Qnil;
    return LONG2NUM(pos);
}
Replaces the contents ofstr with the corresponding values inother_str.
s ="hello"#=> "hello"s.replace"world"#=> "world"
/*
 * String#replace: makes str hold the contents of str2 and returns the
 * result of str_replace.  The receiver must be modifiable even when
 * str2 is the receiver itself, in which case nothing else happens.
 */
VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);

    if (str != str2) {
        StringValue(str2);
        str_discard(str);
        return str_replace(str, str2);
    }
    return str;
}
Inserts the givenother_string
intoself
; returnsself
.
If the Integerindex
is positive, insertsother_string
at offsetindex
:
'foo'.insert(1,'bar')# => "fbaroo"
If the Integerindex
is negative, counts backward from the endofself
and insertsother_string
at offsetindex+1
(that is,afterself[index]
):
'foo'.insert(-2,'bar')# => "fobaro"
/*
 * String#insert: inserts str2 into str at character offset idx and
 * returns str.
 *
 * An index of -1 appends.  Any other negative index is shifted by one
 * before splicing, so that the insertion lands after the addressed
 * character.
 */
static VALUE
rb_str_insert(VALUE str, VALUE idx, VALUE str2)
{
    long at = NUM2LONG(idx);

    if (at == -1) {
        return rb_str_append(str, str2);
    }
    if (at < 0) {
        at++;
    }
    rb_str_splice(str, at, 0, str2);
    return str;
}
Returns a printable version ofstr, surrounded by quote marks,with special characters escaped.
str ="hello"str[3] ="\b"str.inspect#=> "\"hel\\bo\""
/*
 * Returns a new string with a quoted, escaped, printable rendering of str.
 *
 * The result is associated with `resenc`: the default internal encoding,
 * else the default external encoding, replaced by US-ASCII when that
 * choice is not ASCII-compatible.
 *
 * The main loop walks str one character at a time.  `prev` marks the
 * start of the run of characters that can be copied verbatim; that run is
 * appended to the result only when something needing an escape is hit
 * (and once more after the loop).  Per character:
 *   - an invalid byte sequence is written byte by byte as \xNN;
 *   - `"`, `\`, and `#` followed by `$`, `@` or `{` get a leading
 *     backslash (only when the encoding is ASCII-compatible or Unicode);
 *   - \n \r \t \f \v \b \a \e use their mnemonic escapes;
 *   - a character printable in the result encoding, or a printable ASCII
 *     character of an ASCII-compatible encoding, stays in the verbatim
 *     run;
 *   - anything else goes through rb_str_buf_cat_escaped_char.
 */
VALUErb_str_inspect(VALUE str){ int encidx = ENCODING_GET(str); rb_encoding *enc = rb_enc_from_index(encidx), *actenc; const char *p, *pend, *prev; char buf[CHAR_ESC_LEN + 1]; VALUE result = rb_str_buf_new(0); rb_encoding *resenc = rb_default_internal_encoding(); int unicode_p = rb_enc_unicode_p(enc); int asciicompat = rb_enc_asciicompat(enc); if (resenc == NULL) resenc = rb_default_external_encoding(); if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding(); rb_enc_associate(result, resenc); str_buf_cat2(result, "\""); p = RSTRING_PTR(str); pend = RSTRING_END(str); prev = p; actenc = get_actual_encoding(encidx, str); if (actenc != enc) { enc = actenc; if (unicode_p) unicode_p = rb_enc_unicode_p(enc); } while (p < pend) { unsigned int c, cc; int n; n = rb_enc_precise_mbclen(p, pend, enc); if (!MBCLEN_CHARFOUND_P(n)) { if (p > prev) str_buf_cat(result, prev, p - prev); n = rb_enc_mbminlen(enc); if (pend < p + n) n = (int)(pend - p); while (n--) { snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377); str_buf_cat(result, buf, strlen(buf)); prev = ++p; } continue; } n = MBCLEN_CHARFOUND_LEN(n); c = rb_enc_mbc_to_codepoint(p, pend, enc); p += n; if ((asciicompat || unicode_p) && (c == '"'|| c == '\\' || (c == '#' && p < pend && MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) && (cc = rb_enc_codepoint(p,pend,enc), (cc == '$' || cc == '@' || cc == '{'))))) { if (p - n > prev) str_buf_cat(result, prev, p - n - prev); str_buf_cat2(result, "\\"); if (asciicompat || enc == resenc) { prev = p - n; continue; } } switch (c) { case '\n': cc = 'n'; break; case '\r': cc = 'r'; break; case '\t': cc = 't'; break; case '\f': cc = 'f'; break; case '\013': cc = 'v'; break; case '\010': cc = 'b'; break; case '\007': cc = 'a'; break; case 033: cc = 'e'; break; default: cc = 0; break; } if (cc) { if (p - n > prev) str_buf_cat(result, prev, p - n - prev); buf[0] = '\\'; buf[1] = (char)cc; str_buf_cat(result, buf, 2); prev = p; continue; } if ((enc == resenc && rb_enc_isprint(c, 
enc)) || (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) { continue; } else { if (p - n > prev) str_buf_cat(result, prev, p - n - prev); rb_str_buf_cat_escaped_char(result, c, unicode_p); prev = p; continue; } } if (p > prev) str_buf_cat(result, prev, p - prev); str_buf_cat2(result, "\""); return result;}
Returns theSymbol corresponding tostr,creating the symbol if it did not previously exist. SeeSymbol#id2name.
"Koala".intern#=> :Koalas ='cat'.to_sym#=> :cats== :cat#=> trues ='@cat'.to_sym#=> :@cats== :@cat#=> true
This can also be used to create symbols that cannot be represented usingthe:xxx
notation.
'cat and dog'.to_sym#=> :"cat and dog"
/*
 * String#intern / String#to_sym: returns the Symbol for str, creating
 * it when the lookup finds none.  Lookup and creation both happen
 * between GLOBAL_SYMBOLS_ENTER and GLOBAL_SYMBOLS_LEAVE.
 *
 * With USE_SYMBOL_GC the symbol is built from a frozen duplicate of
 * str; the duplicate is re-labelled US-ASCII when sym_check_asciionly
 * accepts the string and it is not US-ASCII already.  Without
 * USE_SYMBOL_GC the symbol comes from intern_str.
 */
VALUE
rb_str_intern(VALUE str)
{
    VALUE sym;
#if USE_SYMBOL_GC
    rb_encoding *enc, *ascii;
    int type;
#else
    ID id;
#endif

    GLOBAL_SYMBOLS_ENTER(symbols);
    {
        sym = lookup_str_sym_with_lock(symbols, str);

        if (!sym) {
#if USE_SYMBOL_GC
            int relabel;

            enc = rb_enc_get(str);
            ascii = rb_usascii_encoding();
            relabel = (enc != ascii && sym_check_asciionly(str));

            /* never intern the caller's own string object */
            str = rb_str_dup(str);
            if (relabel) {
                rb_enc_associate(str, ascii);
                enc = ascii;
            }
            OBJ_FREEZE(str);

            str = rb_fstring(str);
            type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
            if (type < 0) {
                type = ID_JUNK;
            }
            sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
#else
            id = intern_str(str, 0);
            sym = ID2SYM(id);
#endif
        }
    }
    GLOBAL_SYMBOLS_LEAVE();

    return sym;
}
Returns an array of lines instr split using the supplied recordseparator ($/
by default). This is a shorthand forstr.each_line(separator, getline_args).to_a
.
Ifchomp
istrue
,separator
will beremoved from the end of each line.
"hello\nworld\n".lines#=> ["hello\n", "world\n"]"hello world".lines(' ')#=> ["hello ", " ", "world"]"hello\nworld\n".lines(chomp:true)#=> ["hello", "world"]
If a block is given, which is a deprecated form, works the same aseach_line
.
/*
 * String#lines: collects the lines of str into the array prepared by
 * WANTARRAY, forwarding the separator and keyword arguments untouched.
 */
static VALUE
rb_str_lines(int argc, VALUE *argv, VALUE str)
{
    VALUE collected = WANTARRAY("lines", 0);

    return rb_str_enumerate_lines(argc, argv, str, collected);
}
Ifinteger is greater than the length ofstr, returns anewString of lengthinteger withstr left justified and padded withpadstr; otherwise,returnsstr.
"hello".ljust(4)#=> "hello""hello".ljust(20)#=> "hello ""hello".ljust(20,'1234')#=> "hello123412341234123"
/*
 * String#ljust: left-justifies str.  Thin wrapper that selects the 'l'
 * mode of rb_str_justify.
 */
static VALUE
rb_str_ljust(int argc, VALUE *argv, VALUE str)
{
    const char mode = 'l';

    return rb_str_justify(argc, argv, str, mode);
}
Returns a copy of the receiver with leading whitespace removed. See also#rstrip and#strip.
Refer to#strip for the definitionof whitespace.
" hello ".lstrip#=> "hello ""hello".lstrip#=> "hello"
/*
 * String#lstrip: returns a copy of str without its leading whitespace.
 * When lstrip_offset finds nothing to strip, a plain duplicate of str is
 * returned.
 */
static VALUE
rb_str_lstrip(VALUE str)
{
    char *base;
    long nbytes, skip;

    RSTRING_GETMEM(str, base, nbytes);
    skip = lstrip_offset(str, base, base + nbytes, STR_ENC_GET(str));

    if (skip > 0) {
        return rb_str_subseq(str, skip, nbytes - skip);
    }
    return str_duplicate(rb_cString, str);
}
Removes leading whitespace from the receiver. Returns the altered receiver,ornil
if no change was made. See also#rstrip! and#strip!.
Refer to#strip for the definitionof whitespace.
" hello ".lstrip!#=> "hello ""hello ".lstrip!#=> nil"hello".lstrip!#=> nil
/*
 * String#lstrip!: removes leading whitespace from str in place.
 *
 * Returns str when something was stripped, Qnil otherwise.  The
 * remaining bytes are moved to the front of the buffer; the terminator
 * is rewritten only when SHARABLE_MIDDLE_SUBSTRING is off.
 */
static VALUE
rb_str_lstrip_bang(VALUE str)
{
    rb_encoding *enc;
    char *base;
    long before, skip, after;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, base, before);

    skip = lstrip_offset(str, base, base + before, enc);
    if (skip <= 0) {
        return Qnil;
    }

    after = before - skip;
    memmove(base, base + skip, after);  /* regions overlap */
    STR_SET_LEN(str, after);
#if !SHARABLE_MIDDLE_SUBSTRING
    TERM_FILL(base + after, rb_enc_mbminlen(enc));
#endif
    return str;
}
Returns a Matchdata object (ornil
) based onself
and the givenpattern
.
Note: also updatesRegexp-relatedglobal variables.
Computesregexp
by convertingpattern
(if notalready a Regexp).
regexp =Regexp.new(pattern)
Computesmatchdata
, which will be either a MatchData object ornil
(seeRegexp#match):
matchdata = regexp.match(self)
With no block given, returns the computedmatchdata
:
'foo'.match('f')# => #<MatchData "f">'foo'.match('o')# => #<MatchData "o">'foo'.match('x')# => nil
If Integer argumentoffset
is given, the search begins atindexoffset
:
'foo'.match('f',1)# => nil'foo'.match('o',1)# => #<MatchData "o">
With a block given, calls the block with the computedmatchdata
and returns the block's return value:
'foo'.match(/o/) {|matchdata|matchdata }# => #<MatchData "o">'foo'.match(/x/) {|matchdata|matchdata }# => nil'foo'.match(/f/,1) {|matchdata|matchdata }# => nil
/*
 * String#match: converts argv[0] into a pattern with get_pat and calls
 * its `match` method with str (plus any further arguments).
 *
 * argv[0] is overwritten with str so that the same argument vector can
 * be forwarded.  Arity is only enforced here for the "no arguments"
 * case.  With a block and a non-nil result, returns what the block
 * returns; otherwise returns the match result itself.
 */
static VALUE
rb_str_match_m(int argc, VALUE *argv, VALUE str)
{
    VALUE pattern, matchdata;

    if (argc < 1) {
        rb_check_arity(argc, 1, 2);
    }

    pattern = argv[0];
    argv[0] = str;
    matchdata = rb_funcallv(get_pat(pattern), rb_intern("match"), argc, argv);

    if (NIL_P(matchdata) || !rb_block_given_p()) {
        return matchdata;
    }
    return rb_yield(matchdata);
}
Returnstrue
orfalse
based on whether a match isfound forself
andpattern
.
Note: does not updateRegexp-relatedglobal variables.
Computesregexp
by convertingpattern
(if notalready a Regexp).
regexp =Regexp.new(pattern)
Returns true if self.match(regexp) returns a MatchData object, false otherwise:
'foo'.match?(/o/)# => true'foo'.match?('o')# => true'foo'.match?(/x/)# => false
If Integer argumentoffset
is given, the search begins atindexoffset
:
'foo'.match?('f',1)# => false'foo'.match?('o',1)# => true
/*
 * String#match?: reports, through rb_reg_match_p, whether the pattern
 * built from argv[0] matches str.  The optional argv[1] is the start
 * position and defaults to 0.
 */
static VALUE
rb_str_match_m_p(int argc, VALUE *argv, VALUE str)
{
    VALUE pattern;
    long start = 0;

    rb_check_arity(argc, 1, 2);
    pattern = get_pat(argv[0]);
    if (argc > 1) {
        start = NUM2LONG(argv[1]);
    }
    return rb_reg_match_p(pattern, str, start);
}
Returns the successor toself
. The successor is calculated byincrementing characters.
The first character to be incremented is the rightmost alphanumeric: or, ifno alphanumerics, the rightmost character:
'THX1138'.succ# => "THX1139"'<<koala>>'.succ# => "<<koalb>>"'***'.succ# => '**+'
The successor to a digit is another digit, “carrying” to the next-leftcharacter for a “rollover” from 9 to 0, and prepending another digit ifnecessary:
'00'.succ# => "01"'09'.succ# => "10"'99'.succ# => "100"
The successor to a letter is another letter of the same case, carrying tothe next-left character for a rollover, and prepending another same-caseletter if necessary:
'aa'.succ# => "ab"'az'.succ# => "ba"'zz'.succ# => "aaa"'AA'.succ# => "AB"'AZ'.succ# => "BA"'ZZ'.succ# => "AAA"
The successor to a non-alphanumeric character is the next character in theunderlying character set's collating sequence, carrying to thenext-left character for a rollover, and prepending another character ifnecessary:
s =0.chr*3s# => "\x00\x00\x00"s.succ# => "\x00\x00\x01"s =255.chr*3s# => "\xFF\xFF\xFF"s.succ# => "\x01\x00\x00\x00"
Carrying can occur between and among mixtures of alphanumeric characters:
s ='zz99zz99's.succ# => "aaa00aa00"s ='99zz99zz's.succ# => "100aa00aa"
The successor to an empty String is a new empty String:
''.succ# => ""
/*
 * String#succ: returns the successor of orig.  A byte copy of orig is
 * made (with encoding/code-range information copied over) and handed to
 * str_succ; orig itself is only read here.
 */
VALUE
rb_str_succ(VALUE orig)
{
    VALUE copy = rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));

    rb_enc_cr_str_copy_for_substr(copy, orig);
    return str_succ(copy);
}
Treats leading characters ofstr as a string of octal digits (withan optional sign) and returns the corresponding number. Returns 0 if theconversion fails.
"123".oct#=> 83"-377".oct#=> -255"bad".oct#=> 0"0377bad".oct#=> 255
Ifstr
starts with0
, radix indicators arehonored. See Kernel#Integer.
/*
 * String#oct: parses the leading characters of str via rb_str_to_inum
 * with base -8 and its last argument (badcheck) FALSE.
 */
static VALUE
rb_str_oct(VALUE str)
{
    const int base = -8;

    return rb_str_to_inum(str, base, FALSE);
}
Returns theInteger ordinal of a one-characterstring.
"a".ord#=> 97
/*
 * String#ord: returns, as an Integer, the code point of the first
 * character of s as decoded by rb_enc_codepoint in the string's
 * encoding.
 */
static VALUE
rb_str_ord(VALUE s)
{
    rb_encoding *enc = STR_ENC_GET(s);
    unsigned int code = rb_enc_codepoint(RSTRING_PTR(s), RSTRING_END(s), enc);

    return UINT2NUM(code);
}
Searches sep or pattern (regexp) in the string and returns the part before it, the match, and the part after it. If it is not found, returns str and two empty strings.
"hello".partition("l")#=> ["he", "l", "lo"]"hello".partition("x")#=> ["hello", "", ""]"hello".partition(/.l/)#=> ["h", "el", "lo"]
/*
 * String#partition: splits str around the first occurrence of sep.
 *
 * Returns a three-element array [before, match, after].  When sep (a
 * String or a Regexp, normalised by get_pat_quoted) is not found, the
 * array is [copy of str, "", ""].
 */
static VALUE
rb_str_partition(VALUE str, VALUE sep)
{
    long pos;
    int found;

    sep = get_pat_quoted(sep, 0);

    if (RB_TYPE_P(sep, T_REGEXP)) {
        found = (rb_reg_search(sep, str, 0, 0) >= 0);
        if (found) {
            VALUE match = rb_backref_get();
            struct re_registers *regs = RMATCH_REGS(match);  /* read by BEG()/END() */

            pos = BEG(0);
            /* from here on `sep` is the matched text itself */
            sep = rb_str_subseq(str, pos, END(0) - pos);
        }
    }
    else {
        pos = rb_str_index(str, sep, 0);
        found = (pos >= 0);
    }

    if (!found) {
        return rb_ary_new3(3, str_duplicate(rb_cString, str),
                           str_new_empty_String(str), str_new_empty_String(str));
    }

    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
                       sep,
                       rb_str_subseq(str, pos + RSTRING_LEN(sep),
                                     RSTRING_LEN(str) - pos - RSTRING_LEN(sep)));
}
Prepends each of the given other_strings to self, in order, and returns self:
s ='foo's.prepend('bar','baz')# => "barbazfoo"
Related:#concat.
/*
 * String#prepend: inserts the given strings, in order, at the front of
 * str and returns str.
 *
 * A single argument is spliced in directly.  Several arguments are
 * first concatenated into a temporary string that carries str's
 * encoding, so the receiver is updated only once.  With no arguments
 * str is only checked for modifiability.
 */
static VALUE
rb_str_prepend_multi(int argc, VALUE *argv, VALUE str)
{
    VALUE joined;
    int idx;

    str_modifiable(str);

    if (argc < 1) {
        return str;
    }
    if (argc == 1) {
        rb_str_update(str, 0L, 0L, argv[0]);
        return str;
    }

    joined = rb_str_tmp_new(0);
    rb_enc_copy(joined, str);
    for (idx = 0; idx < argc; idx++) {
        rb_str_append(joined, argv[idx]);
    }
    rb_str_update(str, 0L, 0L, joined);
    return str;
}
Replaces the contents ofstr with the corresponding values inother_str.
s ="hello"#=> "hello"s.replace"world"#=> "world"
/*
 * String#replace: makes str hold the contents of str2 and returns the
 * result of str_replace.  The receiver must be modifiable even when
 * str2 is the receiver itself, in which case nothing else happens.
 */
VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);

    if (str != str2) {
        StringValue(str2);
        str_discard(str);
        return str_replace(str, str2);
    }
    return str;
}
Returns a new string with the characters fromstr in reverseorder.
"stressed".reverse#=> "desserts"
/*
 * String#reverse: returns a new string with the characters of +str+ in
 * reverse order.
 *
 * Strings of byte length 0 or 1 are simply duplicated. Otherwise a result
 * buffer of the same byte length is filled from its end towards its start:
 *   - byte by byte when single_byte_optimizable(str) holds;
 *   - character by character (rb_enc_fast_mbclen) when the code range is
 *     already known to be VALID;
 *   - character by character (rb_enc_mbclen) otherwise, recomputing the
 *     code range on the way.
 *
 * The result gets str's encoding and the resulting code range.
 */
static VALUE
rb_str_reverse(VALUE str)
{
    rb_encoding *enc;
    VALUE rev;
    char *s, *e, *p;
    int cr;

    if (RSTRING_LEN(str) <= 1) return str_duplicate(rb_cString, str);
    enc = STR_ENC_GET(str);
    rev = rb_str_new(0, RSTRING_LEN(str));
    s = RSTRING_PTR(str); e = RSTRING_END(str);
    p = RSTRING_END(rev);
    cr = ENC_CODERANGE(str);

    /* The length is known to be > 1 here because of the early return. */
    if (single_byte_optimizable(str)) {
        while (s < e) {
            *--p = *s++;
        }
    }
    else if (cr == ENC_CODERANGE_VALID) {
        while (s < e) {
            int clen = rb_enc_fast_mbclen(s, e, enc);

            p -= clen;
            memcpy(p, s, clen);
            s += clen;
        }
    }
    else {
        /* Start optimistic; downgrade to UNKNOWN on any multibyte or
         * high-bit character encountered. */
        cr = rb_enc_asciicompat(enc) ?
            ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
        while (s < e) {
            int clen = rb_enc_mbclen(s, e, enc);

            if (clen > 1 || (*s & 0x80)) cr = ENC_CODERANGE_UNKNOWN;
            p -= clen;
            memcpy(p, s, clen);
            s += clen;
        }
    }
    STR_SET_LEN(rev, RSTRING_LEN(str));
    str_enc_copy(rev, str);
    ENC_CODERANGE_SET(rev, cr);

    return rev;
}
Reverses str in place.
/*
 * String#reverse!: reverses +str+ in place and returns +str+.
 *
 * For strings longer than one byte:
 *   - single-byte-optimizable strings are reversed by swapping bytes from
 *     both ends towards the middle;
 *   - other strings are replaced by the result of rb_str_reverse().
 * Shorter strings only go through str_modify_keep_cr().
 */
static VALUE
rb_str_reverse_bang(VALUE str)
{
    if (RSTRING_LEN(str) > 1) {
        if (single_byte_optimizable(str)) {
            char *s, *e, c;

            str_modify_keep_cr(str);
            s = RSTRING_PTR(str);
            e = RSTRING_END(str) - 1;
            while (s < e) {
                c = *s;
                *s++ = *e;
                *e-- = c;
            }
        }
        else {
            str_shared_replace(str, rb_str_reverse(str));
        }
    }
    else {
        str_modify_keep_cr(str);
    }
    return str;
}
Returns the Integer index of the last occurrence of the given substring, or nil if none found:
'foo'.rindex('f') # => 0
'foo'.rindex('o') # => 2
'foo'.rindex('oo') # => 1
'foo'.rindex('ooo') # => nil
Returns the Integer index of the last match for the given Regexp regexp, or nil if none found:
'foo'.rindex(/f/)# => 0'foo'.rindex(/o/)# => 2'foo'.rindex(/oo/)# => 1'foo'.rindex(/ooo/)# => nil
Integer argument offset, if given and non-negative, specifies the maximum starting position in the string to end the search:
'foo'.rindex('o', 0) # => nil
'foo'.rindex('o', 1) # => 1
'foo'.rindex('o', 2) # => 2
'foo'.rindex('o', 3) # => 2
If offset is a negative Integer, the maximum starting position in the string to end the search is the sum of the string's length and offset:
'foo'.rindex('o',-1)# => 2'foo'.rindex('o',-2)# => 1'foo'.rindex('o',-3)# => nil'foo'.rindex('o',-4)# => nil
Related:#index
/*
 * String#rindex: returns the index of the last occurrence of +sub+
 * (String or Regexp) in +str+, optionally no later than a given offset.
 *
 * argv holds the required pattern and an optional position. A negative
 * position is counted from the end of the string (in characters, per
 * str_strlen()); if it is still negative the search fails immediately,
 * clearing the back-reference when the pattern is a Regexp. A position
 * past the end is clamped to the length. Without a position the search
 * starts at the end.
 *
 * Returns the position as an Integer, or nil when nothing is found.
 */
static VALUE
rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE vpos;
    rb_encoding *enc = STR_ENC_GET(str);
    long pos, len = str_strlen(str, enc); /* str's enc */

    if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
        pos = NUM2LONG(vpos);
        if (pos < 0) {
            pos += len;
            if (pos < 0) {
                if (RB_TYPE_P(sub, T_REGEXP)) {
                    rb_backref_set(Qnil);
                }
                return Qnil;
            }
        }
        if (pos > len) pos = len;
    }
    else {
        pos = len;
    }

    if (RB_TYPE_P(sub, T_REGEXP)) {
        /* The regexp search is given an offset computed by str_offset()
         * from the character position. */
        pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
                         enc, single_byte_optimizable(str));

        if (rb_reg_search(sub, str, pos, 1) >= 0) {
            VALUE match = rb_backref_get();
            /* NOTE(review): `regs` is presumably what BEG() reads. */
            struct re_registers *regs = RMATCH_REGS(match);
            pos = rb_str_sublen(str, BEG(0));
            return LONG2NUM(pos);
        }
    }
    else {
        StringValue(sub);
        pos = rb_str_rindex(str, sub, pos);
        if (pos >= 0) return LONG2NUM(pos);
    }
    return Qnil;
}
If integer is greater than the length of str, returns a new String of length integer with str right justified and padded with padstr; otherwise, returns str.
"hello".rjust(4)            #=> "hello"
"hello".rjust(20)           #=> "               hello"
"hello".rjust(20, '1234')   #=> "123412341234123hello"
/*
 * String#rjust: thin wrapper that forwards its arguments to
 * rb_str_justify() with the mode character 'r'.
 */
static VALUE
rb_str_rjust(int argc, VALUE *argv, VALUE str)
{
    return rb_str_justify(argc, argv, str, 'r');
}
Searches sep or pattern (regexp) in the string from the end of the string, and returns the part before it, the match, and the part after it. If it is not found, returns two empty strings and str.
"hello".rpartition("l")         #=> ["hel", "l", "o"]
"hello".rpartition("x")         #=> ["", "", "hello"]
"hello".rpartition(/.l/)        #=> ["he", "ll", "o"]
/*
 * String#rpartition: splits +str+ around the last occurrence of +sep+.
 *
 * +sep+ is normalized with get_pat_quoted(). For a Regexp a reverse
 * search is started from the byte length of the string and +sep+ is
 * replaced by the matched substring. For a String the start position is
 * converted with rb_str_sublen(), searched with rb_str_rindex(), and the
 * result converted back with rb_str_offset().
 *
 * Returns [before, separator, after]; when nothing is found, returns
 * [empty string, empty string, copy of str].
 */
static VALUE
rb_str_rpartition(VALUE str, VALUE sep)
{
    long pos = RSTRING_LEN(str);

    sep = get_pat_quoted(sep, 0);
    if (RB_TYPE_P(sep, T_REGEXP)) {
        if (rb_reg_search(sep, str, pos, 1) < 0) {
            goto failed;
        }
        VALUE match = rb_backref_get();
        /* NOTE(review): `regs` is presumably what BEG()/END() read. */
        struct re_registers *regs = RMATCH_REGS(match);

        pos = BEG(0);
        sep = rb_str_subseq(str, pos, END(0) - pos);
    }
    else {
        pos = rb_str_sublen(str, pos);
        pos = rb_str_rindex(str, sep, pos);
        if (pos < 0) {
            goto failed;
        }
        pos = rb_str_offset(str, pos);
    }

    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
                       sep,
                       rb_str_subseq(str, pos+RSTRING_LEN(sep),
                                     RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
  failed:
    return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(rb_cString, str));
}
Returns a copy of the receiver with trailing whitespace removed. See also #lstrip and #strip.
Refer to #strip for the definition of whitespace.
" hello ".rstrip#=> " hello""hello".rstrip#=> "hello"
/*
 * String#rstrip: returns a copy of +str+ without trailing whitespace.
 *
 * rstrip_offset() reports how many bytes to drop from the end. When that
 * is not positive the string is duplicated unchanged; otherwise the
 * leading olen - roffset bytes are returned as a new substring.
 */
static VALUE
rb_str_rstrip(VALUE str)
{
    rb_encoding *enc;
    char *start;
    long olen, roffset;

    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    roffset = rstrip_offset(str, start, start+olen, enc);

    if (roffset <= 0) return str_duplicate(rb_cString, str);
    return rb_str_subseq(str, 0, olen-roffset);
}
Removes trailing whitespace from the receiver. Returns the altered receiver, or nil if no change was made. See also #lstrip! and #strip!.
Refer to #strip for the definition of whitespace.
" hello ".rstrip!#=> " hello"" hello".rstrip!#=> nil"hello".rstrip!#=> nil
/*
 * String#rstrip!: removes trailing whitespace from +str+ in place.
 *
 * After str_modify_keep_cr(), rstrip_offset() reports how many trailing
 * bytes to drop. If that is positive the length is shortened and, unless
 * SHARABLE_MIDDLE_SUBSTRING is enabled, the terminator is rewritten at
 * the new end.
 *
 * Returns +str+ when it was shortened, nil otherwise.
 */
static VALUE
rb_str_rstrip_bang(VALUE str)
{
    rb_encoding *enc;
    char *start;
    long olen, roffset;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    roffset = rstrip_offset(str, start, start+olen, enc);
    if (roffset > 0) {
        long len = olen - roffset;

        STR_SET_LEN(str, len);
#if !SHARABLE_MIDDLE_SUBSTRING
        TERM_FILL(start+len, rb_enc_mbminlen(enc));
#endif
        return str;
    }
    return Qnil;
}
Both forms iterate through str, matching the pattern (which may be a Regexp or a String). For each match, a result is generated and either added to the result array or passed to the block. If the pattern contains no groups, each individual result consists of the matched string, $&. If the pattern contains groups, each individual result is itself an array containing one entry per group.
a = "cruel world"
a.scan(/\w+/)        #=> ["cruel", "world"]
a.scan(/.../)        #=> ["cru", "el ", "wor"]
a.scan(/(...)/)      #=> [["cru"], ["el "], ["wor"]]
a.scan(/(..)(..)/)   #=> [["cr", "ue"], ["l ", "wo"]]
And the block form:
a.scan(/\w+/) {|w| print "<<#{w}>> " }
print "\n"
a.scan(/(.)(.)/) {|x,y| print y, x }
print "\n"
produces:
<<cruel>> <<world>>
rceu lowlr
/*
 * String#scan: repeatedly matches +pat+ against +str+.
 *
 * Without a block, every result of scan_once() is collected into an array
 * which is returned. With a block, each result is yielded, and after each
 * yield str_mod_check() verifies the string against the pointer/length
 * captured on entry; +str+ is returned.
 *
 * `last` tracks the start position used for the final successful match so
 * that the search can be re-run once at the end (presumably to leave the
 * back-reference pointing at the last match -- confirm against
 * rb_pat_search()). In the array form, when nothing matched at all, the
 * back-reference is cleared instead.
 */
static VALUE
rb_str_scan(VALUE str, VALUE pat)
{
    VALUE result;
    long start = 0;
    long last = -1, prev = 0;
    char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);

    pat = get_pat_quoted(pat, 1);
    mustnot_broken(str);
    if (!rb_block_given_p()) {
        VALUE ary = rb_ary_new();

        while (!NIL_P(result = scan_once(str, pat, &start, 0))) {
            last = prev;
            prev = start;
            rb_ary_push(ary, result);
        }
        if (last >= 0) rb_pat_search(pat, str, last, 1);
        else rb_backref_set(Qnil);
        return ary;
    }

    while (!NIL_P(result = scan_once(str, pat, &start, 1))) {
        last = prev;
        prev = start;
        rb_yield(result);
        str_mod_check(str, p, len);
    }
    if (last >= 0) rb_pat_search(pat, str, last, 1);
    return str;
}
If the string contains an invalid byte sequence, replaces the invalid bytes with the given replacement string; otherwise returns a copy of the string. If a block is given, replaces invalid bytes with the value returned by the block.
"abc\u3042\x81".scrub#=> "abc\u3042\uFFFD""abc\u3042\x81".scrub("*")#=> "abc\u3042*""abc\u3042\xE3\x80".scrub{|bytes|'<'+bytes.unpack('H*')[0]+'>' }#=> "abc\u3042<e380>"
/*
 * String#scrub: returns a scrubbed version of +str+.
 *
 * Accepts zero or one argument (the replacement, nil when omitted) and
 * delegates to rb_str_scrub(). When that returns nil, a duplicate of
 * +str+ is returned instead; otherwise the scrubbed string is returned.
 */
static VALUE
str_scrub(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
    VALUE scrubbed = rb_str_scrub(str, repl);

    return NIL_P(scrubbed) ? str_duplicate(rb_cString, str): scrubbed;
}
If the string contains an invalid byte sequence, replaces the invalid bytes with the given replacement string in place; in either case returns self. If a block is given, replaces invalid bytes with the value returned by the block.
"abc\u3042\x81".scrub!#=> "abc\u3042\uFFFD""abc\u3042\x81".scrub!("*")#=> "abc\u3042*""abc\u3042\xE3\x80".scrub!{|bytes|'<'+bytes.unpack('H*')[0]+'>' }#=> "abc\u3042<e380>"
/*
 * String#scrub!: scrubs +str+ in place.
 *
 * Accepts zero or one argument (the replacement, nil when omitted) and
 * delegates to rb_str_scrub(). When that returns a non-nil string, the
 * contents of +str+ are replaced by it. Always returns +str+.
 */
static VALUE
str_scrub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
    VALUE scrubbed = rb_str_scrub(str, repl);

    if (!NIL_P(scrubbed)) rb_str_replace(str, scrubbed);
    return str;
}
Modifies the indexth byte as integer.
/*
 * String#setbyte: stores the low 8 bits of +value+ at byte +index+.
 *
 * +index+ may be negative to count from the end; an index outside
 * [-len, len) raises IndexError. +value+ is converted with rb_to_int()
 * and masked with 0xff before use.
 *
 * For non-embedded strings the cached code range is updated as cheaply as
 * possible:
 *   - 7BIT: stays 7BIT if the new byte is ASCII; otherwise becomes
 *     BROKEN or VALID depending on whether a character is found at the
 *     written position.
 *   - VALID: the enclosing character is measured before and after the
 *     write; the range becomes BROKEN if no character is found, is
 *     cleared if the character width changed or the new byte is ASCII,
 *     and is left alone otherwise.
 * In every other case the code range is simply cleared.
 *
 * Returns the original +value+ argument.
 */
static VALUE
rb_str_setbyte(VALUE str, VALUE index, VALUE value)
{
    long pos = NUM2LONG(index);
    long len = RSTRING_LEN(str);
    char *head, *left = 0;
    unsigned char *ptr;
    rb_encoding *enc;
    int cr = ENC_CODERANGE_UNKNOWN, width, nlen;

    if (pos < -len || len <= pos)
        rb_raise(rb_eIndexError, "index %ld out of string", pos);
    if (pos < 0)
        pos += len;

    VALUE v = rb_to_int(value);
    VALUE w = rb_int_and(v, INT2FIX(0xff));
    unsigned char byte = NUM2INT(w) & 0xFF;

    if (!str_independent(str))
        str_make_independent(str);
    enc = STR_ENC_GET(str);
    head = RSTRING_PTR(str);
    ptr = (unsigned char *)&head[pos];
    if (!STR_EMBED_P(str)) {
        cr = ENC_CODERANGE(str);
        switch (cr) {
          case ENC_CODERANGE_7BIT:
            left = (char *)ptr;
            *ptr = byte;
            if (ISASCII(byte)) goto end;
            nlen = rb_enc_precise_mbclen(left, head+len, enc);
            if (!MBCLEN_CHARFOUND_P(nlen))
                ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
            else
                ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
            goto end;
          case ENC_CODERANGE_VALID:
            left = rb_enc_left_char_head(head, ptr, head+len, enc);
            width = rb_enc_precise_mbclen(left, head+len, enc);
            *ptr = byte;
            nlen = rb_enc_precise_mbclen(left, head+len, enc);
            if (!MBCLEN_CHARFOUND_P(nlen))
                ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
            else if (MBCLEN_CHARFOUND_LEN(nlen) != width || ISASCII(byte))
                ENC_CODERANGE_CLEAR(str);
            goto end;
        }
    }
    /* Embedded strings and other code ranges: drop the cache and write. */
    ENC_CODERANGE_CLEAR(str);
    *ptr = byte;

  end:
    return value;
}
Returns the substring of self specified by the arguments.
When the single Integer argument index is given, returns the 1-character substring found in self at offset index:
'bar'[2]# => "r"
Counts backward from the end ofself
ifindex
isnegative:
'foo'[-3]# => "f"
Returnsnil
ifindex
is out of range:
'foo'[3]# => nil'foo'[-4]# => nil
When the two Integer arguments start and length are given, returns the substring of the given length found in self at offset start:
'foo'[0,2]# => "fo"'foo'[0,0]# => ""
Counts backward from the end ofself
ifstart
isnegative:
'foo'[-2,2]# => "oo"
Special case: returns a new empty String ifstart
is equal tothe length ofself
:
'foo'[3,2]# => ""
Returnsnil
ifstart
is out of range:
'foo'[4,2]# => nil'foo'[-4,2]# => nil
Returns the trailing substring ofself
iflength
is large:
'foo'[1,50]# => "oo"
Returnsnil
iflength
is negative:
'foo'[0,-1]# => nil
When the single Range argument range is given, derives start and length values from the given range, and returns values as above:
'foo'[0..1]
is equivalent to'foo'[0,2]
.
'foo'[0...1]
is equivalent to'foo'[0,1]
.
When the Regexp argument regexp is given, and the capture argument is 0, returns the first matching substring found in self, or nil if none found:
'foo'[/o/]# => "o"'foo'[/x/]# => nils ='hello there's[/[aeiou](.)\1/]# => "ell"s[/[aeiou](.)\1/,0]# => "ell"
If argument capture is given and not 0, it should be either an Integer capture group index or a String or Symbol capture group name; the method call returns only the specified capture (see Regexp Capturing):
s ='hello there's[/[aeiou](.)\1/,1]# => "l"s[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/,"non_vowel"]# => "l"s[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, :vowel]# => "e"
If an invalid capture group index is given, nil is returned. If an invalid capture group name is given, IndexError is raised.
When the single String argument substring is given, returns the substring from self if found, otherwise nil:
'foo'['oo']# => "oo"'foo'['xx']# => nil
/*
 * String#[] / String#slice dispatcher.
 *
 * With two arguments: a Regexp first argument is handled by
 * rb_str_subpat(str, regexp, capture); anything else is treated as
 * (start, length), both converted with NUM2LONG(), and passed to
 * rb_str_substr().
 *
 * Otherwise the argument count is validated to be 1..2 and the single
 * argument is handed to rb_str_aref().
 */
static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc == 2) {
        if (RB_TYPE_P(argv[0], T_REGEXP)) {
            return rb_str_subpat(str, argv[0], argv[1]);
        }
        else {
            long beg = NUM2LONG(argv[0]);
            long len = NUM2LONG(argv[1]);
            return rb_str_substr(str, beg, len);
        }
    }
    rb_check_arity(argc, 1, 2);
    return rb_str_aref(str, argv[0]);
}
Deletes the specified portion from str, and returns the portion deleted.
string = "this is a string"
string.slice!(2)        #=> "i"
string.slice!(3..6)     #=> " is "
string.slice!(/s.*t/)   #=> "sa st"
string.slice!("r")      #=> "r"
string                  #=> "thing"
/*
 * String#slice!: removes the selected portion from +str+ and returns it.
 *
 * Takes one or two arguments. The first argument selects the mode:
 *   - Regexp: searches from the start; an optional second argument names
 *     the capture group (negative numbers count back from the number of
 *     registers). Returns nil on no match or an out-of-range group.
 *   - two arguments otherwise: (start, length) via NUM2LONG().
 *   - Fixnum: a single position (len starts at 1).
 *   - String: the first occurrence found by rb_str_index(); the returned
 *     value is a duplicate of the argument.
 *   - anything else: tried as a Range via rb_range_beg_len(); if that
 *     reports "not a range" (Qfalse) it is converted with NUM2LONG().
 *
 * The three labels form a pipeline that modes enter at different stages:
 *   num_index: resolve (beg, len) to a byte offset with rb_str_subpos()
 *   subseq:    build the result string from the byte range
 *   squash:    remove the byte range from +str+
 *
 * Returns the removed string, or nil if the selection does not apply.
 */
static VALUE
rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE result = Qnil;
    VALUE indx;
    long beg, len = 1;
    char *p;

    rb_check_arity(argc, 1, 2);
    str_modify_keep_cr(str);
    indx = argv[0];
    if (RB_TYPE_P(indx, T_REGEXP)) {
        if (rb_reg_search(indx, str, 0, 0) < 0) return Qnil;
        VALUE match = rb_backref_get();
        struct re_registers *regs = RMATCH_REGS(match);
        int nth = 0;
        if (argc > 1 && (nth = rb_reg_backref_number(match, argv[1])) < 0) {
            if ((nth += regs->num_regs) <= 0) return Qnil;
        }
        else if (nth >= regs->num_regs) return Qnil;
        beg = BEG(nth);
        len = END(nth) - beg;
        goto subseq;
    }
    else if (argc == 2) {
        beg = NUM2LONG(indx);
        len = NUM2LONG(argv[1]);
        goto num_index;
    }
    else if (FIXNUM_P(indx)) {
        beg = FIX2LONG(indx);
        if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
        if (!len) return Qnil;
        beg = p - RSTRING_PTR(str);
        goto subseq;
    }
    else if (RB_TYPE_P(indx, T_STRING)) {
        beg = rb_str_index(str, indx, 0);
        if (beg == -1) return Qnil;
        len = RSTRING_LEN(indx);
        result = str_duplicate(rb_cString, indx);
        goto squash;
    }
    else {
        switch (rb_range_beg_len(indx, &beg, &len, str_strlen(str, NULL), 0)) {
          case Qnil:
            return Qnil;
          case Qfalse:
            beg = NUM2LONG(indx);
            if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
            if (!len) return Qnil;
            beg = p - RSTRING_PTR(str);
            goto subseq;
          default:
            goto num_index;
        }
    }

  num_index:
    if (!(p = rb_str_subpos(str, beg, &len))) return Qnil;
    beg = p - RSTRING_PTR(str);

  subseq:
    result = rb_str_new(RSTRING_PTR(str)+beg, len);
    rb_enc_cr_str_copy_for_substr(result, str);

  squash:
    if (len > 0) {
        if (beg == 0) {
            rb_str_drop_bytes(str, len);
        }
        else {
            char *sptr = RSTRING_PTR(str);
            long slen = RSTRING_LEN(str);
            if (beg + len > slen) /* pathological check */
                len = slen - beg;
            /* Close the gap by sliding the tail down over the removed bytes. */
            memmove(sptr + beg,
                    sptr + beg + len,
                    slen - (beg + len));
            slen -= len;
            STR_SET_LEN(str, slen);
            TERM_FILL(&sptr[slen], TERM_LEN(str));
        }
    }
    return result;
}
Divides str into substrings based on a delimiter, returning an array of these substrings.
If pattern is a String, then its contents are used as the delimiter when splitting str. If pattern is a single space, str is split on whitespace, with leading and trailing whitespace and runs of contiguous whitespace characters ignored.
If pattern is a Regexp, str is divided where the pattern matches. Whenever the pattern matches a zero-length string, str is split into individual characters. If pattern contains groups, the respective matches will be returned in the array as well.
If pattern is nil, the value of $; is used. If $; is nil (which is the default), str is split on whitespace as if ' ' were specified.
If the limit parameter is omitted, trailing null fields are suppressed. If limit is a positive number, at most that number of split substrings will be returned (captured groups will be returned as well, but are not counted towards the limit). If limit is 1, the entire string is returned as the only entry in an array. If negative, there is no limit to the number of fields returned, and trailing null fields are not suppressed.
When the input str is empty, an empty Array is returned, as the string is considered to have no fields to split.
" now's the time ".split#=> ["now's", "the", "time"]" now's the time ".split(' ')#=> ["now's", "the", "time"]" now's the time".split(/ /)#=> ["", "now's", "", "the", "time"]"1, 2.34,56, 7".split(%r{,\s*})#=> ["1", "2.34", "56", "7"]"hello".split(//)#=> ["h", "e", "l", "l", "o"]"hello".split(//,3)#=> ["h", "e", "llo"]"hi mom".split(%r{\s*})#=> ["h", "i", "m", "o", "m"]"mellow yellow".split("ello")#=> ["m", "w y", "w"]"1,2,,3,4,,".split(',')#=> ["1", "2", "", "3", "4"]"1,2,,3,4,,".split(',',4)#=> ["1", "2", "", "3,4,,"]"1,2,,3,4,,".split(',',-4)#=> ["1", "2", "", "3", "4", "", ""]"1:2:3".split(/(:)()()/,2)#=> ["1", ":", "", "", "2:3"]"".split(',',-1)#=> []
If a block is given, invoke the block with each split substring.
/*
 * String#split: divides +str+ into fields and either collects them into
 * an array or yields them to the block.
 *
 * Arguments (both optional): the pattern and the limit.
 *
 * `result` doubles as a mode flag: it is set to Qfalse when a block is
 * given and to Qnil otherwise, and is later tested with plain C truth
 * tests (`if (result)`, `result ? ... : ...`). NOTE(review): this relies
 * on Qfalse testing false and Qnil testing true in C.
 *
 * Limit handling: a limit <= 0 is treated as "no limit" (limit becomes
 * nil, `lim` keeps the sign); a limit of 1 short-circuits and produces
 * the whole string as the single field (or nothing for an empty string).
 * `empty_count` starts at 0 only when no limit was given at all, and is
 * otherwise -1; it is threaded through split_string() by SPLIT_STR.
 *
 * Pattern selection: an explicit pattern goes through get_pat_quoted();
 * otherwise rb_fs ($;) is consulted -- nil selects awk-style whitespace
 * splitting, an invalid value raises TypeError, and a valid non-nil value
 * emits a deprecation warning. literal_split_pattern() may then
 * reclassify the pattern, giving four strategies:
 *   SPLIT_TYPE_AWK     whitespace runs (ASCII fast path or per codepoint)
 *   SPLIT_TYPE_STRING  literal substring search with rb_memsearch()
 *   SPLIT_TYPE_CHARS   one field per character
 *   (regexp)           repeated rb_reg_search(), emitting capture groups
 *
 * After the loop, the remaining tail is emitted when the string is
 * non-empty and either a limit is active, bytes remain past `beg`, or the
 * limit was negative.
 *
 * Returns the array of fields, or +str+ when a block was given.
 */
static VALUE
rb_str_split_m(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    VALUE spat;
    VALUE limit;
    split_type_t split_type;
    long beg, end, i = 0, empty_count = -1;
    int lim = 0;
    VALUE result, tmp;

    result = rb_block_given_p() ? Qfalse : Qnil;
    if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
        lim = NUM2INT(limit);
        if (lim <= 0) limit = Qnil;
        else if (lim == 1) {
            if (RSTRING_LEN(str) == 0)
                return result ? rb_ary_new2(0) : str;
            tmp = str_duplicate(rb_cString, str);
            if (!result) {
                rb_yield(tmp);
                return str;
            }
            return rb_ary_new3(1, tmp);
        }
        i = 1;
    }
    if (NIL_P(limit) && !lim) empty_count = 0;

    enc = STR_ENC_GET(str);
    split_type = SPLIT_TYPE_REGEXP;
    if (!NIL_P(spat)) {
        spat = get_pat_quoted(spat, 0);
    }
    else if (NIL_P(spat = rb_fs)) {
        split_type = SPLIT_TYPE_AWK;
    }
    else if (!(spat = rb_fs_check(spat))) {
        rb_raise(rb_eTypeError, "value of $; must be String or Regexp");
    }
    else {
        rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "$; is set to non-nil value");
    }
    if (split_type != SPLIT_TYPE_AWK) {
        switch (BUILTIN_TYPE(spat)) {
          case T_REGEXP:
            rb_reg_options(spat); /* check if uninitialized */
            tmp = RREGEXP_SRC(spat);
            split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
            if (split_type == SPLIT_TYPE_AWK) {
                /* The regexp source is used as a plain string pattern. */
                spat = tmp;
                split_type = SPLIT_TYPE_STRING;
            }
            break;

          case T_STRING:
            mustnot_broken(spat);
            split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
            break;

          default:
            UNREACHABLE_RETURN(Qnil);
        }
    }

#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))

    if (result) result = rb_ary_new();
    beg = 0;
    char *ptr = RSTRING_PTR(str);
    char *eptr = RSTRING_END(str);
    if (split_type == SPLIT_TYPE_AWK) {
        char *bptr = ptr;
        int skip = 1;           /* non-zero while inside a whitespace run */
        unsigned int c;

        end = beg;
        if (is_ascii_string(str)) {
            while (ptr < eptr) {
                c = (unsigned char)*ptr++;
                if (skip) {
                    if (ascii_isspace(c)) {
                        beg = ptr - bptr;
                    }
                    else {
                        end = ptr - bptr;
                        skip = 0;
                        if (!NIL_P(limit) && lim <= i) break;
                    }
                }
                else if (ascii_isspace(c)) {
                    SPLIT_STR(beg, end-beg);
                    skip = 1;
                    beg = ptr - bptr;
                    if (!NIL_P(limit)) ++i;
                }
                else {
                    end = ptr - bptr;
                }
            }
        }
        else {
            while (ptr < eptr) {
                int n;

                c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
                ptr += n;
                if (skip) {
                    if (rb_isspace(c)) {
                        beg = ptr - bptr;
                    }
                    else {
                        end = ptr - bptr;
                        skip = 0;
                        if (!NIL_P(limit) && lim <= i) break;
                    }
                }
                else if (rb_isspace(c)) {
                    SPLIT_STR(beg, end-beg);
                    skip = 1;
                    beg = ptr - bptr;
                    if (!NIL_P(limit)) ++i;
                }
                else {
                    end = ptr - bptr;
                }
            }
        }
    }
    else if (split_type == SPLIT_TYPE_STRING) {
        char *str_start = ptr;
        char *substr_start = ptr;
        char *sptr = RSTRING_PTR(spat);
        long slen = RSTRING_LEN(spat);

        mustnot_broken(str);
        enc = rb_enc_check(str, spat);
        while (ptr < eptr &&
               (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
            /* Check we are at the start of a char */
            char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
            if (t != ptr + end) {
                ptr = t;
                continue;
            }
            SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
            ptr += end + slen;
            substr_start = ptr;
            if (!NIL_P(limit) && lim <= ++i) break;
        }
        beg = ptr - str_start;
    }
    else if (split_type == SPLIT_TYPE_CHARS) {
        char *str_start = ptr;
        int n;

        mustnot_broken(str);
        enc = rb_enc_get(str);
        while (ptr < eptr &&
               (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
            SPLIT_STR(ptr - str_start, n);
            ptr += n;
            if (!NIL_P(limit) && lim <= ++i) break;
        }
        beg = ptr - str_start;
    }
    else {
        long len = RSTRING_LEN(str);
        long start = beg;
        long idx;
        int last_null = 0;
        struct re_registers *regs;
        VALUE match = 0;

        /* The increment clause un-busies the previous match and restores
         * it as the back-reference before the next search. */
        for (; rb_reg_search(spat, str, start, 0) >= 0;
             (match ? (rb_match_unbusy(match), rb_backref_set(match)) : (void)0)) {
            match = rb_backref_get();
            if (!result) rb_match_busy(match);
            regs = RMATCH_REGS(match);
            end = BEG(0);
            if (start == end && BEG(0) == END(0)) {
                /* Zero-length match at the current position. */
                if (!ptr) {
                    SPLIT_STR(0, 0);
                    break;
                }
                else if (last_null == 1) {
                    SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
                    beg = start;
                }
                else {
                    if (start == len)
                        start++;
                    else
                        start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
                    last_null = 1;
                    continue;
                }
            }
            else {
                SPLIT_STR(beg, end-beg);
                beg = start = END(0);
            }
            last_null = 0;

            /* Emit every capture group that participated in the match. */
            for (idx=1; idx < regs->num_regs; idx++) {
                if (BEG(idx) == -1) continue;
                SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
            }
            if (!NIL_P(limit) && lim <= ++i) break;
        }
        if (match) rb_match_unbusy(match);
    }
    if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
        SPLIT_STR(beg, RSTRING_LEN(str)-beg);
    }

    return result ? result : str;
}
Builds a set of characters from the other_str parameter(s) using the procedure described for #count. Returns a new string where runs of the same character that occur in this set are replaced by a single character. If no arguments are given, all runs of identical characters are replaced by a single character.
"yellow moon".squeeze#=> "yelow mon"" now is the".squeeze(" ")#=> " now is the""putters shoot balls".squeeze("m-z")#=> "puters shot balls"
/*
 * String#squeeze: duplicates +str+, applies rb_str_squeeze_bang() to the
 * duplicate and returns the duplicate (whether or not it was changed).
 */
static VALUE
rb_str_squeeze(int argc, VALUE *argv, VALUE str)
{
    str = str_duplicate(rb_cString, str);
    rb_str_squeeze_bang(argc, argv, str);
    return str;
}
Squeezes str in place, returning either str, or nil if no changes were made.
/*
 * String#squeeze!: collapses runs of identical characters in place.
 *
 * With no arguments every run is collapsed. With arguments, each one is
 * coerced with StringValue() and folded into the selection table by
 * tr_setup_table(); only characters selected by that table are collapsed.
 *
 * `s` is the read cursor and `t` the write cursor over the same buffer;
 * `save` holds the last character written (initialised to an all-ones
 * value so the first character never compares equal). A byte-wise loop is
 * used when the receiver and all arguments are single-byte optimizable;
 * otherwise characters are decoded with rb_enc_codepoint_len(), with a
 * shortcut for bytes below 0x80 in ASCII-compatible encodings.
 *
 * Returns +str+ if its length changed, nil otherwise (including for an
 * empty string).
 */
static VALUE
rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
{
    char squeez[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    VALUE del = 0, nodel = 0;
    unsigned char *s, *send, *t;
    int i;
    int ascompat, singlebyte = single_byte_optimizable(str);
    unsigned int save;

    if (argc == 0) {
        enc = STR_ENC_GET(str);
    }
    else {
        for (i=0; i<argc; i++) {
            VALUE arg = argv[i];

            StringValue(arg);
            enc = rb_enc_check(str, arg);
            if (singlebyte && !single_byte_optimizable(arg))
                singlebyte = 0;
            tr_setup_table(arg, squeez, i==0, &del, &nodel, enc);
        }
    }

    str_modify_keep_cr(str);
    s = t = (unsigned char *)RSTRING_PTR(str);
    if (!s || RSTRING_LEN(str) == 0) return Qnil;
    send = (unsigned char *)RSTRING_END(str);
    save = -1;
    ascompat = rb_enc_asciicompat(enc);

    if (singlebyte) {
        while (s < send) {
            unsigned int c = *s++;
            if (c != save || (argc > 0 && !squeez[c])) {
                *t++ = save = c;
            }
        }
    }
    else {
        while (s < send) {
            unsigned int c;
            int clen;

            if (ascompat && (c = *s) < 0x80) {
                if (c != save || (argc > 0 && !squeez[c])) {
                    *t++ = save = c;
                }
                s++;
            }
            else {
                c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, enc);

                if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
                    /* Only rewrite the character once the cursors diverge. */
                    if (t != s) rb_enc_mbcput(c, t, enc);
                    save = c;
                    t += clen;
                }
                s += clen;
            }
        }
    }

    TERM_FILL((char *)t, TERM_LEN(str));
    if ((char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
        STR_SET_LEN(str, (char *)t - RSTRING_PTR(str));
        return str;
    }
    return Qnil;
}
Returns true if str starts with one of the prefixes given. Each of the prefixes should be a String or a Regexp.
"hello".start_with?("hell")#=> true"hello".start_with?(/H/i)#=> true# returns true if one of the prefixes matches."hello".start_with?("heaven","hell")#=> true"hello".start_with?("heaven","paradise")#=> false
/*
 * String#start_with?: returns true if +str+ starts with any of the given
 * prefixes, false otherwise (including when no prefixes are given).
 *
 * Regexp prefixes are tested with rb_reg_start_with_p(). Other prefixes
 * are coerced with StringValue(), checked for encoding compatibility with
 * rb_enc_check(), skipped when longer than +str+, and compared bytewise
 * against the head of +str+.
 */
static VALUE
rb_str_start_with(int argc, VALUE *argv, VALUE str)
{
    int i;

    for (i=0; i<argc; i++) {
        VALUE tmp = argv[i];
        if (RB_TYPE_P(tmp, T_REGEXP)) {
            if (rb_reg_start_with_p(tmp, str))
                return Qtrue;
        }
        else {
            StringValue(tmp);
            rb_enc_check(str, tmp);
            if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
            if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
                return Qtrue;
        }
    }
    return Qfalse;
}
Returns a copy of the receiver with leading and trailing whitespace removed.
Whitespace is defined as any of the following characters: null, horizontal tab, line feed, vertical tab, form feed, carriage return, space.
" hello ".strip#=> "hello""\tgoodbye\r\n".strip#=> "goodbye""\x00\t\n\v\f\r ".strip#=> """hello".strip#=> "hello"
/*
 * String#strip: returns a copy of +str+ without leading and trailing
 * whitespace.
 *
 * lstrip_offset() gives the number of leading bytes to drop and
 * rstrip_offset() (run on the part after the leading whitespace) the
 * number of trailing bytes. If neither is positive, +str+ is duplicated
 * unchanged; otherwise the middle byte range is returned as a substring.
 */
static VALUE
rb_str_strip(VALUE str)
{
    char *start;
    long olen, loffset, roffset;
    rb_encoding *enc = STR_ENC_GET(str);

    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    roffset = rstrip_offset(str, start+loffset, start+olen, enc);

    if (loffset <= 0 && roffset <= 0) return str_duplicate(rb_cString, str);
    return rb_str_subseq(str, loffset, olen-loffset-roffset);
}
Removes leading and trailing whitespace from the receiver. Returns the altered receiver, or nil if there was no change.
Refer to #strip for the definition of whitespace.
" hello ".strip!#=> "hello""hello".strip!#=> nil
/*
 * String#strip!: removes leading and trailing whitespace from +str+ in
 * place.
 *
 * After str_modify_keep_cr(), the leading and trailing byte counts are
 * obtained from lstrip_offset() / rstrip_offset(). If either is positive,
 * the kept bytes are moved to the front when leading bytes are dropped,
 * the length is updated and, unless SHARABLE_MIDDLE_SUBSTRING is enabled,
 * the terminator is rewritten at the new end.
 *
 * Returns +str+ when it was changed, nil otherwise.
 */
static VALUE
rb_str_strip_bang(VALUE str)
{
    char *start;
    long olen, loffset, roffset;
    rb_encoding *enc;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    roffset = rstrip_offset(str, start+loffset, start+olen, enc);

    if (loffset > 0 || roffset > 0) {
        long len = olen-roffset;
        if (loffset > 0) {
            len -= loffset;
            /* Source and destination overlap, hence memmove. */
            memmove(start, start + loffset, len);
        }
        STR_SET_LEN(str, len);
#if !SHARABLE_MIDDLE_SUBSTRING
        TERM_FILL(start+len, rb_enc_mbminlen(enc));
#endif
        return str;
    }
    return Qnil;
}
Returns a copy of str with the first occurrence of pattern replaced by the second argument. The pattern is typically a Regexp; if given as a String, any regular expression metacharacters it contains will be interpreted literally, e.g. \d will match a backslash followed by 'd', instead of a digit.
If replacement is a String it will be substituted for the matched text. It may contain back-references to the pattern's capture groups of the form \d, where d is a group number, or \k<n>, where n is a group name. Similarly, \&, \', \`, and \+ correspond to special variables, $&, $', $`, and $+, respectively. (See regexp.rdoc for details.) \0 is the same as \&. \\ is interpreted as an escape, i.e., a single backslash. Note that, within replacement the special match variables, such as $&, will not refer to the current match.
If the second argument is a Hash, and the matched text is one of its keys, the corresponding value is the replacement string.
In the block form, the current match string is passed in as a parameter, and variables such as $1, $2, $`, $&, and $' will be set appropriately. (See regexp.rdoc for details.) The value returned by the block will be substituted for the match on each call.
"hello".sub(/[aeiou]/,'*')#=> "h*llo""hello".sub(/([aeiou])/,'<\1>')#=> "h<e>llo""hello".sub(/./) {|s|s.ord.to_s+' ' }#=> "104 ello""hello".sub(/(?<foo>[aeiou])/,'*\k<foo>*')#=> "h*e*llo"'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/,ENV)#=> "Is /bin/bash your preferred shell?"
Note that a string literal consumes backslashes. (See syntax/literals.rdoc for details about string literals.) Back-references are typically preceded by an additional backslash. For example, if you want to write a back-reference \& in replacement with a double-quoted string literal, you need to write: "..\\&..". If you want to write a non-back-reference string \& in replacement, you need first to escape the backslash to prevent this method from interpreting it as a back-reference, and then you need to escape the backslashes again to prevent a string literal from consuming them: "..\\\\&..". You may want to use the block form to avoid a lot of backslashes.
/*
 * String#sub: duplicates +str+, applies rb_str_sub_bang() to the
 * duplicate and returns the duplicate (whether or not a substitution
 * took place).
 */
static VALUE
rb_str_sub(int argc, VALUE *argv, VALUE str)
{
    str = str_duplicate(rb_cString, str);
    rb_str_sub_bang(argc, argv, str);
    return str;
}
Performs the same substitution as #sub in-place.
Returns str if a substitution was performed or nil if no substitution was performed.
/*
 * String#sub!: replaces the first match of the pattern in +str+ in place.
 *
 * Arguments: the pattern, plus a replacement unless a block is given
 * (minimum arity is 1 with a block, 2 without). With exactly one argument
 * the block supplies the replacement. Otherwise the second argument is
 * used as a Hash if rb_check_hash_type() accepts it, else coerced with
 * StringValue().
 *
 * On a match:
 *   - block / hash form: the replacement is obtained from the block or
 *     the hash lookup and converted with rb_obj_as_string(); afterwards
 *     str_mod_check() and rb_check_frozen() guard against the string
 *     having been altered or frozen in the meantime;
 *   - string form: rb_reg_regsub() produces the replacement text.
 *   - If the encodings are incompatible, the substitution is still allowed
 *     when the parts of +str+ before and after the match both scan as
 *     7-bit; otherwise EncCompatError is raised.
 *   - The code range is downgraded to UNKNOWN when the replacement is
 *     broken, or when +str+ was VALID and the replacement is 7BIT;
 *     otherwise it takes the replacement's code range.
 *   - The buffer is grown if needed, the tail is shifted, and the
 *     replacement bytes are copied in.
 *
 * Returns +str+ if a substitution was made, nil otherwise.
 */
static VALUE
rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE pat, repl, hash = Qnil;
    int iter = 0;
    long plen;
    int min_arity = rb_block_given_p() ? 1 : 2;
    long beg;

    rb_check_arity(argc, min_arity, 2);
    if (argc == 1) {
        iter = 1;
    }
    else {
        repl = argv[1];
        hash = rb_check_hash_type(argv[1]);
        if (NIL_P(hash)) {
            StringValue(repl);
        }
    }

    pat = get_pat_quoted(argv[0], 1);

    str_modifiable(str);
    beg = rb_pat_search(pat, str, 0, 1);
    if (beg >= 0) {
        rb_encoding *enc;
        int cr = ENC_CODERANGE(str);
        long beg0, end0;
        VALUE match, match0 = Qnil;
        struct re_registers *regs;
        char *p, *rp;
        long len, rlen;

        match = rb_backref_get();
        regs = RMATCH_REGS(match);
        if (RB_TYPE_P(pat, T_STRING)) {
            beg0 = beg;
            end0 = beg0 + RSTRING_LEN(pat);
            match0 = pat;
        }
        else {
            beg0 = BEG(0);
            end0 = END(0);
            if (iter) match0 = rb_reg_nth_match(0, match);
        }

        if (iter || !NIL_P(hash)) {
            /* Remember the buffer so a mutation by user code is detected. */
            p = RSTRING_PTR(str); len = RSTRING_LEN(str);

            if (iter) {
                repl = rb_obj_as_string(rb_yield(match0));
            }
            else {
                repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
                repl = rb_obj_as_string(repl);
            }
            str_mod_check(str, p, len);
            rb_check_frozen(str);
        }
        else {
            repl = rb_reg_regsub(repl, str, regs, RB_TYPE_P(pat, T_STRING) ? Qnil : pat);
        }

        enc = rb_enc_compatible(str, repl);
        if (!enc) {
            rb_encoding *str_enc = STR_ENC_GET(str);
            p = RSTRING_PTR(str); len = RSTRING_LEN(str);
            if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
                coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
                rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
                         rb_enc_name(str_enc),
                         rb_enc_name(STR_ENC_GET(repl)));
            }
            enc = STR_ENC_GET(repl);
        }
        rb_str_modify(str);
        rb_enc_associate(str, enc);
        if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
            int cr2 = ENC_CODERANGE(repl);
            if (cr2 == ENC_CODERANGE_BROKEN ||
                (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
                cr = ENC_CODERANGE_UNKNOWN;
            else
                cr = cr2;
        }
        plen = end0 - beg0;
        rlen = RSTRING_LEN(repl);
        len = RSTRING_LEN(str);
        if (rlen > plen) {
            RESIZE_CAPA(str, len + rlen - plen);
        }
        /* Re-read the pointer: the resize above may have moved the buffer. */
        p = RSTRING_PTR(str);
        if (rlen != plen) {
            memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
        }
        rp = RSTRING_PTR(repl);
        memmove(p + beg0, rp, rlen);
        len += rlen - plen;
        STR_SET_LEN(str, len);
        TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
        ENC_CODERANGE_SET(str, cr);

        return str;
    }
    return Qnil;
}
Returns the successor to self. The successor is calculated by incrementing characters.
The first character to be incremented is the rightmost alphanumeric, or, if there are no alphanumerics, the rightmost character:
'THX1138'.succ# => "THX1139"'<<koala>>'.succ# => "<<koalb>>"'***'.succ# => '**+'
The successor to a digit is another digit, “carrying” to the next-leftcharacter for a “rollover” from 9 to 0, and prepending another digit ifnecessary:
'00'.succ# => "01"'09'.succ# => "10"'99'.succ# => "100"
The successor to a letter is another letter of the same case, carrying tothe next-left character for a rollover, and prepending another same-caseletter if necessary:
'aa'.succ# => "ab"'az'.succ# => "ba"'zz'.succ# => "aaa"'AA'.succ# => "AB"'AZ'.succ# => "BA"'ZZ'.succ# => "AAA"
The successor to a non-alphanumeric character is the next character in theunderlying character set's collating sequence, carrying to thenext-left character for a rollover, and prepending another character ifnecessary:
s =0.chr*3s# => "\x00\x00\x00"s.succ# => "\x00\x00\x01"s =255.chr*3s# => "\xFF\xFF\xFF"s.succ# => "\x01\x00\x00\x00"
Carrying can occur between and among mixtures of alphanumeric characters:
s ='zz99zz99's.succ# => "aaa00aa00"s ='99zz99zz's.succ# => "100aa00aa"
The successor to an empty String is a new empty String:
''.succ# => ""
/*
 * String#succ: returns the successor of +orig+ as a new string.
 *
 * A fresh string is built from orig's bytes, its encoding and code range
 * are copied with rb_enc_cr_str_copy_for_substr(), and the copy is handed
 * to str_succ(), whose result is returned. +orig+ itself is not passed to
 * str_succ().
 */
VALUE
rb_str_succ(VALUE orig)
{
    VALUE str;
    str = rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
    rb_enc_cr_str_copy_for_substr(str, orig);
    return str_succ(str);
}
Returns a basic n-bit checksum of the characters in str, where n is the optional Integer parameter, defaulting to 16. The result is simply the sum of the binary value of each byte in str, masked with 2**n - 1 (that is, taken modulo 2**n). This is not a particularly good checksum.
/*
 * str.sum(n = 16): adds up the bytes of +str+ and keeps the low +n+ bits.
 *
 * A negative +n+ is treated as 0, and 0 means "no masking".  Bytes are
 * accumulated in a native unsigned long; whenever that accumulator gets
 * close to FIXNUM_MAX it is flushed into a Ruby Integer via `+`.
 */
static VALUE
rb_str_sum(int argc, VALUE *argv, VALUE str)
{
    int bits = 16;
    VALUE total = INT2FIX(0);
    unsigned long partial = 0;
    char *start, *cur, *stop;
    long nbytes;
    VALUE mask;

    if (rb_check_arity(argc, 0, 1)) {
        bits = NUM2INT(argv[0]);
        if (bits < 0) {
            bits = 0;
        }
    }

    start = RSTRING_PTR(str);
    nbytes = RSTRING_LEN(str);
    stop = start + nbytes;

    for (cur = start; cur < stop; cur++) {
        if (partial > FIXNUM_MAX - UCHAR_MAX) {
            /* flush before the next byte could push partial past FIXNUM_MAX */
            total = rb_funcall(total, '+', 1, LONG2FIX(partial));
            /* `+` is a method call; re-validate the buffer afterwards */
            str_mod_check(str, start, nbytes);
            partial = 0;
        }
        partial += (unsigned char)*cur;
    }

    if (bits == 0) {
        /* unmasked: just fold in whatever is left */
        if (partial != 0) {
            total = rb_funcall(total, '+', 1, LONG2FIX(partial));
        }
        return total;
    }

    if (total == INT2FIX(0)) {
        /* never flushed: the whole sum fits in partial, mask natively */
        if (bits < (int)sizeof(long)*CHAR_BIT) {
            partial &= (((unsigned long)1)<<bits)-1;
        }
        return LONG2FIX(partial);
    }

    /* flushed at least once: finish and mask with Integer arithmetic */
    if (partial != 0) {
        total = rb_funcall(total, '+', 1, LONG2FIX(partial));
    }
    mask = rb_funcall(INT2FIX(1), idLTLT, 1, INT2FIX(bits));
    mask = rb_funcall(mask, '-', 1, INT2FIX(1));
    return rb_funcall(total, '&', 1, mask);
}
Returns a copy of str with uppercase alphabetic characters converted to lowercase and lowercase characters converted to uppercase.
See #downcase for meaning of options and use with different encodings.
"Hello".swapcase#=> "hELLO""cYbEr_PuNk11".swapcase#=> "CyBeR_pUnK11"
/*
 * str.swapcase([options]): returns a new string with the case of each
 * letter flipped.  An empty receiver (or one without a buffer) is simply
 * duplicated.
 */
static VALUE
rb_str_swapcase(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
    rb_encoding *enc;
    VALUE swapped;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);

    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) {
        return str_duplicate(rb_cString, str);
    }

    if (!(flags & ONIGENC_CASE_ASCII_ONLY)) {
        return rb_str_casemap(str, &flags, enc);
    }

    /* ASCII-only mapping writes into a pre-sized destination string */
    swapped = rb_str_new(0, RSTRING_LEN(str));
    rb_str_ascii_casemap(str, swapped, &flags, enc);
    return swapped;
}
Equivalent to #swapcase, but modifies the receiver in place, returning str, or nil if no changes were made.
See #downcase for meaning of options and use with different encodings.
/*
 * str.swapcase!([options]): in-place variant of swapcase.
 * Returns +str+ when the mapping set ONIGENC_CASE_MODIFIED in the flags,
 * otherwise nil.
 */
static VALUE
rb_str_swapcase_bang(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
    rb_encoding *enc;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);

    if (flags & ONIGENC_CASE_ASCII_ONLY) {
        /* source and destination are the same string */
        rb_str_ascii_casemap(str, str, &flags, enc);
    }
    else {
        VALUE mapped = rb_str_casemap(str, &flags, enc);
        str_shared_replace(str, mapped);
    }

    return (flags & ONIGENC_CASE_MODIFIED) ? str : Qnil;
}
Returns a complex which denotes the string form. The parser ignores leading whitespaces and trailing garbage. Any digit sequences can be separated by an underscore. Returns zero for null or garbage string.
'9'.to_c#=> (9+0i)'2.5'.to_c#=> (2.5+0i)'2.5/1'.to_c#=> ((5/2)+0i)'-3/2'.to_c#=> ((-3/2)+0i)'-i'.to_c#=> (0-1i)'45i'.to_c#=> (0+45i)'3-4i'.to_c#=> (3-4i)'-4e2-4e-2i'.to_c#=> (-400.0-0.04i)'-0.0-0.0i'.to_c#=> (-0.0-0.0i)'1/2+3/4i'.to_c#=> ((1/2)+(3/4)*i)'ruby'.to_c#=> (0+0i)
SeeKernel.Complex.
/*
 * str.to_c: hands the string's bytes to parse_comp() in non-strict mode
 * and returns whatever number it produced.
 *
 * parse_comp() is given a bare char pointer, so the buffer must end in a
 * NUL byte; one is written in place when it is missing.
 */
static VALUE
string_to_c(VALUE self)
{
    VALUE num;
    char *buf;

    rb_must_asciicompat(self);

    buf = RSTRING_PTR(self);
    if (!buf) {
        /* no buffer at all: parse an empty C string instead */
        buf = (char *)"";
    }
    else if (buf[RSTRING_LEN(self)] != '\0') {
        /* make the string writable, then terminate it */
        rb_str_modify(self);
        buf = RSTRING_PTR(self);
        buf[RSTRING_LEN(self)] = '\0';
    }

    (void)parse_comp(buf, 0, &num);

    return num;
}
Returns the result of interpreting leading characters in str as a floating point number. Extraneous characters past the end of a valid number are ignored. If there is not a valid number at the start of str, 0.0 is returned. This method never raises an exception.
"123.45e1".to_f#=> 1234.5"45.67 degrees".to_f#=> 45.67"thx1138".to_f#=> 0.0
/*
 * str.to_f: converts the string with rb_str_to_dbl() (second argument
 * FALSE) and wraps the resulting double as a Ruby Float.
 */
static VALUE
rb_str_to_f(VALUE str)
{
    double value = rb_str_to_dbl(str, FALSE);

    return DBL2NUM(value);
}
Returns the result of interpreting leading characters in str as an integer base base (between 2 and 36). Extraneous characters past the end of a valid number are ignored. If there is not a valid number at the start of str, 0 is returned. This method never raises an exception when base is valid.
"12345".to_i#=> 12345"99 red balloons".to_i#=> 99"0a".to_i#=> 0"0a".to_i(16)#=> 10"hello".to_i#=> 0"1100101".to_i(2)#=> 101"1100101".to_i(8)#=> 294977"1100101".to_i(10)#=> 1100101"1100101".to_i(16)#=> 17826049
/*
 * str.to_i(base = 10): converts the string with rb_str_to_inum() (third
 * argument FALSE).
 *
 * Raises ArgumentError here when an explicit negative base is given.
 */
static VALUE
rb_str_to_i(int argc, VALUE *argv, VALUE str)
{
    int base = 10;

    if (rb_check_arity(argc, 0, 1)) {
        base = NUM2INT(argv[0]);
        if (base < 0) {
            rb_raise(rb_eArgError, "invalid radix %d", base);
        }
    }

    return rb_str_to_inum(str, base, FALSE);
}
Returns the result of interpreting leading characters in str as a rational. Leading whitespace and extraneous characters past the end of a valid number are ignored. Digit sequences can be separated by an underscore. If there is not a valid number at the start of str, zero is returned. This method never raises an exception.
' 2 '.to_r#=> (2/1)'300/2'.to_r#=> (150/1)'-9.2'.to_r#=> (-46/5)'-9.2e2'.to_r#=> (-920/1)'1_234_567'.to_r#=> (1234567/1)'21 June 09'.to_r#=> (21/1)'21/06/09'.to_r#=> (7/2)'BWV 1079'.to_r#=> (0/1)
NOTE: "0.3".to_r isn't the same as 0.3.to_r. The former is equivalent to "3/10".to_r, but the latter isn't so.
"0.3".to_r==3/10r#=> true0.3.to_r==3/10r#=> false
See also Kernel#Rational.
/*
 * str.to_r: parses the string's bytes with parse_rat() and returns the
 * result.
 *
 * If parse_rat() hands back a non-zero Float, FloatDomainError
 * ("Infinity") is raised instead of returning it.
 */
static VALUE
string_to_r(VALUE self)
{
    VALUE num;

    rb_must_asciicompat(self);

    num = parse_rat(RSTRING_PTR(self), RSTRING_END(self), 0, TRUE);

    if (RB_FLOAT_TYPE_P(num)) {
        if (!FLOAT_ZERO_P(num)) {
            rb_raise(rb_eFloatDomainError, "Infinity");
        }
    }

    return num;
}
Returns the Symbol corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.
"Koala".intern#=> :Koalas ='cat'.to_sym#=> :cats== :cat#=> trues ='@cat'.to_sym#=> :@cats== :@cat#=> true
This can also be used to create symbols that cannot be represented usingthe:xxx
notation.
'cat and dog'.to_sym#=> :"cat and dog"
/*
 * Returns the Symbol for +str+, creating one when the lookup finds none.
 * Both the lookup and the creation happen between GLOBAL_SYMBOLS_ENTER
 * and GLOBAL_SYMBOLS_LEAVE.
 *
 * With USE_SYMBOL_GC the symbol is built from a frozen duplicate of
 * +str+ (passed through rb_fstring); the duplicate is re-associated with
 * US-ASCII when sym_check_asciionly() accepts the original string.
 * Without USE_SYMBOL_GC the symbol comes from intern_str().
 */
VALUE
rb_str_intern(VALUE str)
{
    VALUE sym;
#if USE_SYMBOL_GC
    rb_encoding *enc, *ascii;
    int type, relabel;
#else
    ID id;
#endif

    GLOBAL_SYMBOLS_ENTER(symbols);
    {
        sym = lookup_str_sym_with_lock(symbols, str);

        if (!sym) {
#if USE_SYMBOL_GC
            enc = rb_enc_get(str);
            ascii = rb_usascii_encoding();
            /* decided on the caller's string, before it is duplicated */
            relabel = (enc != ascii && sym_check_asciionly(str));

            str = rb_str_dup(str);
            if (relabel) {
                rb_enc_associate(str, ascii);
                enc = ascii;
            }
            OBJ_FREEZE(str);
            str = rb_fstring(str);

            type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
            if (type < 0) {
                type = ID_JUNK;
            }
            sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
#else
            id = intern_str(str, 0);
            sym = ID2SYM(id);
#endif
        }
    }
    GLOBAL_SYMBOLS_LEAVE();

    return sym;
}
Returns a copy ofstr
with the characters infrom_str
replaced by the corresponding characters into_str
. Ifto_str
is shorter thanfrom_str
, it is padded with its last character in order tomaintain the correspondence.
"hello".tr('el','ip')#=> "hippo""hello".tr('aeiou','*')#=> "h*ll*""hello".tr('aeiou','AA*')#=> "hAll*"
Both strings may use thec1-c2
notation to denote ranges ofcharacters, andfrom_str
may start with a^
,which denotes all characters except those listed.
"hello".tr('a-y','b-z')#=> "ifmmp""hello".tr('^aeiou','*')#=> "*e**o"
The backslash character\
can be used to escape^
or-
and is otherwise ignored unless it appears at the end ofa range or the end of thefrom_str
orto_str
:
"hello^world".tr("\\^aeiou","*")#=> "h*ll**w*rld""hello-world".tr("a\\-eo","*")#=> "h*ll**w*rld""hello\r\nworld".tr("\r","")#=> "hello\nworld""hello\r\nworld".tr("\\r","")#=> "hello\r\nwold""hello\r\nworld".tr("\\\r","")#=> "hello\nworld""X['\\b']".tr("X\\","")#=> "['b']""X['\\b']".tr("X-\\]","")#=> "'b'"
/*
 * str.tr(from_str, to_str): runs tr_trans() with its last argument 0 on
 * a duplicate of +str+ and returns the duplicate.
 */
static VALUE
rb_str_tr(VALUE str, VALUE src, VALUE repl)
{
    VALUE copy = str_duplicate(rb_cString, str);

    /* tr_trans()'s own return value is deliberately ignored here */
    tr_trans(copy, src, repl, 0);

    return copy;
}
Translatesstr in place, using the same rules as#tr. Returnsstr, ornil
if no changes were made.
/*
 * str.tr!(from_str, to_str): runs tr_trans() with its last argument 0
 * directly on +str+ and passes its result straight back to the caller.
 */
static VALUE
rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
{
    VALUE result = tr_trans(str, src, repl, 0);

    return result;
}
Processes a copy ofstr as described under#tr, then removes duplicate charactersin regions that were affected by the translation.
"hello".tr_s('l','r')#=> "hero""hello".tr_s('el','*')#=> "h*o""hello".tr_s('el','hx')#=> "hhxo"
/*
 * str.tr_s(from_str, to_str): runs tr_trans() with its last argument 1
 * on a duplicate of +str+ and returns the duplicate.
 */
static VALUE
rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
{
    VALUE copy = str_duplicate(rb_cString, str);

    /* tr_trans()'s own return value is deliberately ignored here */
    tr_trans(copy, src, repl, 1);

    return copy;
}
Performs#tr_s processing onstr in place, returningstr, ornil
if nochanges were made.
/*
 * str.tr_s!(from_str, to_str): runs tr_trans() with its last argument 1
 * directly on +str+ and passes its result straight back to the caller.
 */
static VALUE
rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
{
    VALUE result = tr_trans(str, src, repl, 1);

    return result;
}
Returns an unescaped version of the string. This does the inverse of#dump.
"\"hello \\n ''\"".undump#=> "hello \n ''"
/*
 * str.undump: rebuilds the string that produced a dumped literal.
 *
 * Accepted input is either a plain double-quoted literal, or such a
 * literal followed by .force_encoding("NAME"), optionally with a .dup
 * in between (emitted by older versions).
 *
 * Raises RuntimeError when the receiver contains non-ASCII characters or
 * a NUL byte, is unterminated, ends in a lone backslash, names an
 * unknown encoding, mixes a Unicode escape with force_encoding, or does
 * not have one of the accepted shapes.
 */
static VALUE
str_undump(VALUE str)
{
    const char *s = RSTRING_PTR(str);
    const char *s_end = RSTRING_END(str);
    rb_encoding *enc = rb_enc_get(str);
    VALUE undumped = rb_enc_str_new(s, 0L, enc);
    bool utf8 = false;
    bool binary = false;
    int w;

    rb_must_asciicompat(str);
    if (rb_str_is_ascii_only_p(str) == Qfalse) {
        rb_raise(rb_eRuntimeError, "non-ASCII character detected");
    }
    if (!str_null_check(str, &w)) {
        rb_raise(rb_eRuntimeError, "string contains null byte");
    }
    if (RSTRING_LEN(str) < 2) goto invalid_format;
    if (*s != '"') goto invalid_format;

    /* strip '"' at the start */
    s++;

    for (;;) {
        if (s >= s_end) {
            rb_raise(rb_eRuntimeError, "unterminated dumped string");
        }

        if (*s == '"') {
            /* epilogue */
            s++;
            if (s == s_end) {
                /* ascii compatible dumped string */
                break;
            }
            else {
                static const char force_encoding_suffix[] = ".force_encoding(\""; /* "\")" */
                static const char dup_suffix[] = ".dup";
                const char *encname;
                int encidx;
                ptrdiff_t size;

                /* check separately for strings dumped by older versions */
                size = sizeof(dup_suffix) - 1;
                if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;

                size = sizeof(force_encoding_suffix) - 1;
                if (s_end - s <= size) goto invalid_format;
                if (memcmp(s, force_encoding_suffix, size) != 0) goto invalid_format;
                s += size;

                if (utf8) {
                    rb_raise(rb_eRuntimeError, "dumped string contained Unicode escape but used force_encoding");
                }

                encname = s;
                s = memchr(s, '"', s_end-s);
                /*
                 * Reject a missing closing quote before doing pointer
                 * arithmetic on the result: subtracting from a null
                 * pointer is undefined behaviour.
                 */
                if (!s) goto invalid_format;
                size = s - encname;
                /* exactly `")` must remain after the encoding name */
                if (s_end - s != 2) goto invalid_format;
                if (s[0] != '"' || s[1] != ')') goto invalid_format;

                encidx = rb_enc_find_index2(encname, (long)size);
                if (encidx < 0) {
                    rb_raise(rb_eRuntimeError, "dumped string has unknown encoding name");
                }
                rb_enc_associate_index(undumped, encidx);
            }
            break;
        }

        if (*s == '\\') {
            s++;
            if (s >= s_end) {
                rb_raise(rb_eRuntimeError, "invalid escape");
            }
            /* the helper advances s and may update enc/utf8/binary */
            undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
        }
        else {
            rb_str_cat(undumped, s++, 1);
        }
    }

    return undumped;
invalid_format:
    rb_raise(rb_eRuntimeError, "invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
}
Unicode Normalization—Returns a normalized form ofstr
, usingUnicode normalizations NFC, NFD, NFKC, or NFKD. The normalization form usedis determined byform
, which can be any of the four values:nfc
,:nfd
,:nfkc
, or:nfkd
. The default is:nfc
.
If the string is not in a UnicodeEncoding,then anException is raised. In this context,'Unicode Encoding' means any of UTF-8, UTF-16BE/LE, andUTF-32BE/LE, as well as GB18030, UCS_2BE, and UCS_4BE. Anything other thanUTF-8 is implemented by converting to UTF-8, which makes it slower thanUTF-8.
"a\u0300".unicode_normalize#=> "\u00E0""a\u0300".unicode_normalize(:nfc)#=> "\u00E0""\u00E0".unicode_normalize(:nfd)#=> "a\u0300""\xE0".force_encoding('ISO-8859-1').unicode_normalize(:nfd)#=> Encoding::CompatibilityError raised
/*
 * str.unicode_normalize(form = :nfc): forwards the arguments to
 * unicode_normalize_common() with id_normalize and returns its result.
 */
static VALUE
rb_str_unicode_normalize(int argc, VALUE *argv, VALUE str)
{
    VALUE normalized = unicode_normalize_common(argc, argv, str, id_normalize);

    return normalized;
}
Destructive version of#unicode_normalize, doingUnicode normalization in place.
/*
 * str.unicode_normalize!(form = :nfc): computes the normalized string
 * the same way as unicode_normalize, then replaces the contents of +str+
 * with it via rb_str_replace() and returns that call's result.
 */
static VALUE
rb_str_unicode_normalize_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE normalized = unicode_normalize_common(argc, argv, str, id_normalize);

    return rb_str_replace(str, normalized);
}
Checks whetherstr
is in Unicode normalization formform
, which can be any of the four values:nfc
,:nfd
,:nfkc
, or:nfkd
. The defaultis:nfc
.
If the string is not in a UnicodeEncoding,then anException is raised. For details, see#unicode_normalize.
"a\u0300".unicode_normalized?#=> false"a\u0300".unicode_normalized?(:nfd)#=> true"\u00E0".unicode_normalized?#=> true"\u00E0".unicode_normalized?(:nfd)#=> false"\xE0".force_encoding('ISO-8859-1').unicode_normalized?#=> Encoding::CompatibilityError raised
/*
 * str.unicode_normalized?(form = :nfc): forwards the arguments to
 * unicode_normalize_common() with id_normalized_p and returns its result.
 */
static VALUE
rb_str_unicode_normalized_p(int argc, VALUE *argv, VALUE str)
{
    VALUE answer = unicode_normalize_common(argc, argv, str, id_normalized_p);

    return answer;
}
Decodesstr (which may contain binary data) according to theformat string, returning an array of each value extracted. The formatstring consists of a sequence of single-character directives, summarized inthe table at the end of this entry. Each directive may be followed by anumber, indicating the number of times to repeat with this directive. Anasterisk (“*
'') will use up all remaining elements.The directivessSiIlL
may each be followed by an underscore(“_
'') or exclamation mark (“!
'')to use the underlying platform's native size for the specified type;otherwise, it uses a platform-independent consistent size. Spaces areignored in the format string. See also#unpack1,Array#pack.
"abc \0\0abc \0\0".unpack('A6Z6')#=> ["abc", "abc "]"abc \0\0".unpack('a3a3')#=> ["abc", " \000\000"]"abc \0abc \0".unpack('Z*Z*')#=> ["abc ", "abc "]"aa".unpack('b8B8')#=> ["10000110", "01100001"]"aaa".unpack('h2H2c')#=> ["16", "61", 97]"\xfe\xff\xfe\xff".unpack('sS')#=> [-2, 65534]"now=20is".unpack('M*')#=> ["now is"]"whole".unpack('xax2aX2aX1aX2a')#=> ["h", "e", "l", "l", "o"]
This table summarizes the various formats and the Ruby classes returned byeach.
Integer | |Directive | Returns | Meaning------------------------------------------------------------------C | Integer | 8-bit unsigned (unsigned char)S | Integer | 16-bit unsigned, native endian (uint16_t)L | Integer | 32-bit unsigned, native endian (uint32_t)Q | Integer | 64-bit unsigned, native endian (uint64_t)J | Integer | pointer width unsigned, native endian (uintptr_t) | |c | Integer | 8-bit signed (signed char)s | Integer | 16-bit signed, native endian (int16_t)l | Integer | 32-bit signed, native endian (int32_t)q | Integer | 64-bit signed, native endian (int64_t)j | Integer | pointer width signed, native endian (intptr_t) | |S_ S! | Integer | unsigned short, native endianI I_ I! | Integer | unsigned int, native endianL_ L! | Integer | unsigned long, native endianQ_ Q! | Integer | unsigned long long, native endian (ArgumentError | | if the platform has no long long type.)J! | Integer | uintptr_t, native endian (same with J) | |s_ s! | Integer | signed short, native endiani i_ i! | Integer | signed int, native endianl_ l! | Integer | signed long, native endianq_ q! | Integer | signed long long, native endian (ArgumentError | | if the platform has no long long type.)j! 
| Integer | intptr_t, native endian (same with j) | |S> s> S!> s!> | Integer | same as the directives without ">" exceptL> l> L!> l!> | | big endianI!> i!> | |Q> q> Q!> q!> | | "S>" is same as "n"J> j> J!> j!> | | "L>" is same as "N" | |S< s< S!< s!< | Integer | same as the directives without "<" exceptL< l< L!< l!< | | little endianI!< i!< | |Q< q< Q!< q!< | | "S<" is same as "v"J< j< J!< j!< | | "L<" is same as "V" | |n | Integer | 16-bit unsigned, network (big-endian) byte orderN | Integer | 32-bit unsigned, network (big-endian) byte orderv | Integer | 16-bit unsigned, VAX (little-endian) byte orderV | Integer | 32-bit unsigned, VAX (little-endian) byte order | |U | Integer | UTF-8 characterw | Integer | BER-compressed integer (see Array#pack)Float | |Directive | Returns | Meaning-----------------------------------------------------------------D d | Float | double-precision, native formatF f | Float | single-precision, native formatE | Float | double-precision, little-endian byte ordere | Float | single-precision, little-endian byte orderG | Float | double-precision, network (big-endian) byte orderg | Float | single-precision, network (big-endian) byte orderString | |Directive | Returns | Meaning-----------------------------------------------------------------A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)a | String | arbitrary binary stringZ | String | null-terminated stringB | String | bit string (MSB first)b | String | bit string (LSB first)H | String | hex string (high nibble first)h | String | hex string (low nibble first)u | String | UU-encoded stringM | String | quoted-printable, MIME encoding (see RFC2045)m | String | base64 encoded string (RFC 2045) (default) | | base64 encoded string (RFC 4648) if followed by 0P | String | pointer to a structure (fixed-length string)p | String | pointer to a null-terminated stringMisc. 
| |Directive | Returns | Meaning-----------------------------------------------------------------@ | --- | skip to the offset given by the length argumentX | --- | skip backward one bytex | --- | skip forward one byte
HISTORY
J, J! j, and j! are available since Ruby 2.3.
Q_, Q!, q_, and q! are available since Ruby 2.1.
I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
# File pack.rb, line 256
# Decodes the receiver according to +fmt+ by delegating to the
# pack_unpack primitive.
def unpack(fmt)
  Primitive.pack_unpack(fmt)
end
Decodesstr (which may contain binary data) according to theformat string, returning the first value extracted. See also#unpack,Array#pack.
Contrast with#unpack:
"abc \0\0abc \0\0".unpack('A6Z6')#=> ["abc", "abc "]"abc \0\0abc \0\0".unpack1('A6Z6')#=> "abc"
In that case data would be lost, but often it's the case that the array only holds one value, especially when unpacking binary data. For instance:
"\xff\x00\x00\x00".unpack("l")   #=> [255]
"\xff\x00\x00\x00".unpack1("l")  #=> 255
Thus unpack1 is convenient, makes the intention clear, and signals the expected return value to those reading the code.
# File pack.rb, line 280
# Decodes the receiver according to +fmt+ by delegating to the
# pack_unpack1 primitive.
def unpack1(fmt)
  Primitive.pack_unpack1(fmt)
end
Returns a copy ofstr with all lowercase letters replaced withtheir uppercase counterparts.
See#downcase for meaning ofoptions
and use with different encodings.
"hEllO".upcase#=> "HELLO"
/*
 * str.upcase([options]): returns an upcased copy of +str+.
 *
 * Three strategies, tried in order:
 *   1. when case_option_single_p() accepts flags/encoding/string, copy
 *      the bytes and run upcase_single() on the copy;
 *   2. with ONIGENC_CASE_ASCII_ONLY set, map into a pre-sized string via
 *      rb_str_ascii_casemap();
 *   3. otherwise use rb_str_casemap().
 */
static VALUE
rb_str_upcase(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
    rb_encoding *enc;
    VALUE upper;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);

    if (case_option_single_p(flags, enc, str)) {
        upper = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy(upper, str);
        upcase_single(upper);
        return upper;
    }

    if (flags & ONIGENC_CASE_ASCII_ONLY) {
        upper = rb_str_new(0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, upper, &flags, enc);
        return upper;
    }

    return rb_str_casemap(str, &flags, enc);
}
Upcases the contents ofstr, returningnil
if nochanges were made.
See#downcase for meaning ofoptions
and use with different encodings.
/*
 * str.upcase!([options]): upcases +str+ in place.
 * Returns +str+ when ONIGENC_CASE_MODIFIED ends up set in the flags,
 * otherwise nil.
 */
static VALUE
rb_str_upcase_bang(int argc, VALUE *argv, VALUE str)
{
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
    rb_encoding *enc;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);

    if (case_option_single_p(flags, enc, str)) {
        /* upcase_single() reports whether it touched anything */
        if (upcase_single(str)) {
            flags |= ONIGENC_CASE_MODIFIED;
        }
    }
    else if (flags & ONIGENC_CASE_ASCII_ONLY) {
        /* source and destination are the same string */
        rb_str_ascii_casemap(str, str, &flags, enc);
    }
    else {
        VALUE mapped = rb_str_casemap(str, &flags, enc);
        str_shared_replace(str, mapped);
    }

    return (flags & ONIGENC_CASE_MODIFIED) ? str : Qnil;
}
With a block given, calls the block with each String value returned by successive calls to #succ; the first value is self, the next is self.succ, and so on; the sequence terminates when value other_string is reached; returns self:
'a8'.upto('b6') {|s| print s, ' ' } # => "a8"
Output:
a8 a9 b0 b1 b2 b3 b4 b5 b6
If argument exclusive is given as a truthy object, the last value is omitted:
'a8'.upto('b6', true) {|s| print s, ' ' } # => "a8"
Output:
a8 a9 b0 b1 b2 b3 b4 b5
If other_string would not be reached, does not call the block:
'25'.upto('5') {|s| fail s }
'aa'.upto('a') {|s| fail s }
With no block given, returns a new Enumerator:
'a8'.upto('b6')# => #<Enumerator: "a8":upto("b6")>
/*
 * str.upto(other_string, exclusive = false) { |s| ... }
 *
 * Parses one required and one optional argument, hands control to
 * RETURN_ENUMERATOR, and otherwise iterates through rb_str_upto_each()
 * with str_upto_i as the per-element callback.
 */
static VALUE
rb_str_upto(int argc, VALUE *argv, VALUE beg)
{
    VALUE last, excl_arg;

    rb_scan_args(argc, argv, "11", &last, &excl_arg);
    RETURN_ENUMERATOR(beg, argc, argv);

    /* any truthy second argument requests the exclusive variant */
    return rb_str_upto_each(beg, last, RTEST(excl_arg), str_upto_i, Qnil);
}
Returns true for a string which is encoded correctly.
"\xc2\xa1".force_encoding("UTF-8").valid_encoding?#=> true"\xc2".force_encoding("UTF-8").valid_encoding?#=> false"\x80".force_encoding("UTF-8").valid_encoding?#=> false
/*
 * str.valid_encoding?: false only when the code range computed by
 * rb_enc_str_coderange() is ENC_CODERANGE_BROKEN; true for every other
 * code range.
 */
static VALUE
rb_str_valid_encoding_p(VALUE str)
{
    int cr = rb_enc_str_coderange(str);

    if (cr == ENC_CODERANGE_BROKEN) {
        return Qfalse;
    }
    return Qtrue;
}
This page was generated for Ruby 3.0.0
Generated with Ruby-doc Rdoc Generator 0.42.0.