| // Copyright 2013 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifdef UNSAFE_BUFFERS_BUILD |
| // TODO(crbug.com/390223051): Remove C-library calls to fix the errors. |
| #pragma allow_unsafe_libc_calls |
| #endif |
| |
| #include"url/url_util.h" |
| |
| #include<stddef.h> |
| |
| #include<optional> |
| #include<string_view> |
| |
| #include"build/build_config.h" |
| #include"testing/gtest/include/gtest/gtest-message.h" |
| #include"testing/gtest/include/gtest/gtest.h" |
| #include"url/third_party/mozilla/url_parse.h" |
| #include"url/url_canon.h" |
| #include"url/url_canon_stdstring.h" |
| #include"url/url_test_utils.h" |
| |
| namespace url{ |
| |
| classURLUtilTest:public testing::Test{ |
| public: |
| URLUtilTest()=default; |
| |
| URLUtilTest(constURLUtilTest&)=delete; |
| URLUtilTest&operator=(constURLUtilTest&)=delete; |
| |
| ~URLUtilTest() override=default; |
| |
| protected: |
| structURLCase{ |
| const std::string_view input; |
| const std::string_view expected; |
| bool expected_success; |
| }; |
| |
| structResolveRelativeCase{ |
| const std::string_view base; |
| const std::string_view rel; |
| std::optional<std::string_view> expected; |
| }; |
| |
| voidTestCanonicalize(constURLCase& url_case){ |
| std::string canonicalized; |
| StdStringCanonOutput output(&canonicalized); |
| Parsed parsed; |
| bool success= |
| Canonicalize(url_case.input.data(), url_case.input.size(), |
| /*trim_path_end=*/false, |
| /*charset_converter=*/nullptr,&output,&parsed); |
| output.Complete(); |
| EXPECT_EQ(success, url_case.expected_success); |
| EXPECT_EQ(output.view(), url_case.expected); |
| } |
| |
| voidTestResolveRelative(constResolveRelativeCase& test){ |
| SCOPED_TRACE(testing::Message() |
| <<"base: "<< test.base<<", rel: "<< test.rel); |
| |
| Parsed base_parsed=ParseNonSpecialURL(test.base); |
| |
| std::string resolved; |
| StdStringCanonOutput output(&resolved); |
| |
| Parsed resolved_parsed; |
| bool valid=ResolveRelative(test.base.data(), test.base.size(), |
| base_parsed, test.rel.data(), test.rel.size(), |
| nullptr,&output,&resolved_parsed); |
| output.Complete(); |
| |
| if(valid){ |
| ASSERT_TRUE(test.expected); |
| EXPECT_EQ(resolved,*test.expected); |
| }else{ |
| EXPECT_FALSE(test.expected); |
| } |
| } |
| |
| private: |
| ScopedSchemeRegistryForTests scoped_registry_; |
| }; |
| |
| TEST_F(URLUtilTest,FindAndCompareScheme){ |
| Component found_scheme; |
| |
| // Simple case where the scheme is found and matches. |
| constchar kStr1[]="http://www.com/"; |
| EXPECT_TRUE(FindAndCompareScheme(kStr1,static_cast<int>(strlen(kStr1)), |
| "http",nullptr)); |
| EXPECT_TRUE(FindAndCompareScheme( |
| kStr1,static_cast<int>(strlen(kStr1)),"http",&found_scheme)); |
| EXPECT_TRUE(found_scheme==Component(0,4)); |
| |
| // A case where the scheme is found and doesn't match. |
| EXPECT_FALSE(FindAndCompareScheme( |
| kStr1,static_cast<int>(strlen(kStr1)),"https",&found_scheme)); |
| EXPECT_TRUE(found_scheme==Component(0,4)); |
| |
| // A case where there is no scheme. |
| constchar kStr2[]="httpfoobar"; |
| EXPECT_FALSE(FindAndCompareScheme( |
| kStr2,static_cast<int>(strlen(kStr2)),"http",&found_scheme)); |
| EXPECT_TRUE(found_scheme==Component()); |
| |
| // When there is an empty scheme, it should match the empty scheme. |
| constchar kStr3[]=":foo.com/"; |
| EXPECT_TRUE(FindAndCompareScheme( |
| kStr3,static_cast<int>(strlen(kStr3)),"",&found_scheme)); |
| EXPECT_TRUE(found_scheme==Component(0,0)); |
| |
| // But when there is no scheme, it should fail. |
| EXPECT_FALSE(FindAndCompareScheme("",0,"",&found_scheme)); |
| EXPECT_TRUE(found_scheme==Component()); |
| |
| // When there is a whitespace char in scheme, it should canonicalize the URL |
| // before comparison. |
| constchar whtspc_str[]=" \r\n\tjav\ra\nscri\tpt:alert(1)"; |
| EXPECT_TRUE(FindAndCompareScheme(whtspc_str, |
| static_cast<int>(strlen(whtspc_str)), |
| "javascript",&found_scheme)); |
| EXPECT_TRUE(found_scheme==Component(1,10)); |
| |
| // Control characters should be stripped out on the ends, and kept in the |
| // middle. |
| constchar ctrl_str[]="\02jav\02scr\03ipt:alert(1)"; |
| EXPECT_FALSE(FindAndCompareScheme(ctrl_str, |
| static_cast<int>(strlen(ctrl_str)), |
| "javascript",&found_scheme)); |
| EXPECT_TRUE(found_scheme==Component(1,11)); |
| } |
| |
| TEST_F(URLUtilTest,IsStandard){ |
| constchar kHTTPScheme[]="http"; |
| EXPECT_TRUE(IsStandard(kHTTPScheme)); |
| |
| constchar kFooScheme[]="foo"; |
| EXPECT_FALSE(IsStandard(kFooScheme)); |
| } |
| |
| TEST_F(URLUtilTest,IsReferrerScheme){ |
| constchar kHTTPScheme[]="http"; |
| EXPECT_TRUE(IsReferrerScheme(kHTTPScheme)); |
| |
| constchar kFooScheme[]="foo"; |
| EXPECT_FALSE(IsReferrerScheme(kFooScheme)); |
| } |
| |
| TEST_F(URLUtilTest,AddReferrerScheme){ |
| staticconstchar kFooScheme[]="foo"; |
| EXPECT_FALSE(IsReferrerScheme(kFooScheme)); |
| |
| url::ScopedSchemeRegistryForTests scoped_registry; |
| AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST); |
| EXPECT_TRUE(IsReferrerScheme(kFooScheme)); |
| } |
| |
| TEST_F(URLUtilTest,ShutdownCleansUpSchemes){ |
| staticconstchar kFooScheme[]="foo"; |
| EXPECT_FALSE(IsReferrerScheme(kFooScheme)); |
| |
| { |
| url::ScopedSchemeRegistryForTests scoped_registry; |
| AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST); |
| EXPECT_TRUE(IsReferrerScheme(kFooScheme)); |
| } |
| |
| EXPECT_FALSE(IsReferrerScheme(kFooScheme)); |
| } |
| |
| TEST_F(URLUtilTest,GetStandardSchemeType){ |
| url::SchemeType scheme_type; |
| |
| constchar kHTTPScheme[]="http"; |
| scheme_type= url::SCHEME_WITHOUT_AUTHORITY; |
| EXPECT_TRUE(GetStandardSchemeType(kHTTPScheme, |
| &scheme_type)); |
| EXPECT_EQ(url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, scheme_type); |
| |
| constchar kFilesystemScheme[]="filesystem"; |
| scheme_type= url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION; |
| EXPECT_TRUE(GetStandardSchemeType(kFilesystemScheme, |
| &scheme_type)); |
| EXPECT_EQ(url::SCHEME_WITHOUT_AUTHORITY, scheme_type); |
| |
| constchar kFooScheme[]="foo"; |
| scheme_type= url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION; |
| EXPECT_FALSE(GetStandardSchemeType(kFooScheme, |
| &scheme_type)); |
| } |
| |
| TEST_F(URLUtilTest,GetStandardSchemes){ |
| std::vector<std::string> expected={ |
| kHttpsScheme, kHttpScheme, kFileScheme, kFtpScheme, |
| kWssScheme, kWsScheme, kFileSystemScheme,"foo", |
| }; |
| AddStandardScheme("foo", url::SCHEME_WITHOUT_AUTHORITY); |
| EXPECT_EQ(expected,GetStandardSchemes()); |
| } |
| |
| TEST_F(URLUtilTest,ReplaceComponents){ |
| Parsed parsed; |
| RawCanonOutputT<char> output; |
| Parsed new_parsed; |
| |
| // Check that the following calls do not cause crash |
| Replacements<char> replacements; |
| replacements.SetRef("test",Component(0,4)); |
| ReplaceComponents(nullptr,0, parsed, replacements,nullptr,&output, |
| &new_parsed); |
| ReplaceComponents("",0, parsed, replacements,nullptr,&output,&new_parsed); |
| replacements.ClearRef(); |
| replacements.SetHost("test",Component(0,4)); |
| ReplaceComponents(nullptr,0, parsed, replacements,nullptr,&output, |
| &new_parsed); |
| ReplaceComponents("",0, parsed, replacements,nullptr,&output,&new_parsed); |
| |
| replacements.ClearHost(); |
| ReplaceComponents(nullptr,0, parsed, replacements,nullptr,&output, |
| &new_parsed); |
| ReplaceComponents("",0, parsed, replacements,nullptr,&output,&new_parsed); |
| ReplaceComponents(nullptr,0, parsed, replacements,nullptr,&output, |
| &new_parsed); |
| ReplaceComponents("",0, parsed, replacements,nullptr,&output,&new_parsed); |
| } |
| |
| static std::stringCheckReplaceScheme(constchar* base_url, |
| constchar* scheme){ |
| // Make sure the input is canonicalized. |
| RawCanonOutput<32> original; |
| Parsed original_parsed; |
| Canonicalize(base_url, strlen(base_url),true,nullptr,&original, |
| &original_parsed); |
| |
| Replacements<char> replacements; |
| replacements.SetScheme(scheme,Component(0, strlen(scheme))); |
| |
| std::string output_string; |
| StdStringCanonOutput output(&output_string); |
| Parsed output_parsed; |
| ReplaceComponents(original.data(), original.length(), original_parsed, |
| replacements,nullptr,&output,&output_parsed); |
| |
| output.Complete(); |
| return output_string; |
| } |
| |
| TEST_F(URLUtilTest,ReplaceScheme){ |
| EXPECT_EQ("https://google.com/", |
| CheckReplaceScheme("http://google.com/","https")); |
| EXPECT_EQ("file://google.com/", |
| CheckReplaceScheme("http://google.com/","file")); |
| EXPECT_EQ("http://home/Build", |
| CheckReplaceScheme("file:///Home/Build","http")); |
| EXPECT_EQ("#"261"> | CheckReplaceScheme("about:foo","javascript")); |
| EXPECT_EQ("://google.com/", |
| CheckReplaceScheme("http://google.com/","")); |
| EXPECT_EQ("http://google.com/", |
| CheckReplaceScheme("about:google.com","http")); |
| EXPECT_EQ("http:",CheckReplaceScheme("","http")); |
| |
| #ifdef WIN32 |
| // Magic Windows drive letter behavior when converting to a file URL. |
| EXPECT_EQ("file:///E:/foo/", |
| CheckReplaceScheme("http://localhost/e:foo/","file")); |
| #endif |
| |
| // This will probably change to "about://google.com/" when we fix |
| // http://crbug.com/160 which should also be an acceptable result. |
| EXPECT_EQ("about://google.com/", |
| CheckReplaceScheme("http://google.com/","about")); |
| |
| EXPECT_EQ("http://example.com/%20hello%20#%20world", |
| CheckReplaceScheme("myscheme:example.com/ hello # world ","http")); |
| } |
| |
| TEST_F(URLUtilTest,DecodeURLEscapeSequences){ |
| structDecodeCase{ |
| constchar* input; |
| constchar* output; |
| } decode_cases[]={ |
| {"hello, world","hello, world"}, |
| {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/", |
| "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"}, |
| {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/", |
| "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"}, |
| {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/", |
| " !\"#$%&'()*+,-.//"}, |
| {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/", |
| "0123456789:;<=>?/"}, |
| {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/", |
| "@ABCDEFGHIJKLMNO/"}, |
| {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/", |
| "PQRSTUVWXYZ[\\]^_/"}, |
| {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/", |
| "`abcdefghijklmno/"}, |
| {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/", |
| "pqrstuvwxyz{|}~\x7f/"}, |
| {"%e4%bd%a0%e5%a5%bd","\xe4\xbd\xa0\xe5\xa5\xbd"}, |
| // U+FFFF (Noncharacter) should not be replaced with U+FFFD (Replacement |
| // Character) (http://crbug.com/1416021) |
| {"%ef%bf%bf","\xef\xbf\xbf"}, |
| // U+FDD0 (Noncharacter) |
| {"%ef%b7%90","\xef\xb7\x90"}, |
| // U+FFFD (Replacement Character) |
| {"%ef%bf%bd","\xef\xbf\xbd"}, |
| }; |
| |
| for(constauto& decode_case: decode_cases){ |
| RawCanonOutputT<char16_t> output; |
| DecodeURLEscapeSequences(decode_case.input, |
| DecodeURLMode::kUTF8OrIsomorphic,&output); |
| EXPECT_EQ(decode_case.output, base::UTF16ToUTF8(std::u16string( |
| output.data(), output.length()))); |
| |
| RawCanonOutputT<char16_t> output_utf8; |
| DecodeURLEscapeSequences(decode_case.input,DecodeURLMode::kUTF8, |
| &output_utf8); |
| EXPECT_EQ(decode_case.output, |
| base::UTF16ToUTF8( |
| std::u16string(output_utf8.data(), output_utf8.length()))); |
| } |
| |
| // Our decode should decode %00 |
| constchar zero_input[]="%00"; |
| RawCanonOutputT<char16_t> zero_output; |
| DecodeURLEscapeSequences(zero_input,DecodeURLMode::kUTF8,&zero_output); |
| EXPECT_NE("%00", base::UTF16ToUTF8(std::u16string(zero_output.data(), |
| zero_output.length()))); |
| |
| // Test the error behavior for invalid UTF-8. |
| structUtf8DecodeCase{ |
| constchar* input; |
| std::vector<char16_t> expected_iso; |
| std::vector<char16_t> expected_utf8; |
| } utf8_decode_cases[]={ |
| // %e5%a5%bd is a valid UTF-8 sequence. U+597D |
| {"%e4%a0%e5%a5%bd", |
| {0x00e4,0x00a0,0x00e5,0x00a5,0x00bd,0}, |
| {0xfffd,0x597d,0}}, |
| {"%e5%a5%bd%e4%a0", |
| {0x00e5,0x00a5,0x00bd,0x00e4,0x00a0,0}, |
| {0x597d,0xfffd,0}}, |
| {"%e4%a0%e5%bd", |
| {0x00e4,0x00a0,0x00e5,0x00bd,0}, |
| {0xfffd,0xfffd,0}}, |
| }; |
| |
| for(constauto& utf8_decode_case: utf8_decode_cases){ |
| RawCanonOutputT<char16_t> output_iso; |
| DecodeURLEscapeSequences(utf8_decode_case.input, |
| DecodeURLMode::kUTF8OrIsomorphic,&output_iso); |
| EXPECT_EQ(std::u16string(utf8_decode_case.expected_iso.data()), |
| std::u16string(output_iso.data(), output_iso.length())); |
| |
| RawCanonOutputT<char16_t> output_utf8; |
| DecodeURLEscapeSequences(utf8_decode_case.input,DecodeURLMode::kUTF8, |
| &output_utf8); |
| EXPECT_EQ(std::u16string(utf8_decode_case.expected_utf8.data()), |
| std::u16string(output_utf8.data(), output_utf8.length())); |
| } |
| } |
| |
| TEST_F(URLUtilTest,TestEncodeURIComponent){ |
| structEncodeCase{ |
| constchar* input; |
| constchar* output; |
| } encode_cases[]={ |
| {"hello, world","hello%2C%20world"}, |
| {"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", |
| "%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F"}, |
| {"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", |
| "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F"}, |
| {" !\"#$%&'()*+,-./", |
| "%20!%22%23%24%25%26%27()*%2B%2C-.%2F"}, |
| {"0123456789:;<=>?", |
| "0123456789%3A%3B%3C%3D%3E%3F"}, |
| {"@ABCDEFGHIJKLMNO", |
| "%40ABCDEFGHIJKLMNO"}, |
| {"PQRSTUVWXYZ[\\]^_", |
| "PQRSTUVWXYZ%5B%5C%5D%5E_"}, |
| {"`abcdefghijklmno", |
| "%60abcdefghijklmno"}, |
| {"pqrstuvwxyz{|}~\x7f", |
| "pqrstuvwxyz%7B%7C%7D~%7F"}, |
| }; |
| |
| for(constauto& encode_case: encode_cases){ |
| RawCanonOutputT<char> buffer; |
| EncodeURIComponent(encode_case.input,&buffer); |
| std::string output(buffer.data(), buffer.length()); |
| EXPECT_EQ(encode_case.output, output); |
| } |
| } |
| |
| TEST_F(URLUtilTest,PotentiallyDanglingMarkup){ |
| structResolveRelativeCase{ |
| constchar* base; |
| constchar* rel; |
| bool potentially_dangling_markup; |
| constchar* out; |
| } cases[]={ |
| {"https://example.com/","/path<",false,"https://example.com/path%3C"}, |
| {"https://example.com/","\n/path<",true,"https://example.com/path%3C"}, |
| {"https://example.com/","\r/path<",true,"https://example.com/path%3C"}, |
| {"https://example.com/","\t/path<",true,"https://example.com/path%3C"}, |
| {"https://example.com/","/pa\nth<",true,"https://example.com/path%3C"}, |
| {"https://example.com/","/pa\rth<",true,"https://example.com/path%3C"}, |
| {"https://example.com/","/pa\tth<",true,"https://example.com/path%3C"}, |
| {"https://example.com/","/path\n<",true,"https://example.com/path%3C"}, |
| {"https://example.com/","/path\r<",true,"https://example.com/path%3C"}, |
| {"https://example.com/","/path\r<",true,"https://example.com/path%3C"}, |
| {"https://example.com/","\n/<path",true,"https://example.com/%3Cpath"}, |
| {"https://example.com/","\r/<path",true,"https://example.com/%3Cpath"}, |
| {"https://example.com/","\t/<path",true,"https://example.com/%3Cpath"}, |
| {"https://example.com/","/<pa\nth",true,"https://example.com/%3Cpath"}, |
| {"https://example.com/","/<pa\rth",true,"https://example.com/%3Cpath"}, |
| {"https://example.com/","/<pa\tth",true,"https://example.com/%3Cpath"}, |
| {"https://example.com/","/<path\n",true,"https://example.com/%3Cpath"}, |
| {"https://example.com/","/<path\r",true,"https://example.com/%3Cpath"}, |
| {"https://example.com/","/<path\r",true,"https://example.com/%3Cpath"}, |
| }; |
| |
| for(constauto& test: cases){ |
| SCOPED_TRACE(::testing::Message()<< test.base<<", "<< test.rel); |
| Parsed base_parsed=ParseStandardURL(test.base); |
| |
| std::string resolved; |
| StdStringCanonOutput output(&resolved); |
| Parsed resolved_parsed; |
| bool valid= |
| ResolveRelative(test.base, strlen(test.base), base_parsed, test.rel, |
| strlen(test.rel),nullptr,&output,&resolved_parsed); |
| ASSERT_TRUE(valid); |
| output.Complete(); |
| |
| EXPECT_EQ(test.potentially_dangling_markup, |
| resolved_parsed.potentially_dangling_markup); |
| EXPECT_EQ(test.out, resolved); |
| } |
| } |
| |
| TEST_F(URLUtilTest,PotentiallyDanglingMarkupAfterReplacement){ |
| // Parse a URL with potentially dangling markup. |
| Parsed original_parsed; |
| RawCanonOutput<32> original; |
| constchar* url="htt\nps://example.com/<path"; |
| Canonicalize(url, strlen(url),false,nullptr,&original,&original_parsed); |
| ASSERT_TRUE(original_parsed.potentially_dangling_markup); |
| |
| // Perform a replacement, and validate that the potentially_dangling_markup |
| // flag carried over to the new Parsed object. |
| Replacements<char> replacements; |
| replacements.ClearRef(); |
| Parsed replaced_parsed; |
| RawCanonOutput<32> replaced; |
| ReplaceComponents(original.data(), original.length(), original_parsed, |
| replacements,nullptr,&replaced,&replaced_parsed); |
| EXPECT_TRUE(replaced_parsed.potentially_dangling_markup); |
| } |
| |
| TEST_F(URLUtilTest,PotentiallyDanglingMarkupAfterSchemeOnlyReplacement){ |
| // Parse a URL with potentially dangling markup. |
| Parsed original_parsed; |
| RawCanonOutput<32> original; |
| constchar* url="http://example.com/\n/<path"; |
| Canonicalize(url, strlen(url),false,nullptr,&original,&original_parsed); |
| ASSERT_TRUE(original_parsed.potentially_dangling_markup); |
| |
| // Perform a replacement, and validate that the potentially_dangling_markup |
| // flag carried over to the new Parsed object. |
| Replacements<char> replacements; |
| constchar* new_scheme="https"; |
| replacements.SetScheme(new_scheme,Component(0, strlen(new_scheme))); |
| Parsed replaced_parsed; |
| RawCanonOutput<32> replaced; |
| ReplaceComponents(original.data(), original.length(), original_parsed, |
| replacements,nullptr,&replaced,&replaced_parsed); |
| EXPECT_TRUE(replaced_parsed.potentially_dangling_markup); |
| } |
| |
| TEST_F(URLUtilTest,TestDomainIs){ |
| conststruct{ |
| constchar* canonicalized_host; |
| constchar* lower_ascii_domain; |
| bool expected_domain_is; |
| } kTestCases[]={ |
| {"google.com","google.com",true}, |
| {"www.google.com","google.com",true},// Subdomain is ignored. |
| {"www.google.com.cn","google.com",false},// Different TLD. |
| {"www.google.comm","google.com",false}, |
| {"www.iamnotgoogle.com","google.com",false},// Different hostname. |
| {"www.google.com","Google.com",false},// The input is not lower-cased. |
| |
| // If the host ends with a dot, it matches domains with or without a dot. |
| {"www.google.com.","google.com",true}, |
| {"www.google.com.","google.com.",true}, |
| {"www.google.com.",".com",true}, |
| {"www.google.com.",".com.",true}, |
| |
| // But, if the host doesn't end with a dot and the input domain does, then |
| // it's considered to not match. |
| {"www.google.com","google.com.",false}, |
| |
| // If the host ends with two dots, it doesn't match. |
| {"www.google.com..","google.com",false}, |
| |
| // Empty parameters. |
| {"www.google.com","",false}, |
| {"","www.google.com",false}, |
| {"","",false}, |
| }; |
| |
| for(constauto& test_case: kTestCases){ |
| SCOPED_TRACE(testing::Message()<<"(host, domain): (" |
| << test_case.canonicalized_host<<", " |
| << test_case.lower_ascii_domain<<")"); |
| |
| EXPECT_EQ( |
| test_case.expected_domain_is, |
| DomainIs(test_case.canonicalized_host, test_case.lower_ascii_domain)); |
| } |
| } |
| |
| namespace{ |
| std::optional<std::string>CanonicalizeSpec(std::string_view spec, |
| bool trim_path_end){ |
| std::string canonicalized; |
| StdStringCanonOutput output(&canonicalized); |
| Parsed parsed; |
| if(!Canonicalize(spec.data(), spec.size(), trim_path_end, |
| /*charset_converter=*/nullptr,&output,&parsed)){ |
| return{}; |
| } |
| output.Complete();// Must be called before string is used. |
| return canonicalized; |
| } |
| }// namespace |
| |
| #if BUILDFLAG(IS_WIN) |
| // Regression test for https://crbug.com/1252658. |
| TEST_F(URLUtilTest,TestCanonicalizeWindowsPathWithLeadingNUL){ |
| autoPrefixWithNUL=[](std::string&& s)-> std::string{return'\0'+ s;}; |
| EXPECT_EQ(CanonicalizeSpec(PrefixWithNUL("w:"),/*trim_path_end=*/false), |
| std::make_optional("file:///W:")); |
| EXPECT_EQ(CanonicalizeSpec(PrefixWithNUL("\\\\server\\share"), |
| /*trim_path_end=*/false), |
| std::make_optional("file://server/share")); |
| } |
| #endif |
| |
| TEST_F(URLUtilTest,TestCanonicalizeIdempotencyWithLeadingControlCharacters){ |
| std::string spec="_w:"; |
| // Loop over all C0 control characters and the space character. |
| for(char c='\0'; c<=' '; c++){ |
| SCOPED_TRACE(testing::Message()<<"c: "<< c); |
| |
| // Overwrite the first character of `spec`. Note that replacing the first |
| // character with NUL will not change the length! |
| spec[0]= c; |
| |
| for(bool trim_path_end:{false,true}){ |
| SCOPED_TRACE(testing::Message()<<"trim_path_end: "<< trim_path_end); |
| |
| std::optional<std::string> canonicalized= |
| CanonicalizeSpec(spec, trim_path_end); |
| ASSERT_TRUE(canonicalized); |
| EXPECT_EQ(canonicalized,CanonicalizeSpec(*canonicalized, trim_path_end)); |
| } |
| } |
| } |
| |
| TEST_F(URLUtilTest,TestHasInvalidURLEscapeSequences){ |
| structTestCase{ |
| constchar* input; |
| bool is_invalid; |
| } cases[]={ |
| // Edge cases. |
| {"",false}, |
| {"%",true}, |
| |
| // Single regular chars with no escaping are valid. |
| {"a",false}, |
| {"g",false}, |
| {"A",false}, |
| {"G",false}, |
| {":",false}, |
| {"]",false}, |
| {"\x00",false},// ASCII 'NUL' char |
| {"\x01",false},// ASCII 'SOH' char |
| {"\xC2\xA3",false},// UTF-8 encoded '£'. |
| |
| // Longer strings without escaping are valid. |
| {"Hello world",false}, |
| {"Here: [%25] <-- a percent-encoded percent character.",false}, |
| |
| // Valid %-escaped sequences ('%' followed by two hex digits). |
| {"%00",false}, |
| {"%20",false}, |
| {"%02",false}, |
| {"%ff",false}, |
| {"%FF",false}, |
| {"%0a",false}, |
| {"%0A",false}, |
| {"abc%FF",false}, |
| {"%FFabc",false}, |
| {"abc%FFabc",false}, |
| {"hello %FF world",false}, |
| {"%20hello%20world",false}, |
| {"%25",false}, |
| {"%25%25",false}, |
| {"%250",false}, |
| {"%259",false}, |
| {"%25A",false}, |
| {"%25F",false}, |
| {"%0a:",false}, |
| |
| // '%' followed by a single character is never a valid sequence. |
| {"%%",true}, |
| {"%2",true}, |
| {"%a",true}, |
| {"%A",true}, |
| {"%g",true}, |
| {"%G",true}, |
| {"%:",true}, |
| {"%[",true}, |
| {"%F",true}, |
| {"%\xC2\xA3",true},//% followed by UTF-8 encoded '£'. |
| |
| // String ends on a potential escape sequence but without two hex-digits |
| // is invalid. |
| {"abc%",true}, |
| {"abc%%",true}, |
| {"abc%%%",true}, |
| {"abc%a",true}, |
| |
| // One hex and one non-hex digit is invalid. |
| {"%a:",true}, |
| {"%:a",true}, |
| {"%::",true}, |
| {"%ag",true}, |
| {"%ga",true}, |
| {"%-1",true}, |
| {"%1-",true}, |
| {"%0\xC2\xA3",true},// %0£. |
| }; |
| |
| for(TestCase test_case: cases){ |
| constchar* input= test_case.input; |
| bool result=HasInvalidURLEscapeSequences(input); |
| EXPECT_EQ(test_case.is_invalid, result) |
| <<"Invalid result for '"<< input<<"'"; |
| } |
| } |
| |
| TEST_F(URLUtilTest,TestResolveRelativeWithNonStandardBase){ |
| // This tests non-standard (in the sense that IsStandard() == false) |
| // hierarchical schemes. |
| structResolveRelativeCase{ |
| constchar* base; |
| constchar* rel; |
| bool is_valid; |
| constchar* out; |
| } resolve_non_standard_cases[]={ |
| // Resolving a relative path against a non-hierarchical URL should fail. |
| {"scheme:opaque_data","/path",false,""}, |
| // Resolving a relative path against a non-standard authority-based base |
| // URL doesn't alter the authority section. |
| {"scheme://Authority/","../path",true,"scheme://Authority/path"}, |
| // A non-standard hierarchical base is resolved with path URL |
| // canonicalization rules. |
| {"data:/Blah:Blah/","file.html",true,"data:/Blah:Blah/file.html"}, |
| {"data:/Path/../part/part2","file.html",true, |
| "data:/Path/../part/file.html"}, |
| {"data://text/html,payload","//user:pass@host:33////payload22",true, |
| "data://user:pass@host:33////payload22"}, |
| // Path URL canonicalization rules also apply to non-standard authority- |
| // based URLs. |
| {"custom://Authority/","file.html",true, |
| "custom://Authority/file.html"}, |
| {"custom://Authority/","other://Auth/",true,"other://Auth/"}, |
| {"custom://Authority/","../../file.html",true, |
| "custom://Authority/file.html"}, |
| {"custom://Authority/path/","file.html",true, |
| "custom://Authority/path/file.html"}, |
| {"custom://Authority:NoCanon/path/","file.html",true, |
| "custom://Authority:NoCanon/path/file.html"}, |
| // A path with an authority section gets canonicalized under standard URL |
| // rules, even though the base was non-standard. Host case sensitivity |
| // should be preserved and trailing slash after a host soulld be removed. |
| {"content://content.Provider/","//other.Provider",true, |
| "content://other.Provider"}, |
| // Resolving an absolute URL doesn't cause canonicalization of the |
| // result. |
| {"about:blank","custom://Authority",true,"custom://Authority"}, |
| // Fragment URLs can be resolved against a non-standard base. |
| {"scheme://Authority/path","#fragment",true, |
| "scheme://Authority/path#fragment"}, |
| {"scheme://Authority/","#fragment",true, |
| "scheme://Authority/#fragment"}, |
| // Test resolving a fragment (only) against any kind of base-URL. |
| {"about:blank","#id42",true,"about:blank#id42"}, |
| {"about:blank"," #id42",true,"about:blank#id42"}, |
| {"about:blank#oldfrag","#newfrag",true,"about:blank#newfrag"}, |
| {"about:blank"," #id:42",true,"about:blank#id:42"}, |
| // A surprising side effect of allowing fragments to resolve against |
| // any URL scheme is we might break #"714"> | {"#"715"> | "#"716"> | }; |
| |
| for(constauto& test: resolve_non_standard_cases){ |
| SCOPED_TRACE(testing::Message() |
| <<"base: "<< test.base<<", rel: "<< test.rel); |
| |
| Parsed base_parsed=ParseNonSpecialURL(test.base); |
| std::string resolved; |
| StdStringCanonOutput output(&resolved); |
| Parsed resolved_parsed; |
| bool valid= |
| ResolveRelative(test.base, strlen(test.base), base_parsed, test.rel, |
| strlen(test.rel),nullptr,&output,&resolved_parsed); |
| output.Complete(); |
| |
| EXPECT_EQ(test.is_valid, valid); |
| if(test.is_valid&& valid){ |
| EXPECT_EQ(test.out, resolved); |
| } |
| } |
| } |
| |
| TEST_F(URLUtilTest,Cannolicalize){ |
| // Verify that the feature flag changes canonicalization behavior, |
| // focusing on key cases here as comprehesive testing is covered in other unit |
| // tests. |
| URLCase cases[]={ |
| {"git://host/..","git://host/",true}, |
| {"git:// /","git:///",false}, |
| {"git:/..","git:/",true}, |
| {"mailto:/..","mailto:/",true}, |
| }; |
| for(constauto& i: cases){ |
| TestCanonicalize(i); |
| } |
| } |
| |
| TEST_F(URLUtilTest,TestResolveRelativeWithNonSpecialBase){ |
| ResolveRelativeCase cases[]={ |
| {"scheme://Authority","path","scheme://Authority/path"}, |
| }; |
| for(constauto& i: cases){ |
| TestResolveRelative(i); |
| } |
| } |
| |
| TEST_F(URLUtilTest,OpaqueNonSpecialScheme){ |
| // Ensure that the behavior of "android:" scheme URL is preserved, which is |
| // not URL Standard compliant. |
| // |
| // URL Standard-wise, "android://a b" is an invalid URL because the host part |
| // includes a space character, which is not allowed. |
| std::optional<std::string> res=CanonicalizeSpec("android://a b",false); |
| ASSERT_TRUE(res); |
| EXPECT_EQ(*res,"android://a b"); |
| |
| // Test a "git:" scheme URL for comparison. |
| res=CanonicalizeSpec("git://a b",false); |
| // This is correct behavior because "git://a b" is an invalid URL. |
| EXPECT_FALSE(res); |
| } |
| |
| }// namespace url |