// Extracted through the Movatter mobile-conversion proxy from:
//   chromium / chromium / src / refs/heads/main / url/url_util_unittest.cc
//   blob: b8aafb5b49958a61ebc603925a9fd1b71db6f86a

// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/390223051): Remove C-library calls to fix the errors.
#pragma allow_unsafe_libc_calls
#endif

#include"url/url_util.h"

#include<stddef.h>

#include<optional>
#include<string_view>

#include"build/build_config.h"
#include"testing/gtest/include/gtest/gtest-message.h"
#include"testing/gtest/include/gtest/gtest.h"
#include"url/third_party/mozilla/url_parse.h"
#include"url/url_canon.h"
#include"url/url_canon_stdstring.h"
#include"url/url_test_utils.h"

namespace url{

classURLUtilTest:public testing::Test{
public:
URLUtilTest()=default;

URLUtilTest(constURLUtilTest&)=delete;
URLUtilTest&operator=(constURLUtilTest&)=delete;

~URLUtilTest() override=default;

protected:
structURLCase{
const std::string_view input;
const std::string_view expected;
bool expected_success;
};

structResolveRelativeCase{
const std::string_view base;
const std::string_view rel;
std::optional<std::string_view> expected;
};

voidTestCanonicalize(constURLCase& url_case){
std::string canonicalized;
StdStringCanonOutput output(&canonicalized);
Parsed parsed;
bool success=
Canonicalize(url_case.input.data(), url_case.input.size(),
/trim_path_end=/false,
/charset_converter=/nullptr,&output,&parsed);
output.Complete();
EXPECT_EQ(success, url_case.expected_success);
EXPECT_EQ(output.view(), url_case.expected);
}

voidTestResolveRelative(constResolveRelativeCase& test){
SCOPED_TRACE(testing::Message()
<<"base: "<< test.base<<", rel: "<< test.rel);

Parsed base_parsed=ParseNonSpecialURL(test.base);

std::string resolved;
StdStringCanonOutput output(&resolved);

Parsed resolved_parsed;
bool valid=ResolveRelative(test.base.data(), test.base.size(),
base_parsed, test.rel.data(), test.rel.size(),
nullptr,&output,&resolved_parsed);
output.Complete();

if(valid){
ASSERT_TRUE(test.expected);
EXPECT_EQ(resolved,*test.expected);
}else{
EXPECT_FALSE(test.expected);
}
}

private:
ScopedSchemeRegistryForTests scoped_registry_;
};

// Exercises FindAndCompareScheme(): the boolean result of the comparison and
// the scheme Component it reports through the out-parameter.
TEST_F(URLUtilTest, FindAndCompareScheme) {
  Component found_scheme;

  // Simple case where the scheme is found and matches. The out-parameter is
  // optional, so also try passing nullptr.
  const char kStr1[] = "http://www.com/";
  EXPECT_TRUE(FindAndCompareScheme(kStr1, static_cast<int>(strlen(kStr1)),
                                   "http", nullptr));
  EXPECT_TRUE(FindAndCompareScheme(
      kStr1, static_cast<int>(strlen(kStr1)), "http", &found_scheme));
  EXPECT_TRUE(found_scheme == Component(0, 4));

  // A case where the scheme is found and doesn't match.
  EXPECT_FALSE(FindAndCompareScheme(
      kStr1, static_cast<int>(strlen(kStr1)), "https", &found_scheme));
  EXPECT_TRUE(found_scheme == Component(0, 4));

  // A case where there is no scheme.
  const char kStr2[] = "httpfoobar";
  EXPECT_FALSE(FindAndCompareScheme(
      kStr2, static_cast<int>(strlen(kStr2)), "http", &found_scheme));
  EXPECT_TRUE(found_scheme == Component());

  // When there is an empty scheme, it should match the empty scheme.
  const char kStr3[] = ":foo.com/";
  EXPECT_TRUE(FindAndCompareScheme(
      kStr3, static_cast<int>(strlen(kStr3)), "", &found_scheme));
  EXPECT_TRUE(found_scheme == Component(0, 0));

  // But when there is no scheme, it should fail.
  EXPECT_FALSE(FindAndCompareScheme("", 0, "", &found_scheme));
  EXPECT_TRUE(found_scheme == Component());

  // When there is a whitespace char in scheme, it should canonicalize the URL
  // before comparison.
  const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)";
  EXPECT_TRUE(FindAndCompareScheme(whtspc_str,
                                   static_cast<int>(strlen(whtspc_str)),
                                   "javascript", &found_scheme));
  EXPECT_TRUE(found_scheme == Component(1, 10));

  // Control characters should be stripped out on the ends, and kept in the
  // middle.
  const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)";
  EXPECT_FALSE(FindAndCompareScheme(ctrl_str,
                                    static_cast<int>(strlen(ctrl_str)),
                                    "javascript", &found_scheme));
  EXPECT_TRUE(found_scheme == Component(1, 11));
}

// IsStandard() should accept "http" and reject "foo", which this test never
// registers as a standard scheme.
TEST_F(URLUtilTest, IsStandard) {
  const char kHTTPScheme[] = "http";
  EXPECT_TRUE(IsStandard(kHTTPScheme));

  const char kFooScheme[] = "foo";
  EXPECT_FALSE(IsStandard(kFooScheme));
}

// IsReferrerScheme() should accept "http" and reject "foo", which this test
// never registers as a referrer scheme.
TEST_F(URLUtilTest, IsReferrerScheme) {
  const char kHTTPScheme[] = "http";
  EXPECT_TRUE(IsReferrerScheme(kHTTPScheme));

  const char kFooScheme[] = "foo";
  EXPECT_FALSE(IsReferrerScheme(kFooScheme));
}

// A scheme becomes a referrer scheme once AddReferrerScheme() registers it.
TEST_F(URLUtilTest, AddReferrerScheme) {
  static const char kFooScheme[] = "foo";
  EXPECT_FALSE(IsReferrerScheme(kFooScheme));

  // The registration below is made while a test-local registry is alive.
  url::ScopedSchemeRegistryForTests scoped_registry;
  AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST);
  EXPECT_TRUE(IsReferrerScheme(kFooScheme));
}

// A scheme registered while a ScopedSchemeRegistryForTests is alive must no
// longer be registered after that registry object is destroyed.
TEST_F(URLUtilTest, ShutdownCleansUpSchemes) {
  static const char kFooScheme[] = "foo";
  EXPECT_FALSE(IsReferrerScheme(kFooScheme));

  {
    url::ScopedSchemeRegistryForTests scoped_registry;
    AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST);
    EXPECT_TRUE(IsReferrerScheme(kFooScheme));
  }

  // `scoped_registry` went out of scope above; "foo" should be gone again.
  EXPECT_FALSE(IsReferrerScheme(kFooScheme));
}

// GetStandardSchemeType() should report the registered type for standard
// schemes and return false for a scheme that is not standard.
TEST_F(URLUtilTest, GetStandardSchemeType) {
  url::SchemeType scheme_type;

  // `scheme_type` is seeded with a value other than the expected one each
  // time, so a passing EXPECT_EQ proves the call really wrote the result.
  const char kHTTPScheme[] = "http";
  scheme_type = url::SCHEME_WITHOUT_AUTHORITY;
  EXPECT_TRUE(GetStandardSchemeType(kHTTPScheme, &scheme_type));
  EXPECT_EQ(url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, scheme_type);

  const char kFilesystemScheme[] = "filesystem";
  scheme_type = url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
  EXPECT_TRUE(GetStandardSchemeType(kFilesystemScheme, &scheme_type));
  EXPECT_EQ(url::SCHEME_WITHOUT_AUTHORITY, scheme_type);

  const char kFooScheme[] = "foo";
  scheme_type = url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
  EXPECT_FALSE(GetStandardSchemeType(kFooScheme, &scheme_type));
}

// After registering "foo", GetStandardSchemes() should return the built-in
// standard schemes followed by "foo", in exactly this order.
TEST_F(URLUtilTest, GetStandardSchemes) {
  AddStandardScheme("foo", url::SCHEME_WITHOUT_AUTHORITY);

  const std::vector<std::string> expected_schemes = {
      kHttpsScheme,      kHttpScheme, kFileScheme, kFtpScheme, kWssScheme,
      kWsScheme,         kFileSystemScheme,
      "foo",
  };
  EXPECT_EQ(expected_schemes, GetStandardSchemes());
}

// Crash test: ReplaceComponents() must cope with a null spec and with an
// empty spec, whatever replacements are pending. Nothing is asserted about
// the output; the test passes if none of the calls crash.
TEST_F(URLUtilTest, ReplaceComponents) {
  Parsed parsed;
  RawCanonOutputT<char> output;
  Parsed new_parsed;
  Replacements<char> replacements;

  // Applies the current `replacements` to a null spec, then to an empty one.
  auto replace_null_then_empty = [&]() {
    ReplaceComponents(nullptr, 0, parsed, replacements, nullptr, &output,
                      &new_parsed);
    ReplaceComponents("", 0, parsed, replacements, nullptr, &output,
                      &new_parsed);
  };

  // With a ref replacement pending.
  replacements.SetRef("test", Component(0, 4));
  replace_null_then_empty();

  // With a host replacement pending instead.
  replacements.ClearRef();
  replacements.SetHost("test", Component(0, 4));
  replace_null_then_empty();

  // With the host cleared; the original test issues this pair twice.
  replacements.ClearHost();
  replace_null_then_empty();
  replace_null_then_empty();
}

static std::stringCheckReplaceScheme(constchar* base_url,
constchar* scheme){
// Make sure the input is canonicalized.
RawCanonOutput<32> original;
Parsed original_parsed;
Canonicalize(base_url, strlen(base_url),true,nullptr,&original,
&original_parsed);

Replacements<char> replacements;
replacements.SetScheme(scheme,Component(0, strlen(scheme)));

std::string output_string;
StdStringCanonOutput output(&output_string);
Parsed output_parsed;
ReplaceComponents(original.data(), original.length(), original_parsed,
replacements,nullptr,&output,&output_parsed);

output.Complete();
return output_string;
}

// Checks the spec produced when only the scheme of a canonical URL is
// replaced, including switches between standard and non-standard schemes.
TEST_F(URLUtilTest, ReplaceScheme) {
  EXPECT_EQ("https://google.com/",
            CheckReplaceScheme("http://google.com/", "https"));
  EXPECT_EQ("file://google.com/",
            CheckReplaceScheme("http://google.com/", "file"));
  EXPECT_EQ("http://home/Build",
            CheckReplaceScheme("file:///Home/Build", "http"));
  // NOTE(review): the expected literal on this line was destroyed during
  // extraction (it began with "javascript:"); "javascript:foo" is the
  // reconstruction -- confirm against the upstream file.
  EXPECT_EQ("javascript:foo",
            CheckReplaceScheme("about:foo", "javascript"));
  EXPECT_EQ("://google.com/",
            CheckReplaceScheme("http://google.com/", ""));
  EXPECT_EQ("http://google.com/",
            CheckReplaceScheme("about:google.com", "http"));
  EXPECT_EQ("http:", CheckReplaceScheme("", "http"));

#ifdef WIN32
  // Magic Windows drive letter behavior when converting to a file URL.
  EXPECT_EQ("file:///E:/foo/",
            CheckReplaceScheme("http://localhost/e:foo/", "file"));
#endif

  // This will probably change to "about://google.com/" when we fix
  // http://crbug.com/160 which should also be an acceptable result.
  EXPECT_EQ("about://google.com/",
            CheckReplaceScheme("http://google.com/", "about"));

  EXPECT_EQ("http://example.com/%20hello%20#%20world",
            CheckReplaceScheme("myscheme:example.com/ hello # world ", "http"));
}

// Covers DecodeURLEscapeSequences() in both decode modes: inputs that decode
// identically in either mode, "%00", and invalid UTF-8 where the two modes
// are expected to differ.
TEST_F(URLUtilTest, DecodeURLEscapeSequences) {
  struct DecodeCase {
    const char* input;
    const char* output;
  } decode_cases[] = {
      {"hello, world", "hello, world"},
      {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/",
       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"},
      {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/",
       "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"},
      {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/",
       " !\"#$%&'()*+,-.//"},
      {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/",
       "0123456789:;<=>?/"},
      {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/",
       "@ABCDEFGHIJKLMNO/"},
      {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/",
       "PQRSTUVWXYZ[\\]^_/"},
      {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/",
       "`abcdefghijklmno/"},
      {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/",
       "pqrstuvwxyz{|}~\x7f/"},
      {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"},
      // U+FFFF (Noncharacter) should not be replaced with U+FFFD (Replacement
      // Character) (http://crbug.com/1416021)
      {"%ef%bf%bf", "\xef\xbf\xbf"},
      // U+FDD0 (Noncharacter)
      {"%ef%b7%90", "\xef\xb7\x90"},
      // U+FFFD (Replacement Character)
      {"%ef%bf%bd", "\xef\xbf\xbd"},
  };

  // Every case above must decode to the same text in both modes.
  for (const auto& decode_case : decode_cases) {
    RawCanonOutputT<char16_t> output;
    DecodeURLEscapeSequences(decode_case.input,
                             DecodeURLMode::kUTF8OrIsomorphic, &output);
    EXPECT_EQ(decode_case.output, base::UTF16ToUTF8(std::u16string(
                                      output.data(), output.length())));

    RawCanonOutputT<char16_t> output_utf8;
    DecodeURLEscapeSequences(decode_case.input, DecodeURLMode::kUTF8,
                             &output_utf8);
    EXPECT_EQ(decode_case.output,
              base::UTF16ToUTF8(
                  std::u16string(output_utf8.data(), output_utf8.length())));
  }

  // Our decode should decode %00
  const char zero_input[] = "%00";
  RawCanonOutputT<char16_t> zero_output;
  DecodeURLEscapeSequences(zero_input, DecodeURLMode::kUTF8, &zero_output);
  EXPECT_NE("%00", base::UTF16ToUTF8(std::u16string(zero_output.data(),
                                                    zero_output.length())));

  // Test the error behavior for invalid UTF-8. The expected vectors end in 0
  // because they are read back below as NUL-terminated char16_t strings.
  struct Utf8DecodeCase {
    const char* input;
    std::vector<char16_t> expected_iso;
    std::vector<char16_t> expected_utf8;
  } utf8_decode_cases[] = {
      // %e5%a5%bd is a valid UTF-8 sequence. U+597D
      {"%e4%a0%e5%a5%bd",
       {0x00e4, 0x00a0, 0x00e5, 0x00a5, 0x00bd, 0},
       {0xfffd, 0x597d, 0}},
      {"%e5%a5%bd%e4%a0",
       {0x00e5, 0x00a5, 0x00bd, 0x00e4, 0x00a0, 0},
       {0x597d, 0xfffd, 0}},
      {"%e4%a0%e5%bd",
       {0x00e4, 0x00a0, 0x00e5, 0x00bd, 0},
       {0xfffd, 0xfffd, 0}},
  };

  for (const auto& utf8_decode_case : utf8_decode_cases) {
    RawCanonOutputT<char16_t> output_iso;
    DecodeURLEscapeSequences(utf8_decode_case.input,
                             DecodeURLMode::kUTF8OrIsomorphic, &output_iso);
    EXPECT_EQ(std::u16string(utf8_decode_case.expected_iso.data()),
              std::u16string(output_iso.data(), output_iso.length()));

    RawCanonOutputT<char16_t> output_utf8;
    DecodeURLEscapeSequences(utf8_decode_case.input, DecodeURLMode::kUTF8,
                             &output_utf8);
    EXPECT_EQ(std::u16string(utf8_decode_case.expected_utf8.data()),
              std::u16string(output_utf8.data(), output_utf8.length()));
  }
}

// Checks EncodeURIComponent() over the ASCII range 0x01-0x7F: which
// characters are percent-escaped and which pass through unchanged.
TEST_F(URLUtilTest, TestEncodeURIComponent) {
  struct EncodeCase {
    const char* input;
    const char* output;
  } encode_cases[] = {
      {"hello, world", "hello%2C%20world"},
      {"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
       "%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F"},
      {"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
       "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F"},
      {" !\"#$%&'()*+,-./",
       "%20!%22%23%24%25%26%27()*%2B%2C-.%2F"},
      {"0123456789:;<=>?",
       "0123456789%3A%3B%3C%3D%3E%3F"},
      {"@ABCDEFGHIJKLMNO",
       "%40ABCDEFGHIJKLMNO"},
      {"PQRSTUVWXYZ[\\]^_",
       "PQRSTUVWXYZ%5B%5C%5D%5E_"},
      {"`abcdefghijklmno",
       "%60abcdefghijklmno"},
      {"pqrstuvwxyz{|}~\x7f",
       "pqrstuvwxyz%7B%7C%7D~%7F"},
  };

  for (const auto& encode_case : encode_cases) {
    RawCanonOutputT<char> buffer;
    EncodeURIComponent(encode_case.input, &buffer);
    std::string output(buffer.data(), buffer.length());
    EXPECT_EQ(encode_case.output, output);
  }
}

// Resolves relative URLs that contain '<' together with a newline, carriage
// return or tab at the start, middle or end, and checks both the resolved
// spec and Parsed::potentially_dangling_markup.
TEST_F(URLUtilTest, PotentiallyDanglingMarkup) {
  struct ResolveRelativeCase {
    const char* base;
    const char* rel;
    bool potentially_dangling_markup;
    const char* out;
  } cases[] = {
      // '<' alone, with no removable whitespace, does not set the flag.
      {"https://example.com/", "/path<", false, "https://example.com/path%3C"},
      {"https://example.com/", "\n/path<", true, "https://example.com/path%3C"},
      {"https://example.com/", "\r/path<", true, "https://example.com/path%3C"},
      {"https://example.com/", "\t/path<", true, "https://example.com/path%3C"},
      {"https://example.com/", "/pa\nth<", true, "https://example.com/path%3C"},
      {"https://example.com/", "/pa\rth<", true, "https://example.com/path%3C"},
      {"https://example.com/", "/pa\tth<", true, "https://example.com/path%3C"},
      {"https://example.com/", "/path\n<", true, "https://example.com/path%3C"},
      {"https://example.com/", "/path\r<", true, "https://example.com/path%3C"},
      // Previously a second copy of the "\r" row; "\t" completes the
      // \n / \r / \t triple used by every other group.
      {"https://example.com/", "/path\t<", true, "https://example.com/path%3C"},
      {"https://example.com/", "\n/<path", true, "https://example.com/%3Cpath"},
      {"https://example.com/", "\r/<path", true, "https://example.com/%3Cpath"},
      {"https://example.com/", "\t/<path", true, "https://example.com/%3Cpath"},
      {"https://example.com/", "/<pa\nth", true, "https://example.com/%3Cpath"},
      {"https://example.com/", "/<pa\rth", true, "https://example.com/%3Cpath"},
      {"https://example.com/", "/<pa\tth", true, "https://example.com/%3Cpath"},
      {"https://example.com/", "/<path\n", true, "https://example.com/%3Cpath"},
      {"https://example.com/", "/<path\r", true, "https://example.com/%3Cpath"},
      // Previously a second copy of the "\r" row (see above).
      {"https://example.com/", "/<path\t", true, "https://example.com/%3Cpath"},
  };

  for (const auto& test : cases) {
    SCOPED_TRACE(::testing::Message() << test.base << ", " << test.rel);
    Parsed base_parsed = ParseStandardURL(test.base);

    std::string resolved;
    StdStringCanonOutput output(&resolved);
    Parsed resolved_parsed;
    bool valid =
        ResolveRelative(test.base, strlen(test.base), base_parsed, test.rel,
                        strlen(test.rel), nullptr, &output, &resolved_parsed);
    ASSERT_TRUE(valid);
    output.Complete();

    EXPECT_EQ(test.potentially_dangling_markup,
              resolved_parsed.potentially_dangling_markup);
    EXPECT_EQ(test.out, resolved);
  }
}

// The potentially_dangling_markup flag set by Canonicalize() must still be
// set on the Parsed produced by a later ReplaceComponents() call.
TEST_F(URLUtilTest, PotentiallyDanglingMarkupAfterReplacement) {
  // Parse a URL with potentially dangling markup.
  Parsed original_parsed;
  RawCanonOutput<32> original;
  const char* url = "htt\nps://example.com/<path";
  Canonicalize(url, strlen(url), false, nullptr, &original, &original_parsed);
  ASSERT_TRUE(original_parsed.potentially_dangling_markup);

  // Perform a replacement, and validate that the potentially_dangling_markup
  // flag carried over to the new Parsed object.
  Replacements<char> replacements;
  replacements.ClearRef();
  Parsed replaced_parsed;
  RawCanonOutput<32> replaced;
  ReplaceComponents(original.data(), original.length(), original_parsed,
                    replacements, nullptr, &replaced, &replaced_parsed);
  EXPECT_TRUE(replaced_parsed.potentially_dangling_markup);
}

// Same as the test above, but the replacement changes only the scheme.
TEST_F(URLUtilTest, PotentiallyDanglingMarkupAfterSchemeOnlyReplacement) {
  // Parse a URL with potentially dangling markup.
  Parsed original_parsed;
  RawCanonOutput<32> original;
  const char* url = "http://example.com/\n/<path";
  Canonicalize(url, strlen(url), false, nullptr, &original, &original_parsed);
  ASSERT_TRUE(original_parsed.potentially_dangling_markup);

  // Perform a replacement, and validate that the potentially_dangling_markup
  // flag carried over to the new Parsed object.
  Replacements<char> replacements;
  const char* new_scheme = "https";
  replacements.SetScheme(new_scheme, Component(0, strlen(new_scheme)));
  Parsed replaced_parsed;
  RawCanonOutput<32> replaced;
  ReplaceComponents(original.data(), original.length(), original_parsed,
                    replacements, nullptr, &replaced, &replaced_parsed);
  EXPECT_TRUE(replaced_parsed.potentially_dangling_markup);
}

// Table-driven check of DomainIs(canonicalized_host, lower_ascii_domain).
TEST_F(URLUtilTest, TestDomainIs) {
  const struct {
    const char* canonicalized_host;
    const char* lower_ascii_domain;
    bool expected_domain_is;
  } kTestCases[] = {
      {"google.com", "google.com", true},
      {"www.google.com", "google.com", true},      // Subdomain is ignored.
      {"www.google.com.cn", "google.com", false},  // Different TLD.
      {"www.google.comm", "google.com", false},
      {"www.iamnotgoogle.com", "google.com", false},  // Different hostname.
      {"www.google.com", "Google.com", false},  // The input is not lower-cased.

      // If the host ends with a dot, it matches domains with or without a dot.
      {"www.google.com.", "google.com", true},
      {"www.google.com.", "google.com.", true},
      {"www.google.com.", ".com", true},
      {"www.google.com.", ".com.", true},

      // But, if the host doesn't end with a dot and the input domain does, then
      // it's considered to not match.
      {"www.google.com", "google.com.", false},

      // If the host ends with two dots, it doesn't match.
      {"www.google.com..", "google.com", false},

      // Empty parameters.
      {"www.google.com", "", false},
      {"", "www.google.com", false},
      {"", "", false},
  };

  for (const auto& test_case : kTestCases) {
    SCOPED_TRACE(testing::Message()
                 << "(host, domain): (" << test_case.canonicalized_host << ", "
                 << test_case.lower_ascii_domain << ")");

    EXPECT_EQ(
        test_case.expected_domain_is,
        DomainIs(test_case.canonicalized_host, test_case.lower_ascii_domain));
  }
}

namespace {

// Canonicalizes `spec` with no charset converter. Returns the canonical
// string, or std::nullopt when Canonicalize() reports failure.
std::optional<std::string> CanonicalizeSpec(std::string_view spec,
                                            bool trim_path_end) {
  std::string canonicalized;
  StdStringCanonOutput output(&canonicalized);
  Parsed parsed;
  if (!Canonicalize(spec.data(), spec.size(), trim_path_end,
                    /*charset_converter=*/nullptr, &output, &parsed)) {
    return {};
  }
  output.Complete();  // Must be called before string is used.
  return canonicalized;
}

}  // namespace

#if BUILDFLAG(IS_WIN)
// Regression test for https://crbug.com/1252658.
// Windows drive-letter and UNC specs that start with a NUL character should
// canonicalize to the file: URLs expected below.
TEST_F(URLUtilTest, TestCanonicalizeWindowsPathWithLeadingNUL) {
  // Builds the spec as a std::string so the embedded NUL is part of its
  // length rather than acting as a terminator.
  auto PrefixWithNUL = [](std::string&& s) -> std::string { return '\0' + s; };
  EXPECT_EQ(CanonicalizeSpec(PrefixWithNUL("w:"), /*trim_path_end=*/false),
            std::make_optional("file:///W:"));
  EXPECT_EQ(CanonicalizeSpec(PrefixWithNUL("\\\\server\\share"),
                             /*trim_path_end=*/false),
            std::make_optional("file://server/share"));
}
#endif

// Canonicalizing an already-canonical spec must not change it, for every
// spec of the form <c>"w:" where <c> is a C0 control character or a space.
TEST_F(URLUtilTest, TestCanonicalizeIdempotencyWithLeadingControlCharacters) {
  // The leading '_' is a placeholder that each iteration overwrites.
  std::string spec = "_w:";
  const bool kTrimPathEndOptions[] = {false, true};

  // Visit '\0' through ' ' inclusive.
  for (char c = '\0'; c <= ' '; ++c) {
    SCOPED_TRACE(testing::Message() << "c: " << c);

    // Writing NUL into a std::string keeps its length at three characters.
    spec[0] = c;

    for (bool trim_path_end : kTrimPathEndOptions) {
      SCOPED_TRACE(testing::Message() << "trim_path_end: " << trim_path_end);

      std::optional<std::string> first_pass =
          CanonicalizeSpec(spec, trim_path_end);
      ASSERT_TRUE(first_pass);

      std::optional<std::string> second_pass =
          CanonicalizeSpec(*first_pass, trim_path_end);
      EXPECT_EQ(first_pass, second_pass);
    }
  }
}

// Table-driven check of HasInvalidURLEscapeSequences(): `is_invalid` is the
// value the function is expected to return for `input`.
TEST_F(URLUtilTest, TestHasInvalidURLEscapeSequences) {
  struct TestCase {
    const char* input;
    bool is_invalid;
  } cases[] = {
      // Edge cases.
      {"", false},
      {"%", true},

      // Single regular chars with no escaping are valid.
      {"a", false},
      {"g", false},
      {"A", false},
      {"G", false},
      {":", false},
      {"]", false},
      {"\x00", false},      // ASCII 'NUL' char
      {"\x01", false},      // ASCII 'SOH' char
      {"\xC2\xA3", false},  // UTF-8 encoded '£'.

      // Longer strings without escaping are valid.
      {"Hello world", false},
      {"Here: [%25] <-- a percent-encoded percent character.", false},

      // Valid %-escaped sequences ('%' followed by two hex digits).
      {"%00", false},
      {"%20", false},
      {"%02", false},
      {"%ff", false},
      {"%FF", false},
      {"%0a", false},
      {"%0A", false},
      {"abc%FF", false},
      {"%FFabc", false},
      {"abc%FFabc", false},
      {"hello %FF world", false},
      {"%20hello%20world", false},
      {"%25", false},
      {"%25%25", false},
      {"%250", false},
      {"%259", false},
      {"%25A", false},
      {"%25F", false},
      {"%0a:", false},

      // '%' followed by a single character is never a valid sequence.
      {"%%", true},
      {"%2", true},
      {"%a", true},
      {"%A", true},
      {"%g", true},
      {"%G", true},
      {"%:", true},
      {"%[", true},
      {"%F", true},
      {"%\xC2\xA3", true},  // % followed by UTF-8 encoded '£'.

      // String ends on a potential escape sequence but without two hex-digits
      // is invalid.
      {"abc%", true},
      {"abc%%", true},
      {"abc%%%", true},
      {"abc%a", true},

      // One hex and one non-hex digit is invalid.
      {"%a:", true},
      {"%:a", true},
      {"%::", true},
      {"%ag", true},
      {"%ga", true},
      {"%-1", true},
      {"%1-", true},
      {"%0\xC2\xA3", true},  // %0£.
  };

  for (const TestCase& test_case : cases) {
    const char* input = test_case.input;
    bool result = HasInvalidURLEscapeSequences(input);
    EXPECT_EQ(test_case.is_invalid, result)
        << "Invalid result for '" << input << "'";
  }
}

// Resolves relative references against bases whose scheme is not a standard
// one, checking both whether resolution succeeds and the resulting spec.
TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
  // This tests non-standard (in the sense that IsStandard() == false)
  // hierarchical schemes.
  struct ResolveRelativeCase {
    const char* base;
    const char* rel;
    bool is_valid;
    const char* out;
  } resolve_non_standard_cases[] = {
      // Resolving a relative path against a non-hierarchical URL should fail.
      {"scheme:opaque_data", "/path", false, ""},
      // Resolving a relative path against a non-standard authority-based base
      // URL doesn't alter the authority section.
      {"scheme://Authority/", "../path", true, "scheme://Authority/path"},
      // A non-standard hierarchical base is resolved with path URL
      // canonicalization rules.
      {"data:/Blah:Blah/", "file.html", true, "data:/Blah:Blah/file.html"},
      {"data:/Path/../part/part2", "file.html", true,
       "data:/Path/../part/file.html"},
      {"data://text/html,payload", "//user:pass@host:33////payload22", true,
       "data://user:pass@host:33////payload22"},
      // Path URL canonicalization rules also apply to non-standard authority-
      // based URLs.
      {"custom://Authority/", "file.html", true,
       "custom://Authority/file.html"},
      {"custom://Authority/", "other://Auth/", true, "other://Auth/"},
      {"custom://Authority/", "../../file.html", true,
       "custom://Authority/file.html"},
      {"custom://Authority/path/", "file.html", true,
       "custom://Authority/path/file.html"},
      {"custom://Authority:NoCanon/path/", "file.html", true,
       "custom://Authority:NoCanon/path/file.html"},
      // A path with an authority section gets canonicalized under standard URL
      // rules, even though the base was non-standard. Host case sensitivity
      // should be preserved and trailing slash after a host should be removed.
      {"content://content.Provider/", "//other.Provider", true,
       "content://other.Provider"},
      // Resolving an absolute URL doesn't cause canonicalization of the
      // result.
      {"about:blank", "custom://Authority", true, "custom://Authority"},
      // Fragment URLs can be resolved against a non-standard base.
      {"scheme://Authority/path", "#fragment", true,
       "scheme://Authority/path#fragment"},
      {"scheme://Authority/", "#fragment", true,
       "scheme://Authority/#fragment"},
      // Test resolving a fragment (only) against any kind of base-URL.
      {"about:blank", "#id42", true, "about:blank#id42"},
      {"about:blank", " #id42", true, "about:blank#id42"},
      {"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag"},
      {"about:blank", " #id:42", true, "about:blank#id:42"},
      // A surprising side effect of allowing fragments to resolve against
      // any URL scheme is we might break javascript: URLs by doing so...
      // NOTE(review): the end of the comment above and the case below were
      // destroyed during extraction (every "javascript:" string was dropped);
      // this is a reconstruction -- confirm against the upstream file.
      {"javascript:alert('foo#bar')", "#badfrag", true,
       "javascript:alert('foo#badfrag"},
  };

  for (const auto& test : resolve_non_standard_cases) {
    SCOPED_TRACE(testing::Message()
                 << "base: " << test.base << ", rel: " << test.rel);

    Parsed base_parsed = ParseNonSpecialURL(test.base);
    std::string resolved;
    StdStringCanonOutput output(&resolved);
    Parsed resolved_parsed;
    bool valid =
        ResolveRelative(test.base, strlen(test.base), base_parsed, test.rel,
                        strlen(test.rel), nullptr, &output, &resolved_parsed);
    output.Complete();

    EXPECT_EQ(test.is_valid, valid);
    // Only compare the output when resolution was expected to succeed and did.
    if (test.is_valid && valid) {
      EXPECT_EQ(test.out, resolved);
    }
  }
}

// Runs a few key canonicalization cases through the fixture's
// TestCanonicalize() helper.
TEST_F(URLUtilTest, Cannolicalize) {
  // Verify that the feature flag changes canonicalization behavior,
  // focusing on key cases here as comprehensive testing is covered in other
  // unit tests.
  URLCase cases[] = {
      {"git://host/..", "git://host/", true},
      {"git:// /", "git:///", false},
      {"git:/..", "git:/", true},
      {"mailto:/..", "mailto:/", true},
  };
  for (const auto& i : cases) {
    TestCanonicalize(i);
  }
}

// Resolves a relative path against a non-special base that has an authority
// but no path, using the fixture's TestResolveRelative() helper.
TEST_F(URLUtilTest, TestResolveRelativeWithNonSpecialBase) {
  ResolveRelativeCase cases[] = {
      {"scheme://Authority", "path", "scheme://Authority/path"},
  };
  for (const auto& i : cases) {
    TestResolveRelative(i);
  }
}

// Contrasts "android:" URLs, which are still accepted with a space in the
// host position, with "git:" URLs, which are rejected for the same input.
TEST_F(URLUtilTest, OpaqueNonSpecialScheme) {
  // The "android:" behavior is preserved on purpose even though it is not URL
  // Standard compliant: by the Standard, "android://a b" is invalid because
  // the host part contains a space character, which is not allowed.
  const std::optional<std::string> android_result =
      CanonicalizeSpec("android://a b", false);
  ASSERT_TRUE(android_result);
  EXPECT_EQ(*android_result, "android://a b");

  // For comparison, the same shape under "git:" is an invalid URL, so
  // failing to canonicalize it is the correct behavior.
  const std::optional<std::string> git_result =
      CanonicalizeSpec("git://a b", false);
  EXPECT_FALSE(git_result);
}

}// namespace url