Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parse absolute invalid urls #369

Merged
merged 1 commit into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
39 changes: 36 additions & 3 deletions url/parsers.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,47 @@ func absoluteURLParser(u *URL) (*URL, error) {
}
copy(u.URL, urlparse)
} else {

// try parsing directly first; if that fails with an invalid URL escape error,
// fall back to splitting at the first / and parsing only the part before it
parsed, err := url.Parse(HTTPS + SchemeSeparator + u.Original)
if err != nil {
if !strings.Contains(err.Error(), "invalid URL escape") {
// if it is not an invalid URL escape error then it is most likely a relative path
u.IsRelative = true
return u, nil
}
} else {
// successfully parsed absolute url
parsed.Scheme = "" // remove newly added scheme
copy(u.URL, parsed)
return u, nil
}

// this is most likely a url of type scanme.sh/%2s/%invalid
// if no prefix try to parse it with https
// if failed we consider it as a relative path and not a full url
urlparse, parseErr := url.Parse(HTTPS + SchemeSeparator + u.Original)
pathIndex := strings.IndexRune(u.Original, '/')
if pathIndex == -1 {
// no path found most likely a relative path or localhost path
urlparse, parseErr := url.Parse(HTTPS + SchemeSeparator + u.Original)
if parseErr != nil {
// most likely a relative URL
u.IsRelative = true
} else {
urlparse.Scheme = "" // remove newly added scheme
copy(u.URL, urlparse)
}
return u, nil
}
// split until first / and then parse the url to handle invalid urls like
// scanme.sh/xyz/%u2s/%invalid
urlparse, parseErr := url.Parse(HTTPS + SchemeSeparator + u.Original[:pathIndex])
if parseErr != nil {
// most likely a relativeurl
// most likely a relative URL
u.IsRelative = true
// TODO: investigate if prefix / should be added
} else {
urlparse.Path = u.Original[pathIndex:]
urlparse.Scheme = "" // remove newly added scheme
copy(u.URL, urlparse)
}
Expand Down
6 changes: 4 additions & 2 deletions url/url_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,13 @@ func TestInvalidURLs(t *testing.T) {
"https://127.0.0.1:52272/%invalid",
"http.s3.amazonaws.com",
"https.s3.amazonaws.com",
"scanme.sh/xyz/invalid",
"scanme.sh/xyz/%u2s/%invalid",
}
for _, v := range testcases {
urlx, err := ParseURL(v, true)
urlx, err := ParseAbsoluteURL(v, true)
require.Nilf(t, err, "got error for url %v", v)
require.Equal(t, urlx.String(), v)
require.Equal(t, v, urlx.String())
}
}

Expand Down