feat: use libpsl (#2575)

Use libpsl to calculate public and private parts of URL hosts.
This commit is contained in:
Charles Kerr
2022-02-12 11:30:27 -06:00
committed by GitHub
parent 708fc1531e
commit e14c7f38e5
33 changed files with 538 additions and 406 deletions

View File

@@ -15,6 +15,9 @@
#include <event2/buffer.h>
#define PSL_STATIC
#include <libpsl.h>
#include "transmission.h"
#include "net.h"
@@ -292,6 +295,48 @@ bool tr_isValidTrackerScheme(std::string_view scheme)
return std::find(std::begin(Schemes), std::end(Schemes), scheme) != std::end(Schemes);
}
// www.example.com -> example
// www.example.co.uk -> example
// 127.0.0.1 -> 127.0.0.1
std::string_view getSiteName(std::string_view host)
{
// is it empty?
if (std::empty(host))
{
return host;
}
// is it an IP?
auto addr = tr_address{};
auto const szhost = std::string(host);
if (tr_address_from_string(&addr, szhost.c_str()))
{
return host;
}
// is it a registered name?
char* lower = nullptr;
if (PSL_SUCCESS == psl_str_to_utf8lower(szhost.c_str(), nullptr, nullptr, &lower))
{
// www.example.com -> example.com
char const* const top = psl_registrable_domain(psl_builtin(), lower);
if (top != nullptr)
{
host.remove_prefix(top - lower);
}
psl_free_string(lower);
}
// example.com -> example
auto const dot_pos = host.find('.');
if (dot_pos != std::string_view::npos)
{
host = host.substr(0, dot_pos);
}
return host;
}
} // namespace
std::optional<tr_url_parsed_t> tr_urlParse(std::string_view url)
@@ -336,6 +381,7 @@ std::optional<tr_url_parsed_t> tr_urlParse(std::string_view url)
auto remain = parsed.authority;
parsed.host = tr_strvSep(&remain, ':');
parsed.sitename = getSiteName(parsed.host);
parsed.portstr = !std::empty(remain) ? remain : getPortForScheme(parsed.scheme);
parsed.port = parsePort(parsed.portstr);
}