feat: allow non-ASCII announce and webseed URLs (#8420)

* test: `tr_urlPercentEncode()`

* refactor: percent-encode URLs provided in announce list or webseeds

* test: announce list and webseed
This commit is contained in:
Yat Ho
2026-02-11 06:19:04 +08:00
committed by GitHub
parent a89f4bd5bd
commit 4bc74e3dd4
5 changed files with 100 additions and 6 deletions

View File

@@ -14,6 +14,8 @@
#include "libtransmission/transmission.h"
#include "libtransmission/announce-list.h"
#include "tr-strbuf.h"
#include "libtransmission/error.h"
#include "libtransmission/quark.h"
#include "libtransmission/tr-assert.h"
@@ -79,8 +81,16 @@ bool tr_announce_list::replace(tr_tracker_id_t id, std::string_view announce_url
bool tr_announce_list::add(std::string_view announce_url, tr_tracker_tier_t tier)
{
// This step allows for URLs that contain characters outside the allowed set
// defined by RFC 3986. The URL we store is "equivalent" to the provided URL
// according to the definition in RFC 3986 Section 6.1, while consisting
// of only ASCII characters. This ensures the URLs are represented correctly
// when transmitted via UTF-8 mediums, for example JSON.
auto normalized_url = tr_urlbuf{};
tr_urlPercentEncode(std::back_inserter(normalized_url), announce_url, false);
// Make sure the announce URL is usable before we intern it.
if (auto const announce = tr_urlParseTracker(announce_url); !announce || !can_add(*announce))
if (auto const announce = tr_urlParseTracker(normalized_url); !announce || !can_add(*announce))
{
return false;
}
@@ -88,7 +98,7 @@ bool tr_announce_list::add(std::string_view announce_url, tr_tracker_tier_t tier
// Parse again with the interned string so that `parsed` fields all
// point to the interned addresses. This second call should never
// fail, but check anyway to make the linter happy.
auto const announce_interned = tr_interned_string{ announce_url };
auto const announce_interned = tr_interned_string{ normalized_url.sv() };
auto const parsed = tr_urlParseTracker(announce_interned.sv());
if (!parsed)
{

View File

@@ -200,19 +200,27 @@ void tr_magnet_metainfo::set_name(std::string_view name)
// Records `webseed` in `webseed_urls_`. The URL is percent-encoded first;
// nothing is stored if the encoded form fails tr_urlIsValid() or is
// already present in the list.
void tr_magnet_metainfo::add_webseed(std::string_view webseed)
{
if (!tr_urlIsValid(webseed))
// This step allows for URLs that contain characters outside the allowed set
// defined by RFC 3986. The URL we store is "equivalent" to the provided URL
// according to the definition in RFC 3986 Section 6.1, while consisting
// of only ASCII characters. This ensures the URLs are represented correctly
// when transmitted via UTF-8 mediums, for example JSON.
auto normalized = tr_urlbuf{};
tr_urlPercentEncode(std::back_inserter(normalized), webseed, false);
if (!tr_urlIsValid(normalized))
{
return;
}
auto& urls = webseed_urls_;
if (auto const it = std::ranges::find(urls, webseed); it != std::ranges::end(urls))
if (auto const it = std::ranges::find(urls, normalized.sv()); it != std::ranges::end(urls))
{
return;
}
urls.emplace_back(webseed);
urls.emplace_back(normalized.sv());
}
bool tr_magnet_metainfo::parseMagnet(std::string_view magnet_link, tr_error* error)

View File

@@ -34,7 +34,7 @@ TEST_F(AnnounceListTest, canAdd)
auto constexpr Announce = "https://example.org/announce"sv;
auto announce_list = tr_announce_list{};
EXPECT_EQ(1, announce_list.add(Announce, Tier));
EXPECT_TRUE(announce_list.add(Announce, Tier));
auto const tracker = announce_list.at(0);
EXPECT_EQ(Announce, tracker.announce.sv());
EXPECT_EQ("https://example.org/scrape"sv, tracker.scrape.sv());
@@ -44,6 +44,34 @@ TEST_F(AnnounceListTest, canAdd)
EXPECT_EQ(443, tracker.announce_parsed.port);
}
TEST_F(AnnounceListTest, canAddForeignCharset)
{
    // A tracker URL whose host contains non-ASCII characters, together with
    // the percent-encoded form it is expected to be stored as.
    auto constexpr RawUrl = "udp://你好.com:6771/announce"sv;
    auto constexpr EncodedUrl = "udp://%E4%BD%A0%E5%A5%BD.com:6771/announce"sv;
    auto constexpr ExpectedTier = tr_tracker_tier_t{ 2 };

    // The second pass feeds the already-encoded URL back in; this ensures
    // the URL doesn't get double-encoded.
    for (auto const input : { RawUrl, EncodedUrl })
    {
        auto trackers = tr_announce_list{};
        EXPECT_TRUE(trackers.add(input, ExpectedTier));

        auto const entry = trackers.at(0);
        EXPECT_EQ(EncodedUrl, entry.announce.sv());
        EXPECT_EQ("udp://%E4%BD%A0%E5%A5%BD.com:6771/scrape"sv, entry.scrape.sv());
        EXPECT_EQ(ExpectedTier, entry.tier);

        auto const& parsed = entry.announce_parsed;
        EXPECT_EQ("%E4%BD%A0%E5%A5%BD.com", parsed.host);
        EXPECT_EQ("%E4%BD%A0%E5%A5%BD.com:6771"sv, parsed.authority);
        EXPECT_EQ(6771, parsed.port);
    }
}
TEST_F(AnnounceListTest, groupsSiblingsIntoSameTier)
{
auto constexpr Tier1 = tr_tracker_tier_t{ 1 };

View File

@@ -173,6 +173,31 @@ TEST_F(TorrentMetainfoTest, magnetInfoHash)
EXPECT_TRUE(tm.parse_torrent_file(src_filename));
}
TEST_F(TorrentMetainfoTest, addWebseed)
{
    // Each case is { URL as provided, URL as it is expected to be stored }.
    static auto constexpr Cases = std::array<std::pair<std::string_view, std::string_view>, 2>{ {
        { "http://www.webseed-one.com/"sv, "http://www.webseed-one.com/"sv },
        { "http://你好.com/"sv, "http://%E4%BD%A0%E5%A5%BD.com/"sv },
    } };

    // Adds `input` to a fresh metainfo and checks that exactly one webseed
    // is stored and that it equals `expected`.
    auto const expect_stored_as = [](std::string_view input, std::string_view expected)
    {
        auto metainfo = tr_torrent_metainfo{};
        metainfo.add_webseed(input);
        EXPECT_EQ(1U, metainfo.webseed_count());
        EXPECT_EQ(expected, metainfo.webseed(0U));
    };

    // First pass: the raw URL is stored in its percent-encoded form.
    for (auto const& [raw, stored] : Cases)
    {
        expect_stored_as(raw, stored);
    }

    // Second pass: this ensures the URL doesn't get double-encoded.
    for (auto const& [raw, stored] : Cases)
    {
        expect_stored_as(stored, stored);
    }
}
TEST_F(TorrentMetainfoTest, HoffmanStyleWebseeds)
{
auto const src_filename = tr_pathbuf{ LIBTRANSMISSION_TEST_ASSETS_DIR, "/debian-11.2.0-amd64-DVD-1.iso.torrent"sv };

View File

@@ -267,3 +267,26 @@ TEST_F(WebUtilsTest, urlPercentDecode)
EXPECT_EQ(decoded, tr_urlPercentDecode(encoded));
}
}
TEST_F(WebUtilsTest, urlPercentEncode)
{
    // Each case is { input, expected output, value passed as the third
    // argument of tr_urlPercentEncode() }.
    static auto constexpr Cases = std::array<std::tuple<std::string_view, std::string_view, bool>, 10U>{ {
        // Third argument true: bare hosts and words
        { "192.168.202.101"sv, "192.168.202.101"sv, true },
        { "8.8.8.8"sv, "8.8.8.8"sv, true },
        { "[2001:0:0eab:dead::a0:abcd:4e]"sv, "%5B2001%3A0%3A0eab%3Adead%3A%3Aa0%3Aabcd%3A4e%5D"sv, true },
        { "你好"sv, "%E4%BD%A0%E5%A5%BD"sv, true },
        { "Letöltések"sv, "Let%C3%B6lt%C3%A9sek"sv, true },
        { "Дыскаграфія"sv, "%D0%94%D1%8B%D1%81%D0%BA%D0%B0%D0%B3%D1%80%D0%B0%D1%84%D1%96%D1%8F"sv, true },
        // Third argument false: whole URLs, including already-encoded ones
        { "https://example.com/Letöltések"sv, "https://example.com/Let%C3%B6lt%C3%A9sek"sv, false },
        { "https://example.com/Let%C3%B6lt%C3%A9sek"sv, "https://example.com/Let%C3%B6lt%C3%A9sek"sv, false },
        { "udp://你好.com/announce"sv, "udp://%E4%BD%A0%E5%A5%BD.com/announce"sv, false },
        { "udp://%E4%BD%A0%E5%A5%BD.com/announce"sv, "udp://%E4%BD%A0%E5%A5%BD.com/announce"sv, false },
    } };

    for (auto const& [input, expected, escape_reserved] : Cases)
    {
        auto actual = tr_urlbuf{};
        auto sink = std::back_inserter(actual);
        tr_urlPercentEncode(sink, input, escape_reserved);
        EXPECT_EQ(expected, actual);
    }
}