diff --git a/libtransmission/announce-list.cc b/libtransmission/announce-list.cc index 6f61f99a0..dbf230b95 100644 --- a/libtransmission/announce-list.cc +++ b/libtransmission/announce-list.cc @@ -14,6 +14,8 @@ #include "libtransmission/transmission.h" #include "libtransmission/announce-list.h" + +#include "libtransmission/tr-strbuf.h" #include "libtransmission/error.h" #include "libtransmission/quark.h" #include "libtransmission/tr-assert.h" @@ -79,8 +81,16 @@ bool tr_announce_list::replace(tr_tracker_id_t id, std::string_view announce_url bool tr_announce_list::add(std::string_view announce_url, tr_tracker_tier_t tier) { + // This step allows for URLs that contain characters outside the allowed set + // defined by RFC 3986. The URL we store is "equivalent" to the provided URL + // according to the definition in RFC 3986 Section 6.1, while consisting + // of only ASCII characters. This ensures the URLs are represented correctly + // when transmitted via UTF-8 mediums, for example JSON. + auto normalized_url = tr_urlbuf{}; + tr_urlPercentEncode(std::back_inserter(normalized_url), announce_url, false); + // Make sure the announce URL is usable before we intern it. - if (auto const announce = tr_urlParseTracker(announce_url); !announce || !can_add(*announce)) + if (auto const announce = tr_urlParseTracker(normalized_url.sv()); !announce || !can_add(*announce)) { return false; } @@ -88,7 +98,7 @@ bool tr_announce_list::add(std::string_view announce_url, tr_tracker_tier_t tier // Parse again with the interned string so that `parsed` fields all // point to the interned addresses. This second call should never // fail, but check anyway to make the linter happy. 
- auto const announce_interned = tr_interned_string{ announce_url }; + auto const announce_interned = tr_interned_string{ normalized_url.sv() }; auto const parsed = tr_urlParseTracker(announce_interned.sv()); if (!parsed) { diff --git a/libtransmission/magnet-metainfo.cc b/libtransmission/magnet-metainfo.cc index 4a04f0afb..902e0d6de 100644 --- a/libtransmission/magnet-metainfo.cc +++ b/libtransmission/magnet-metainfo.cc @@ -200,19 +200,27 @@ void tr_magnet_metainfo::set_name(std::string_view name) void tr_magnet_metainfo::add_webseed(std::string_view webseed) { - if (!tr_urlIsValid(webseed)) + // This step allows for URLs that contain characters outside the allowed set + // defined by RFC 3986. The URL we store is "equivalent" to the provided URL + // according to the definition in RFC 3986 Section 6.1, while consisting + // of only ASCII characters. This ensures the URLs are represented correctly + // when transmitted via UTF-8 mediums, for example JSON. + auto normalized = tr_urlbuf{}; + tr_urlPercentEncode(std::back_inserter(normalized), webseed, false); + + if (!tr_urlIsValid(normalized.sv())) { return; } auto& urls = webseed_urls_; - if (auto const it = std::ranges::find(urls, webseed); it != std::ranges::end(urls)) + if (auto const it = std::ranges::find(urls, normalized.sv()); it != std::ranges::end(urls)) { return; } - urls.emplace_back(webseed); + urls.emplace_back(normalized.sv()); } bool tr_magnet_metainfo::parseMagnet(std::string_view magnet_link, tr_error* error) diff --git a/tests/libtransmission/announce-list-test.cc b/tests/libtransmission/announce-list-test.cc index c6c4f6caa..076c3c489 100644 --- a/tests/libtransmission/announce-list-test.cc +++ b/tests/libtransmission/announce-list-test.cc @@ -34,7 +34,7 @@ TEST_F(AnnounceListTest, canAdd) auto constexpr Announce = "https://example.org/announce"sv; auto announce_list = tr_announce_list{}; - EXPECT_EQ(1, announce_list.add(Announce, Tier)); + ASSERT_TRUE(announce_list.add(Announce, Tier)); auto const 
tracker = announce_list.at(0); EXPECT_EQ(Announce, tracker.announce.sv()); EXPECT_EQ("https://example.org/scrape"sv, tracker.scrape.sv()); @@ -44,6 +44,34 @@ TEST_F(AnnounceListTest, canAdd) EXPECT_EQ(443, tracker.announce_parsed.port); } +TEST_F(AnnounceListTest, canAddForeignCharset) +{ + auto constexpr Tier = tr_tracker_tier_t{ 2 }; + auto constexpr Announce = "udp://你好.com:6771/announce"sv; + auto constexpr AnnounceEncoded = "udp://%E4%BD%A0%E5%A5%BD.com:6771/announce"sv; + + auto announce_list = tr_announce_list{}; + ASSERT_TRUE(announce_list.add(Announce, Tier)); + auto tracker = announce_list.at(0); + EXPECT_EQ(AnnounceEncoded, tracker.announce.sv()); + EXPECT_EQ("udp://%E4%BD%A0%E5%A5%BD.com:6771/scrape"sv, tracker.scrape.sv()); + EXPECT_EQ(Tier, tracker.tier); + EXPECT_EQ("%E4%BD%A0%E5%A5%BD.com", tracker.announce_parsed.host); + EXPECT_EQ("%E4%BD%A0%E5%A5%BD.com:6771"sv, tracker.announce_parsed.authority); + EXPECT_EQ(6771, tracker.announce_parsed.port); + + // This ensures the URL doesn't get double-encoded + announce_list = tr_announce_list{}; + ASSERT_TRUE(announce_list.add(AnnounceEncoded, Tier)); + tracker = announce_list.at(0); + EXPECT_EQ(AnnounceEncoded, tracker.announce.sv()); + EXPECT_EQ("udp://%E4%BD%A0%E5%A5%BD.com:6771/scrape"sv, tracker.scrape.sv()); + EXPECT_EQ(Tier, tracker.tier); + EXPECT_EQ("%E4%BD%A0%E5%A5%BD.com", tracker.announce_parsed.host); + EXPECT_EQ("%E4%BD%A0%E5%A5%BD.com:6771"sv, tracker.announce_parsed.authority); + EXPECT_EQ(6771, tracker.announce_parsed.port); +} + TEST_F(AnnounceListTest, groupsSiblingsIntoSameTier) { auto constexpr Tier1 = tr_tracker_tier_t{ 1 }; diff --git a/tests/libtransmission/torrent-metainfo-test.cc b/tests/libtransmission/torrent-metainfo-test.cc index 20dfc30fb..109c503af 100644 --- a/tests/libtransmission/torrent-metainfo-test.cc +++ b/tests/libtransmission/torrent-metainfo-test.cc @@ -173,6 +173,31 @@ TEST_F(TorrentMetainfoTest, magnetInfoHash) EXPECT_TRUE(tm.parse_torrent_file(src_filename)); } 
+TEST_F(TorrentMetainfoTest, addWebseed) +{ + static auto constexpr Tests = std::array<std::pair<std::string_view, std::string_view>, 2>{ { + { "http://www.webseed-one.com/"sv, "http://www.webseed-one.com/"sv }, + { "http://你好.com/"sv, "http://%E4%BD%A0%E5%A5%BD.com/"sv }, + } }; + + for (auto const& [decoded, encoded] : Tests) + { + auto tm = tr_torrent_metainfo{}; + tm.add_webseed(decoded); + ASSERT_EQ(1U, tm.webseed_count()); + EXPECT_EQ(encoded, tm.webseed(0U)); + } + + // This ensures the URL doesn't get double-encoded + for (auto const& [decoded, encoded] : Tests) + { + auto tm = tr_torrent_metainfo{}; + tm.add_webseed(encoded); + ASSERT_EQ(1U, tm.webseed_count()); + EXPECT_EQ(encoded, tm.webseed(0U)); + } +} + TEST_F(TorrentMetainfoTest, HoffmanStyleWebseeds) { auto const src_filename = tr_pathbuf{ LIBTRANSMISSION_TEST_ASSETS_DIR, "/debian-11.2.0-amd64-DVD-1.iso.torrent"sv }; diff --git a/tests/libtransmission/web-utils-test.cc b/tests/libtransmission/web-utils-test.cc index d6cdced89..40789fc98 100644 --- a/tests/libtransmission/web-utils-test.cc +++ b/tests/libtransmission/web-utils-test.cc @@ -267,3 +267,26 @@ TEST_F(WebUtilsTest, urlPercentDecode) EXPECT_EQ(decoded, tr_urlPercentDecode(encoded)); } } + +TEST_F(WebUtilsTest, urlPercentEncode) +{ + static auto constexpr Tests = std::array<std::tuple<std::string_view, std::string_view, bool>, 10U>{ { + { "192.168.202.101"sv, "192.168.202.101"sv, true }, + { "8.8.8.8"sv, "8.8.8.8"sv, true }, + { "[2001:0:0eab:dead::a0:abcd:4e]"sv, "%5B2001%3A0%3A0eab%3Adead%3A%3Aa0%3Aabcd%3A4e%5D"sv, true }, + { "你好"sv, "%E4%BD%A0%E5%A5%BD"sv, true }, + { "Letöltések"sv, "Let%C3%B6lt%C3%A9sek"sv, true }, + { "Дыскаграфія"sv, "%D0%94%D1%8B%D1%81%D0%BA%D0%B0%D0%B3%D1%80%D0%B0%D1%84%D1%96%D1%8F"sv, true }, + { "https://example.com/Letöltések"sv, "https://example.com/Let%C3%B6lt%C3%A9sek"sv, false }, + { "https://example.com/Let%C3%B6lt%C3%A9sek"sv, "https://example.com/Let%C3%B6lt%C3%A9sek"sv, false }, + { "udp://你好.com/announce"sv, "udp://%E4%BD%A0%E5%A5%BD.com/announce"sv, false }, + { 
"udp://%E4%BD%A0%E5%A5%BD.com/announce"sv, "udp://%E4%BD%A0%E5%A5%BD.com/announce"sv, false }, + } }; + + for (auto const& [decoded, encoded, escape_reserved] : Tests) + { + auto buf = tr_urlbuf{}; + tr_urlPercentEncode(std::back_inserter(buf), decoded, escape_reserved); + EXPECT_EQ(encoded, buf); + } +}