refactor: add tr_strv_to_utf8_nsstring() (#8174)

* refactor: rename the enumerators of ip-cache's private enum class is_updating_t

This avoids name collisions with the Objective-C YES and NO macros on macOS.

* refactor: rename tr_strv_convert_utf8() to tr_strv_to_utf8_string()

use a name that is symmetrical with the upcoming tr_strv_to_utf8_nsstring()

* feat: add tr_strv_to_utf8_nsstring()

* test: add tests for the new tr_strv_to_utf8_nsstring() methods
This commit is contained in:
Charles Kerr
2026-01-21 08:25:07 -06:00
committed by GitHub
parent 83d49b3c7f
commit 7f5730984a
11 changed files with 171 additions and 35 deletions

View File

@@ -170,7 +170,7 @@ tr_ip_cache::~tr_ip_cache()
if (!std::all_of(
std::begin(is_updating_),
std::end(is_updating_),
[](is_updating_t const& v) { return v == is_updating_t::ABORT; }))
[](is_updating_t const& v) { return v == is_updating_t::Abort; }))
{
tr_logAddDebug("Destructed while some global IP queries were pending.");
}
@@ -185,11 +185,11 @@ bool tr_ip_cache::try_shutdown() noexcept
for (std::size_t i = 0; i < NUM_TR_AF_INET_TYPES; ++i)
{
if (is_updating_[i] == is_updating_t::YES)
if (is_updating_[i] == is_updating_t::Yes)
{
return false;
}
is_updating_[i] = is_updating_t::ABORT; // Abort any future updates
is_updating_[i] = is_updating_t::Abort; // Abort any future updates
}
return true;
}
@@ -244,7 +244,7 @@ void tr_ip_cache::update_global_addr(tr_address_type type) noexcept
{
return;
}
TR_ASSERT(is_updating_[type] == is_updating_t::YES);
TR_ASSERT(is_updating_[type] == is_updating_t::Yes);
// Update global address
static auto constexpr IPProtocolMap = std::array{
@@ -279,7 +279,7 @@ void tr_ip_cache::update_source_addr(tr_address_type type) noexcept
{
return;
}
TR_ASSERT(is_updating_[type] == is_updating_t::YES);
TR_ASSERT(is_updating_[type] == is_updating_t::Yes);
auto const protocol = tr_ip_protocol_to_sv(type);
auto err = 0;
@@ -321,7 +321,7 @@ void tr_ip_cache::update_source_addr(tr_address_type type) noexcept
void tr_ip_cache::on_response_ip_query(tr_address_type type, tr_web::FetchResponse const& response) noexcept
{
auto& ix_service = ix_service_[type];
TR_ASSERT(is_updating_[type] == is_updating_t::YES);
TR_ASSERT(is_updating_[type] == is_updating_t::Yes);
TR_ASSERT(ix_service < std::size(IPQueryServices[type]));
auto const protocol = tr_ip_protocol_to_sv(type);
@@ -398,16 +398,16 @@ void tr_ip_cache::unset_addr(tr_address_type type) noexcept
bool tr_ip_cache::set_is_updating(tr_address_type type) noexcept
{
if (is_updating_[type] != is_updating_t::NO)
if (is_updating_[type] != is_updating_t::No)
{
return false;
}
is_updating_[type] = is_updating_t::YES;
is_updating_[type] = is_updating_t::Yes;
return true;
}
void tr_ip_cache::unset_is_updating(tr_address_type type) noexcept
{
TR_ASSERT(is_updating_[type] == is_updating_t::YES);
is_updating_[type] = is_updating_t::NO;
TR_ASSERT(is_updating_[type] == is_updating_t::Yes);
is_updating_[type] = is_updating_t::No;
}

View File

@@ -119,9 +119,9 @@ private:
enum class is_updating_t : uint8_t
{
NO = 0,
YES,
ABORT
No = 0,
Yes,
Abort
};
array_ip_t<is_updating_t> is_updating_ = {};

View File

@@ -195,7 +195,7 @@ std::string tr_magnet_metainfo::magnet() const
void tr_magnet_metainfo::set_name(std::string_view name)
{
name_ = tr_strv_convert_utf8(name);
name_ = tr_strv_to_utf8_string(name);
}
void tr_magnet_metainfo::add_webseed(std::string_view webseed)

View File

@@ -1320,7 +1320,7 @@ void tr_peerMsgsImpl::parse_ltep_handshake(MessageReader& payload)
// peer id encoding.
if (auto const sv = map->value_if<std::string_view>(TR_KEY_v))
{
set_user_agent(tr_interned_string{ tr_strv_convert_utf8(*sv) });
set_user_agent(tr_interned_string{ tr_strv_to_utf8_string(*sv) });
}
// https://www.bittorrent.org/beps/bep_0010.html

View File

@@ -329,11 +329,11 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
}
else if (pathIs(CommentKey) || pathIs(CommentUtf8Key))
{
tm_.comment_ = tr_strv_convert_utf8(value);
tm_.comment_ = tr_strv_to_utf8_string(value);
}
else if (pathIs(CreatedByKey) || pathIs(CreatedByUtf8Key))
{
tm_.creator_ = tr_strv_convert_utf8(value);
tm_.creator_ = tr_strv_to_utf8_string(value);
}
else if (
pathIs(SourceKey) || pathIs(InfoKey, SourceKey) || //
@@ -344,7 +344,7 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
// to have the same use as the 'source' key
// http://wiki.bitcomet.com/inside_bitcomet
tm_.source_ = tr_strv_convert_utf8(value);
tm_.source_ = tr_strv_to_utf8_string(value);
}
else if (pathIs(AnnounceKey))
{

View File

@@ -297,7 +297,7 @@ double tr_getRatio(uint64_t numerator, uint64_t denominator)
#if !(defined(__APPLE__) && defined(__clang__))
std::string tr_strv_convert_utf8(std::string_view sv)
std::string tr_strv_to_utf8_string(std::string_view sv)
{
return tr_strv_replace_invalid(sv);
}

View File

@@ -192,7 +192,16 @@ constexpr bool tr_strv_sep(std::string_view* sv, std::string_view* token, Args&&
[[nodiscard]] std::string_view tr_strv_strip(std::string_view str);
[[nodiscard]] std::string tr_strv_convert_utf8(std::string_view sv);
[[nodiscard]] std::string tr_strv_to_utf8_string(std::string_view sv);
#ifdef __APPLE__
#ifdef __OBJC__
@class NSString;
[[nodiscard]] std::string tr_strv_to_utf8_string(NSString* str);
[[nodiscard]] NSString* tr_strv_to_utf8_nsstring(std::string_view sv);
[[nodiscard]] NSString* tr_strv_to_utf8_nsstring(std::string_view sv, NSString* key, NSString* comment);
#endif
#endif
[[nodiscard]] std::string tr_strv_replace_invalid(std::string_view sv, uint32_t replacement = 0xFFFD /* U+FFFD REPLACEMENT CHARACTER */);

View File

@@ -10,44 +10,60 @@
#include "libtransmission/utils.h"
// macOS implementation of tr_strv_convert_utf8() that autodetects the encoding.
// macOS implementation of tr_strv_to_utf8_string() that autodetects the encoding.
// This replaces the generic implementation of the function in utils.cc.
std::string tr_strv_convert_utf8(std::string_view sv)
std::string tr_strv_to_utf8_string(std::string_view sv)
{
// local pool for non-app tools like transmission-daemon, transmission-remote, transmission-create, ...
@autoreleasepool
{
// UTF-8 encoding
NSString* const utf8 = [[NSString alloc] initWithBytes:std::data(sv) length:std::size(sv) encoding:NSUTF8StringEncoding];
if (utf8 != nil)
if (utf8 != nil && utf8.UTF8String != nullptr)
{
return std::string{ utf8.UTF8String };
return tr_strv_to_utf8_string(utf8);
}
// autodetection of the encoding (#3434)
// Try to make a UTF8 string from the detected encoding.
// 1. Disallow lossy conversion in this step. Lossy conversion
// is done as last resort later in `tr_strv_replace_invalid()`.
// 2. We only provide the likely language. If we also supplied
// suggested encodings, the first one listed could override the
// others (e.g. cp932 vs cp866).
NSString* convertedString;
NSStringEncoding stringEncoding = [NSString
stringEncodingForData:[NSData dataWithBytes:std::data(sv) length:std::size(sv)]
encodingOptions:@{
// We disallow lossy conversion, and will leave it to `utf8::unchecked::replace_invalid`.
NSStringEncodingDetectionAllowLossyKey : @NO,
// We only set the likely language.
// If we were to set suggested encodings, then whatever is listed first would take precedence on all others, making for instance kCFStringEncodingDOSJapanese (cp932) and kCFStringEncodingDOSRussian (cp866) taking priority on each other.
NSStringEncodingDetectionLikelyLanguageKey : NSLocale.currentLocale.languageCode
}
convertedString:&convertedString
usedLossyConversion:nil];
if (stringEncoding)
if (stringEncoding && convertedString != nil && convertedString.UTF8String != nullptr)
{
if (convertedString.UTF8String != nullptr)
{
return std::string{ convertedString.UTF8String };
}
return tr_strv_to_utf8_string(convertedString);
}
// invalid encoding
return tr_strv_replace_invalid(sv);
}
}
// Copies the UTF-8 representation of `str` into a std::string.
//
// Returns an empty string when `str` is nil or when it has no UTF-8
// representation: in both cases `UTF8String` evaluates to a null pointer,
// and constructing a std::string from a null pointer is undefined behavior.
std::string tr_strv_to_utf8_string(NSString* str)
{
    char const* const utf8 = str.UTF8String;
    return utf8 != nullptr ? std::string{ utf8 } : std::string{};
}
// Builds an NSString by decoding the bytes of `sv` as UTF-8.
// If the bytes cannot be decoded, an empty NSString is returned instead of nil.
NSString* tr_strv_to_utf8_nsstring(std::string_view const sv)
{
    NSString* const decoded = [[NSString alloc] initWithBytes:std::data(sv) length:std::size(sv) encoding:NSUTF8StringEncoding];
    if (decoded == nil)
    {
        return @"";
    }

    return decoded;
}
// Builds an NSString by decoding the bytes of `sv` as UTF-8.
// If the bytes cannot be decoded, the localized string looked up via
// NSLocalizedString(key, comment) is returned instead.
NSString* tr_strv_to_utf8_nsstring(std::string_view const sv, NSString* key, NSString* comment)
{
    NSString* const decoded = [[NSString alloc] initWithBytes:std::data(sv) length:std::size(sv) encoding:NSUTF8StringEncoding];
    if (decoded == nil)
    {
        return NSLocalizedString(key, comment);
    }

    return decoded;
}

View File

@@ -60,6 +60,12 @@ target_sources(libtransmission-test
watchdir-test.cc
web-utils-test.cc)
if(APPLE)
target_sources(libtransmission-test
PRIVATE
utils-apple-test.mm)
endif()
set_property(
TARGET libtransmission-test
PROPERTY FOLDER "tests")

View File

@@ -0,0 +1,105 @@
// This file Copyright © Mnemosyne LLC.
// It may be used under GPLv2 (SPDX: GPL-2.0-only), GPLv3 (SPDX: GPL-3.0-only),
// or any future license endorsed by Mnemosyne LLC.
// License text can be found in the licenses/ folder.
#import <Foundation/Foundation.h>
#include <algorithm>
#include <string_view>
#include <libtransmission/utils.h>
#include "gtest/gtest.h"
#include "test-fixtures.h"
using UtilsTest = ::libtransmission::test::TransmissionTest;
using namespace std::literals;
namespace
{
// Counts the non-overlapping occurrences in `sv` of the UTF-8 byte sequence
// EF BF BD, i.e. the encoded U+FFFD replacement character.
[[nodiscard]] constexpr size_t count_replacement_char(std::string_view const sv)
{
    constexpr auto ReplacementChar = std::string_view{ "\xEF\xBF\xBD" };

    auto n_found = size_t{};
    for (auto idx = sv.find(ReplacementChar); idx != std::string_view::npos;
         idx = sv.find(ReplacementChar, idx + std::size(ReplacementChar)))
    {
        ++n_found;
    }

    return n_found;
}
// Returns true if any byte of `sv` is outside the 7-bit ASCII range (>= 0x80).
//
// Written as a plain loop rather than std::any_of(): that algorithm is only
// constexpr from C++20 on, so calling it from a constexpr function is not a
// valid constant expression under C++17.
[[nodiscard]] constexpr bool has_non_ascii(std::string_view const sv)
{
    for (auto const ch : sv)
    {
        // compare as unsigned so bytes >= 0x80 are not seen as negative chars
        if (static_cast<unsigned char>(ch) >= 0x80)
        {
            return true;
        }
    }

    return false;
}
} // namespace
// Well-formed input is expected to come back as an NSString with the same contents.
TEST_F(UtilsTest, trStrvToUtf8NsstringValid)
{
    @autoreleasepool
    {
        NSString* const converted = tr_strv_to_utf8_nsstring("hello"sv);
        EXPECT_TRUE([converted isEqualToString:@"hello"]);
    }
}
// Bytes that are not valid UTF-8 are expected to produce an empty NSString
// from the single-argument overload.
TEST_F(UtilsTest, trStrvToUtf8NsstringInvalid)
{
    @autoreleasepool
    {
        constexpr auto InvalidUtf8 = "\xF4\x33\x81\x82"sv;
        NSString* const converted = tr_strv_to_utf8_nsstring(InvalidUtf8);
        EXPECT_TRUE([converted isEqualToString:@""]);
    }
}
// Bytes that are not valid UTF-8 are expected to produce the localized
// fallback from the three-argument overload. The test expects the key itself
// back -- presumably because no localization exists for this key in the test
// bundle.
TEST_F(UtilsTest, trStrvToUtf8NsstringFallback)
{
    @autoreleasepool
    {
        NSString* const fallback_key = @"tr.strv.to.utf8.fallback";
        NSString* const fallback_comment = @"fallback string for tests";
        constexpr auto InvalidUtf8 = "\xF4\x33\x81\x82"sv;

        NSString* const converted = tr_strv_to_utf8_nsstring(InvalidUtf8, fallback_key, fallback_comment);
        EXPECT_TRUE([converted isEqualToString:fallback_key]);
    }
}
// Input mixing ASCII text with an invalid byte sequence must convert to a
// non-empty string that is stable: neither tr_strv_replace_invalid() nor a
// second conversion may change it any further.
TEST_F(UtilsTest, trStrvToUtf8StringMixedInvalid)
{
    constexpr auto MixedInput = "hello \xF0\x28\x8C\x28 world"sv;
    auto const converted = tr_strv_to_utf8_string(MixedInput);

    EXPECT_FALSE(converted.empty());

    // the result should pass through both functions unchanged
    EXPECT_EQ(converted, tr_strv_replace_invalid(converted));
    EXPECT_EQ(converted, tr_strv_to_utf8_string(converted));
}
// Compares encoding autodetection against plain invalid-byte replacement on
// a non-UTF-8 filename: autodetection should leave fewer U+FFFD characters.
TEST_F(UtilsTest, trStrvToUtf8StringAutodetectImproves)
{
    // Shift_JIS-encoded filename from a real-world report in
    // https://github.com/transmission/transmission/pull/5244#issuecomment-1474442137
    constexpr auto ShiftJisName =
        "\x93\xC1\x96\xBD\x8C\x57\x92\xB7\x81\x45\x91\xFC\x96\xEC\x90\x6D"
        "\x83\x8A\x83\x5E\x81\x5B\x83\x93\x83\x59 (D-ABC 704x396 DivX511).avi"sv;

    auto const replaced = tr_strv_replace_invalid(ShiftJisName);
    auto const detected = tr_strv_to_utf8_string(ShiftJisName);

    EXPECT_FALSE(detected.empty());
    EXPECT_EQ(detected, tr_strv_replace_invalid(detected));

    auto const n_detected = count_replacement_char(detected);
    auto const n_replaced = count_replacement_char(replaced);

    // Autodetect should preserve more readable text than replacement-only.
    EXPECT_LT(n_detected, n_replaced);

    // EXPECT_LT is non-fatal, so only run the stricter checks when
    // autodetect really did improve on replacement-only.
    if (n_detected < n_replaced)
    {
        EXPECT_EQ(0U, n_detected);
        EXPECT_TRUE(has_non_ascii(detected));
    }
}

View File

@@ -210,8 +210,8 @@ TEST_F(UtilsTest, strvConvertUtf8Fuzz)
{
buf.resize(tr_rand_int(4096U));
tr_rand_buffer(std::data(buf), std::size(buf));
auto const out = tr_strv_convert_utf8({ std::data(buf), std::size(buf) });
EXPECT_EQ(out, tr_strv_convert_utf8(out));
auto const out = tr_strv_to_utf8_string({ std::data(buf), std::size(buf) });
EXPECT_EQ(out, tr_strv_to_utf8_string(out));
}
}