* test: add benc2cpp.py, a benc beautifier for hardcoded cpp test cases
* test: add .resume file unit test
* refactor: use api_compat::convert_incoming_data() and convert_outgoing_data() on .resume files
* chore: mark TR_KEY_peers2_6_kebab as APICOMPAT
* chore: mark TR_KEY_speed_Bps_kebab as APICOMPAT
* chore: mark TR_KEY_use_speed_limit_kebab as APICOMPAT
* chore: mark as APICOMPAT: TR_KEY_use_global_speed_limit_kebab
* chore: mark as APICOMPAT: TR_KEY_ratio_mode_kebab
* chore: mark as APICOMPAT: TR_KEY_idle_limit_kebab
* chore: mark as APICOMPAT: TR_KEY_idle_mode_kebab
* chore: mark as APICOMPAT: TR_KEY_max_peers_kebab
* chore: mark as APICOMPAT: TR_KEY_added_date_kebab
* chore: mark as APICOMPAT: TR_KEY_seeding_time_seconds_kebab
* chore: mark as APICOMPAT: TR_KEY_downloading_time_seconds_kebab
* chore: mark as APICOMPAT: TR_KEY_bandwidth_priority
* chore: mark as APICOMPAT: TR_KEY_done_date_kebab
* chore: mark as APICOMPAT: TR_KEY_activity_date_kebab
* chore: remove remaining _kebab cases from resume.cc
* chore: clang-format
264 lines · 7.5 KiB · Python
#!/usr/bin/env python3
#
# Created by GitHub Copilot (GPT-5.2 (Preview)).
#
# License: Same terms as Transmission itself (see COPYING). Transmission
# permits redistribution/modification under GNU GPLv2, GPLv3, or any future
# license endorsed by Mnemosyne LLC.
#
# Purpose:
# Convert a bencoded (benc) file into a C++ concatenated string-literal
# fragment that preserves the exact original bytes. Output is whitespace-only
# formatted for readability (4-space indentation), similar in spirit to
# pretty-printed JSON.
#
# Usage:
#   tests/assets/benc2cpp.py path/to/file.benc > out.cppfrag
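#
# Illustrative example (added for clarity, not part of the original header):
# a file holding the bencoded bytes `d3:foo3:bare` would come out as a
# fragment like
#
#   // clang-format off
#   constexpr std::string_view Benc =
#       "d"
#           "3:foo" "3:bar"
#       "e";
#   // clang-format on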

from __future__ import annotations

import sys
from pathlib import Path


def bytes_to_cpp_string_literal(data: bytes) -> str:
    r"""Return a single C++ string literal token for arbitrary bytes.

    Uses normal (non-raw) string literals and emits \xNN for bytes that are not
    safe/pleasant as-is.
    """

    out = '"'
    prev_was_hex_escape = False
    for b in data:
        ch = chr(b)

        # C/C++ rule: \x escapes consume *all following hex digits*.
        # If we emit "\xNN" and then a literal '0'..'9'/'a'..'f'/'A'..'F',
        # it becomes a single (larger) hex escape and may fail to compile.
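        # Worked example (added for clarity): the input bytes b"\x01abc" must
        # be emitted as "\x01\x61\x62\x63"; a naive "\x01abc" would be parsed
        # by a C++ compiler as the single, out-of-range hex escape \x01abc.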
        if (
            prev_was_hex_escape
            and (
                (ord('0') <= b <= ord('9'))
                or (ord('a') <= b <= ord('f'))
                or (ord('A') <= b <= ord('F'))
            )
        ):
            out += f"\\x{b:02x}"
            prev_was_hex_escape = True
            continue

        if ch == "\\":
            out += "\\\\"
            prev_was_hex_escape = False
        elif ch == '"':
            out += "\\\""
            prev_was_hex_escape = False
        elif 0x20 <= b <= 0x7E:
            out += ch
            prev_was_hex_escape = False
        else:
            out += f"\\x{b:02x}"
            prev_was_hex_escape = True
    out += '"'
    return out


def bencode_tokenize(data: bytes) -> list[bytes]:
    r"""Tokenize bencode into syntactic units without changing bytes.

    Tokens are:
    - b"d", b"l", b"e"
    - b"i...e" (entire integer token)
    - b"<len>:<payload>" (entire string token, including length and colon)

    This is a tokenizer only. It assumes the input is valid bencode.
    """
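    # Illustrative example (not in the original source): for the input
    # b"d3:fooi42ee" this returns [b"d", b"3:foo", b"i42e", b"e"].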
    tokens: list[bytes] = []
    i = 0
    n = len(data)

    def need(cond: bool, msg: str) -> None:
        if not cond:
            raise ValueError(f"Invalid bencode at offset {i}: {msg}")

    while i < n:
        b = data[i]

        if b in (ord('d'), ord('l'), ord('e')):
            tokens.append(bytes([b]))
            i += 1
            continue

        if b == ord('i'):
            j = data.find(b'e', i + 1)
            need(j != -1, "unterminated integer")
            tokens.append(data[i:j + 1])
            i = j + 1
            continue

        if ord('0') <= b <= ord('9'):
            j = i
            while j < n and ord('0') <= data[j] <= ord('9'):
                j += 1
            need(j < n and data[j] == ord(':'), "string length missing colon")
            strlen = int(data[i:j].decode('ascii'))
            start = j + 1
            end = start + strlen
            need(end <= n, "string payload truncated")
            tokens.append(data[i:end])
            i = end
            continue

        msg = f"Invalid bencode at offset {i}: unexpected byte 0x{b:02x}"
        raise ValueError(msg)

    return tokens


def render_bencode_tokens_pretty(
    tokens: list[bytes],
    *,
    base_indent: int = 4,
    indent_step: int = 4,
) -> list[str]:
    """Render bencode tokens into indented C++ string literal lines.

    Whitespace-only pretty-printing rules:
    - One token per line by default.
    - For dictionaries, if a key's value is a scalar (string or integer),
      render the key and value on the same line separated by a space.

    This changes only whitespace between C string fragments; the concatenated
    bytes are identical to the input.
    """
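    # Illustrative example (not in the original source): the tokens of
    # b"d1:kli1eee" render as the lines
    #     "d"
    #         "1:k"
    #         "l"
    #             "i1e"
    #         "e"
    #     "e"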
    lines: list[str] = []

    # Stack entries are either:
    #   ('list', None)
    #   ('dict', expecting_key: bool)
    stack: list[tuple[str, bool | None]] = []
    pending_dict_key: bytes | None = None

    def depth() -> int:
        return len(stack)

    def indent() -> str:
        return ' ' * (base_indent + depth() * indent_step)

    def is_scalar_token(t: bytes) -> bool:
        return t.startswith(b'i') or (t[:1].isdigit())

    i = 0
    while i < len(tokens):
        tok = tokens[i]

        if tok == b'e':
            if pending_dict_key is not None:
                key_lit = bytes_to_cpp_string_literal(pending_dict_key)
                lines.append(indent() + key_lit)
                pending_dict_key = None

            if stack:
                stack.pop()

            lines.append(indent() + bytes_to_cpp_string_literal(tok))

            # If this closed a value container in a dict,
            # the parent dict is now ready for next key.
            if stack and stack[-1][0] == 'dict' and stack[-1][1] is False:
                stack[-1] = ('dict', True)

            i += 1
            continue

        # Dict key collection
        if stack and stack[-1][0] == 'dict' and stack[-1][1] is True:
            pending_dict_key = tok
            stack[-1] = ('dict', False)
            i += 1
            continue

        # Dict value emission
        is_dict_value = (
            stack
            and stack[-1][0] == 'dict'
            and stack[-1][1] is False
            and pending_dict_key is not None
        )
        if is_dict_value:
            if is_scalar_token(tok):
                lines.append(
                    indent()
                    + bytes_to_cpp_string_literal(pending_dict_key)
                    + ' '
                    + bytes_to_cpp_string_literal(tok)
                )
                pending_dict_key = None
                stack[-1] = ('dict', True)
                i += 1
                continue

            # Non-scalar (container) value: key on its own line, then container
            # token.
            key_lit = bytes_to_cpp_string_literal(pending_dict_key)
            lines.append(indent() + key_lit)
            pending_dict_key = None

            lines.append(indent() + bytes_to_cpp_string_literal(tok))
            if tok == b'd':
                stack.append(('dict', True))
            elif tok == b'l':
                stack.append(('list', None))
            else:
                stack[-1] = ('dict', True)

            i += 1
            continue

        # Default emission
        lines.append(indent() + bytes_to_cpp_string_literal(tok))
        if tok == b'd':
            stack.append(('dict', True))
        elif tok == b'l':
            stack.append(('list', None))

        i += 1

    if pending_dict_key is not None:
        lines.append(indent() + bytes_to_cpp_string_literal(pending_dict_key))

    return lines


def main(argv: list[str]) -> int:
    if len(argv) != 2:
        sys.stderr.write(f"Usage: {Path(argv[0]).name} path/to/file.benc\n")
        return 2

    in_path = Path(argv[1])
    data = in_path.read_bytes()

    tokens = bencode_tokenize(data)
    pretty_lines = render_bencode_tokens_pretty(tokens)

    sys.stdout.write("// clang-format off\n")
    sys.stdout.write("constexpr std::string_view Benc =\n")
    if not pretty_lines:
        sys.stdout.write("    \"\";\n")
    else:
        for line in pretty_lines[:-1]:
            sys.stdout.write(line)
            sys.stdout.write("\n")
        sys.stdout.write(pretty_lines[-1])
        sys.stdout.write(";\n")
    sys.stdout.write("// clang-format on\n")

    return 0
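# Edge case worth noting (behavior derived from main() above, not an original
# comment): an empty input file yields no tokens and thus an empty literal:
#   // clang-format off
#   constexpr std::string_view Benc =
#       "";
#   // clang-format on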


if __name__ == "__main__":
    raise SystemExit(main(sys.argv))