From 06f5a42a7fb6877e9a5834683055d60dc70e88bf Mon Sep 17 00:00:00 2001 From: Rob Gill Date: Sun, 9 Nov 2025 14:30:59 +1000 Subject: [PATCH] gravity update - silently discard Unicode BOM if present Some UTF-8 adlists may contain an unnecessary Unicode BOM (byte order mark) at the start of the file. This change silently discards the BOM, if present, instead of flagging it as a non-domain entry. Signed-off-by: Rob Gill --- src/tools/gravity-parseList.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/tools/gravity-parseList.c b/src/tools/gravity-parseList.c index 5e543451..5375372e 100644 --- a/src/tools/gravity-parseList.c +++ b/src/tools/gravity-parseList.c @@ -272,6 +272,18 @@ int gravity_parseList(const char *infile, const char *outfile, const char *adlis unsigned int exact_domains = 0, abp_domains = 0, invalid_domains = 0; while((read = getline(&line, &len, fpin)) != -1) { + + // Handle UTF-8 BOM (Byte Order Mark) if present at start of file + if (read >= 3 && + (unsigned char)line[0] == 0xEF && + (unsigned char)line[1] == 0xBB && + (unsigned char)line[2] == 0xBF) + { + // Shift line contents left by 3 bytes to remove BOM + memmove(line, line + 3, read - 3); + read -= 3; + } + // Update total read bytes total_read += read; lineno++;