From 92faf195494841fc44588cf0c6958eaf7c3f8761 Mon Sep 17 00:00:00 2001 From: Ross Healy Date: Wed, 7 Feb 2024 19:54:34 +0000 Subject: [PATCH] Updated extractor, omitted urls. --- lottery-co-uk-scraper/Utilities/URLExtractor.cs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lottery-co-uk-scraper/Utilities/URLExtractor.cs b/lottery-co-uk-scraper/Utilities/URLExtractor.cs index 6f12585..5a9a52b 100644 --- a/lottery-co-uk-scraper/Utilities/URLExtractor.cs +++ b/lottery-co-uk-scraper/Utilities/URLExtractor.cs @@ -6,23 +6,29 @@ namespace lottery_co_uk_scraper.Utilities { public static async Task> ExtractUrlsAsync(string url) { - List urls = []; + List urls = new List(); - using (HttpClient client = new()) + using (HttpClient client = new HttpClient()) { string content = await client.GetStringAsync(url); MatchCollection matches = MyRegex().Matches(content); - foreach (Match match in matches.Cast()) + foreach (Match match in matches) { - urls.Add(match.Value); + string capturedUrl = match.Groups[1].Value; + + if (capturedUrl.StartsWith("")) + { + string modifiedUrl = "" + capturedUrl; + urls.Add(modifiedUrl); + } } } return urls; } - [GeneratedRegex(@"(?<=