From 47c587926e96e5c6c13bfd2789800a4d312decbf Mon Sep 17 00:00:00 2001
From: Ross Healy
Date: Wed, 7 Feb 2024 19:26:59 +0000
Subject: [PATCH] URL Extractor from information source.

---
 lottery-co-uk-scraper/Program.cs              | 17 +++++++++--
 .../Utilities/URLExtractor.cs                 | 28 +++++++++++++++++++
 2 files changed, 43 insertions(+), 2 deletions(-)
 create mode 100644 lottery-co-uk-scraper/Utilities/URLExtractor.cs

diff --git a/lottery-co-uk-scraper/Program.cs b/lottery-co-uk-scraper/Program.cs
index d464722..7a94908 100644
--- a/lottery-co-uk-scraper/Program.cs
+++ b/lottery-co-uk-scraper/Program.cs
@@ -1,4 +1,5 @@
 using lottery_co_uk_scraper.NationalLottery;
+using lottery_co_uk_scraper.Utilities;
 
 namespace lottery_co_uk_scraper
 {
@@ -7,9 +8,21 @@ namespace lottery_co_uk_scraper
         static async Task Main()
         {
             using HttpClient client = new();
-            string url = "";
 
-            await Lotto.GetLottoNumbers(url, client);
+            List<string> urls =
+            [
+                "",
+                "",
+            ];
+
+            foreach (string url in urls)
+            {
+                List<string> extractedUrls = await URLExtractor.ExtractUrlsAsync(url);
+                foreach (string extractedUrl in extractedUrls)
+                {
+                    await Lotto.GetLottoNumbers(extractedUrl, client); // pass each extracted link, not the source page it was found on
+                }
+            }
         }
     }
 }
\ No newline at end of file
diff --git a/lottery-co-uk-scraper/Utilities/URLExtractor.cs b/lottery-co-uk-scraper/Utilities/URLExtractor.cs
new file mode 100644
index 0000000..6f12585
--- /dev/null
+++ b/lottery-co-uk-scraper/Utilities/URLExtractor.cs
@@ -0,0 +1,28 @@
+using System.Text.RegularExpressions;
+
+namespace lottery_co_uk_scraper.Utilities
+{
+    internal partial class URLExtractor
+    {
+        public static async Task<List<string>> ExtractUrlsAsync(string url)
+        {
+            List<string> urls = [];
+
+            using (HttpClient client = new())
+            {
+                string content = await client.GetStringAsync(url);
+                MatchCollection matches = MyRegex().Matches(content);
+
+                foreach (Match match in matches.Cast<Match>())
+                {
+                    urls.Add(match.Value);
+                }
+            }
+
+            return urls;
+        }
+
+        [GeneratedRegex(@"(?<=