Add URL extractor that collects lotto result links from source pages.

2024-02-07 19:26:59 +00:00
parent 9106dd85d7
commit 47c587926e
2 changed files with 43 additions and 2 deletions
--- a/lottery-co-uk-scraper/Program.cs
+++ b/lottery-co-uk-scraper/Program.cs
@@ -1,4 +1,5 @@
 using lottery_co_uk_scraper.NationalLottery;
+using lottery_co_uk_scraper.Utilities;

 namespace lottery_co_uk_scraper
 {
@@ -7,9 +8,21 @@ namespace lottery_co_uk_scraper
        static async Task Main()
        {
            using HttpClient client = new();
-            string url = "";

+            List<string> urls = // Pages to scan for result links; both entries are empty strings in this commit.
+            [
+                "",
+                "",
+            ];
+
+            foreach (string url in urls)
+            {
+                List<string> extractedUrls = await URLExtractor.ExtractUrlsAsync(url); // Download the page and collect its regex matches.
+                foreach (string extractedUrl in extractedUrls) // NOTE(review): extractedUrl is never read; the call below passes the outer url — confirm intent.
+                {
                    await Lotto.GetLottoNumbers(url, client);
                }
            }            
        }
+    }
+}
--- a/lottery-co-uk-scraper/Utilities/URLExtractor.cs
+++ b/lottery-co-uk-scraper/Utilities/URLExtractor.cs
@@ -0,0 +1,28 @@
+using System.Text.RegularExpressions;
+
+namespace lottery_co_uk_scraper.Utilities
+{
+    /// <summary>Finds lottery.co.uk lotto result links inside a downloaded page.</summary>
+    internal partial class URLExtractor
+    {
+        // Shared for the process lifetime: a new HttpClient per call can exhaust sockets.
+        private static readonly HttpClient SharedClient = new();
+
+        /// <summary>
+        /// Downloads <paramref name="url"/> and collects every match of the results-link pattern.
+        /// </summary>
+        /// <param name="url">Address of the page to download.</param>
+        /// <returns>Matched values in page order; an empty list when nothing matches.</returns>
+        /// <remarks>The pattern's look-behind excludes the fixed link prefix from each value.</remarks>
+        public static async Task<List<string>> ExtractUrlsAsync(string url)
+        {
+            string content = await SharedClient.GetStringAsync(url);
+
+            // MatchCollection is IEnumerable<Match>, so no Cast<Match>() is needed.
+            return MyRegex().Matches(content).Select(match => match.Value).ToList();
+        }
+
+        [GeneratedRegex(@"(?<=<a href=""https:\/\/www\.lottery\.co\.uk\/lotto\/results-)[^\s""']+")]
+        private static partial Regex MyRegex();
+    }
+}