URL Extractor from information source.

This commit is contained in:
Ross Healy
2024-02-07 19:26:59 +00:00
parent 9106dd85d7
commit 47c587926e
2 changed files with 43 additions and 2 deletions

View File

@@ -1,4 +1,5 @@
using lottery_co_uk_scraper.NationalLottery; using lottery_co_uk_scraper.NationalLottery;
using lottery_co_uk_scraper.Utilities;
namespace lottery_co_uk_scraper namespace lottery_co_uk_scraper
{ {
@@ -7,9 +8,21 @@ namespace lottery_co_uk_scraper
static async Task Main() static async Task Main()
{ {
using HttpClient client = new(); using HttpClient client = new();
string url = "";
List<string> urls =
[
"",
"",
];
foreach (string url in urls)
{
List<string> extractedUrls = await URLExtractor.ExtractUrlsAsync(url);
foreach (string extractedUrl in extractedUrls)
{
await Lotto.GetLottoNumbers(url, client); await Lotto.GetLottoNumbers(url, client);
} }
} }
}
}
} }

View File

@@ -0,0 +1,28 @@
using System.Text.RegularExpressions;
namespace lottery_co_uk_scraper.Utilities
{
    /// <summary>
    /// Downloads a page and extracts lotto result link fragments from its HTML.
    /// </summary>
    internal partial class URLExtractor
    {
        // A single client is shared by every call. Creating and disposing an
        // HttpClient per request leaves sockets lingering in TIME_WAIT and can
        // exhaust the available ports when this method is called in a loop.
        private static readonly HttpClient SharedClient = new();

        /// <summary>
        /// Fetches the document at <paramref name="url"/> and returns every match of
        /// the lotto results link pattern found in it, in document order.
        /// </summary>
        /// <param name="url">Address of the page to download and scan.</param>
        /// <returns>
        /// The matched values. Because the pattern uses a lookbehind for the
        /// <c>&lt;a href="https://www.lottery.co.uk/lotto/results-</c> prefix, each
        /// entry is only the text that follows that prefix (up to the next
        /// whitespace or quote character), not an absolute URL. The list is empty
        /// when the page contains no matching link.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="url"/> is not a usable absolute request URI.
        /// </exception>
        /// <exception cref="HttpRequestException">
        /// The request failed or the server returned a non-success status code.
        /// </exception>
        public static async Task<List<string>> ExtractUrlsAsync(string url)
        {
            string content = await SharedClient.GetStringAsync(url);

            // MatchCollection already enumerates as Match, so no Cast<Match>() is needed.
            return MyRegex()
                .Matches(content)
                .Select(match => match.Value)
                .ToList();
        }

        [GeneratedRegex(@"(?<=<a href=""https:\/\/www\.lottery\.co\.uk\/lotto\/results-)[^\s""']+")]
        private static partial Regex MyRegex();
    }
}