Updated extractor, omitted urls.

This commit is contained in:
Ross Healy
2024-02-07 19:54:34 +00:00
parent f08949a912
commit 92faf19549

View File

@@ -6,23 +6,29 @@ namespace lottery_co_uk_scraper.Utilities
{
// Shared across calls: constructing and disposing an HttpClient per request
// can exhaust sockets under load, so a single instance is reused.
private static readonly HttpClient s_httpClient = new HttpClient();

/// <summary>
/// Downloads the page at <paramref name="url"/> and collects the first capture
/// group of every match produced by <see cref="MyRegex"/>.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The captured values, in the order they occur in the downloaded content.</returns>
/// <exception cref="HttpRequestException">The download failed.</exception>
public static async Task<List<string>> ExtractUrlsAsync(string url)
{
    // NOTE(review): both literals are empty in this revision (the commit message
    // says URLs were omitted). As written, the prefix filter accepts every match
    // and the base prepends nothing - restore the real values before relying on them.
    const string requiredPrefix = "";
    const string urlBase = "";

    List<string> urls = new List<string>();
    string content = await s_httpClient.GetStringAsync(url);

    foreach (Match match in MyRegex().Matches(content))
    {
        // Group 1 is the captured value; match.Value would include the surrounding markup.
        string capturedUrl = match.Groups[1].Value;

        // Ordinal comparison: URLs are identifiers, not culture-sensitive text.
        if (capturedUrl.StartsWith(requiredPrefix, System.StringComparison.Ordinal))
        {
            urls.Add(urlBase + capturedUrl);
        }
    }

    return urls;
}
/// <summary>
/// Matches an opening <c>&lt;a</c> tag whose first attribute is a double-quoted
/// <c>href</c>; capture group 1 holds the attribute value without the quotes.
/// </summary>
// Only one [GeneratedRegex] attribute may be applied to a method. The earlier
// look-behind pattern had no capture group, so the capturing pattern is the one kept.
[GeneratedRegex(@"<a\s+href=""([^""]+)""")]
private static partial Regex MyRegex();
}
}