Updated extractor, omitted urls.
This commit is contained in:
@@ -6,23 +6,29 @@ namespace lottery_co_uk_scraper.Utilities
|
||||
{
|
||||
public static async Task<List<string>> ExtractUrlsAsync(string url)
|
||||
{
|
||||
List<string> urls = [];
|
||||
List<string> urls = new List<string>();
|
||||
|
||||
using (HttpClient client = new())
|
||||
using (HttpClient client = new HttpClient())
|
||||
{
|
||||
string content = await client.GetStringAsync(url);
|
||||
MatchCollection matches = MyRegex().Matches(content);
|
||||
|
||||
foreach (Match match in matches.Cast<Match>())
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
urls.Add(match.Value);
|
||||
string capturedUrl = match.Groups[1].Value;
|
||||
|
||||
if (capturedUrl.StartsWith(""))
|
||||
{
|
||||
string modifiedUrl = "" + capturedUrl;
|
||||
urls.Add(modifiedUrl);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return urls;
|
||||
}
|
||||
|
||||
[GeneratedRegex(@"(?<=<a href=""https:\/\/www\.lottery\.co\.uk\/lotto\/results-)[^\s""']+")]
|
||||
[GeneratedRegex(@"<a\s+href=""([^""]+)""")]
|
||||
private static partial Regex MyRegex();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user