Add a URL extractor that collects result links from the information source pages.
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
using lottery_co_uk_scraper.NationalLottery;
|
||||
using lottery_co_uk_scraper.Utilities;
|
||||
|
||||
namespace lottery_co_uk_scraper
|
||||
{
|
||||
@@ -7,9 +8,21 @@ namespace lottery_co_uk_scraper
|
||||
/// <summary>
/// Entry point. For every configured source page, extracts the lotto result
/// links found on it and scrapes each linked result page.
/// </summary>
static async Task Main()
{
    // Must stay in sync with the lookbehind prefix of the regex in
    // URLExtractor: that prefix is stripped from every value it returns.
    const string ResultsUrlPrefix = "https://www.lottery.co.uk/lotto/results-";

    // A single client is shared by every scrape request issued below.
    using HttpClient client = new();

    // Source pages to scan for result links.
    List<string> urls =
    [
        "",
        "",
    ];

    foreach (string url in urls)
    {
        List<string> extractedUrls = await URLExtractor.ExtractUrlsAsync(url);

        foreach (string extractedUrl in extractedUrls)
        {
            // Scrape the extracted result page, not the source page again.
            // NOTE(review): assumes Lotto.GetLottoNumbers expects an absolute
            // URL, so the stripped prefix is restored here - confirm.
            await Lotto.GetLottoNumbers(ResultsUrlPrefix + extractedUrl, client);
        }
    }
}
|
||||
}
|
||||
}
|
||||
28
lottery-co-uk-scraper/Utilities/URLExtractor.cs
Normal file
28
lottery-co-uk-scraper/Utilities/URLExtractor.cs
Normal file
@@ -0,0 +1,28 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace lottery_co_uk_scraper.Utilities
|
||||
{
|
||||
/// <summary>
/// Downloads a page and pulls out the lotto result links it contains.
/// </summary>
internal partial class URLExtractor
{
    // One shared client for all calls: creating and disposing an HttpClient
    // per request is a known anti-pattern that can exhaust sockets under load.
    private static readonly HttpClient s_httpClient = new();

    /// <summary>
    /// Fetches <paramref name="url"/> and returns every match of the result-link
    /// pattern found in the response body, in document order.
    /// </summary>
    /// <param name="url">Address of the page to download and scan.</param>
    /// <returns>
    /// One entry per matching anchor. Each entry is only the part of the href
    /// that follows <c>https://www.lottery.co.uk/lotto/results-</c>, not the
    /// full address. The list is empty when nothing matches.
    /// </returns>
    public static async Task<List<string>> ExtractUrlsAsync(string url)
    {
        string content = await s_httpClient.GetStringAsync(url);

        return MyRegex().Matches(content)
                        .Select(match => match.Value)
                        .ToList();
    }

    // The lookbehind anchors on the opening of a results link without
    // consuming it, so a match holds only the characters after "results-"
    // up to the first whitespace or quote character.
    [GeneratedRegex(@"(?<=<a href=""https:\/\/www\.lottery\.co\.uk\/lotto\/results-)[^\s""']+")]
    private static partial Regex MyRegex();
}
|
||||
}
|
||||
Reference in New Issue
Block a user