Compare commits
2 Commits
58d6b821f6
...
0cd68968d9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0cd68968d9 | ||
|
|
fbbb2e9d6f |
@@ -1,5 +1,7 @@
|
||||
using HtmlAgilityPack;
|
||||
using lottery_co_uk_scraper.Utilities;
|
||||
using System.Globalization;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace lottery_co_uk_scraper
|
||||
{
|
||||
@@ -269,6 +271,47 @@ namespace lottery_co_uk_scraper
|
||||
|
||||
}
|
||||
|
||||
var metaKeywords = doc.DocumentNode.Descendants("meta")
|
||||
.FirstOrDefault(x => x.GetAttributeValue("name", "") == "keywords");
|
||||
|
||||
if (metaKeywords != null)
|
||||
{
|
||||
var keywordsText = metaKeywords.GetAttributeValue("content", "");
|
||||
var drawNumberMatch = Regex.Match(keywordsText, @"lotto draw (\d+)");
|
||||
|
||||
if (drawNumberMatch.Success)
|
||||
{
|
||||
var drawNumber = int.Parse(drawNumberMatch.Groups[1].Value);
|
||||
Console.WriteLine("Draw Number: " + drawNumber);
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine("Draw Number not found.");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine("Meta keywords not found.");
|
||||
}
|
||||
|
||||
var title = doc.DocumentNode.Descendants("title")
|
||||
.FirstOrDefault();
|
||||
|
||||
if (title != null)
|
||||
{
|
||||
var titleText = title.InnerText;
|
||||
var date = TextRemoval.ParseDateString(titleText);
|
||||
|
||||
string formattedDate = date.ToString("yyyy-MM-dd");
|
||||
|
||||
Console.WriteLine("Draw Date: " + formattedDate);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine("Title not found.");
|
||||
}
|
||||
|
||||
var rolloverElement = doc.DocumentNode.Descendants("span")
|
||||
.FirstOrDefault(x => x.InnerText.Trim() == "Rollover");
|
||||
|
||||
|
||||
38
lottery-co-uk-scraper/Utilities/TextRemoval.cs
Normal file
38
lottery-co-uk-scraper/Utilities/TextRemoval.cs
Normal file
@@ -0,0 +1,38 @@
|
||||
using System.Globalization;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace lottery_co_uk_scraper.Utilities
|
||||
{
|
||||
/// <summary>
/// String clean-up helpers used to turn scraped page text into typed values.
/// </summary>
public class TextRemoval
{
    // Literal prefix that is stripped from the front of the text before date parsing.
    private const string PrefixToIgnore = "Lotto Results ";

    // Layout expected once the prefix and ordinal suffix are gone, e.g. "Saturday 6 January 2024".
    private const string DrawDateFormat = "dddd d MMMM yyyy";

    // Matches "st", "nd", "rd" or "th" only when directly preceded by a digit and
    // followed by a word boundary, so words such as "August" are left alone.
    private static readonly Regex OrdinalSuffixPattern =
        new Regex("(?<=\\d)(st|nd|rd|th)\\b", RegexOptions.Compiled);

    /// <summary>
    /// Removes English ordinal suffixes that directly follow a digit ("6th" becomes "6").
    /// </summary>
    /// <param name="input">Text that may contain ordinal day numbers.</param>
    /// <returns><paramref name="input"/> with every digit-attached ordinal suffix removed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public static string RemoveOrdinalSuffix(string input)
    {
        return OrdinalSuffixPattern.Replace(input, "");
    }

    /// <summary>
    /// Parses text of the form "Lotto Results Saturday 6th January 2024" into a date.
    /// The "Lotto Results " prefix is optional; surrounding whitespace is ignored.
    /// </summary>
    /// <param name="dateString">Text containing the weekday, day, month name and year.</param>
    /// <returns>
    /// The parsed date, or <see cref="DateTime.MinValue"/> when the text is null, blank,
    /// or does not match the expected layout. Callers should treat
    /// <see cref="DateTime.MinValue"/> as "no date found".
    /// </returns>
    public static DateTime ParseDateString(string dateString)
    {
        // Null or blank text cannot hold a date; report it with the same sentinel as a failed parse.
        if (string.IsNullOrWhiteSpace(dateString))
        {
            return DateTime.MinValue;
        }

        // Trim first so that leading whitespace cannot hide the prefix from the check below.
        string candidate = dateString.Trim();

        // Ordinal comparison: the prefix is a fixed literal, not linguistic text.
        if (candidate.StartsWith(PrefixToIgnore, StringComparison.Ordinal))
        {
            candidate = candidate.Substring(PrefixToIgnore.Length);
        }

        // "d" in the format accepts digits only, so "6th" has to become "6" before parsing.
        candidate = RemoveOrdinalSuffix(candidate);

        bool parsedOk = DateTime.TryParseExact(
            candidate,
            DrawDateFormat,
            CultureInfo.InvariantCulture,
            DateTimeStyles.AllowWhiteSpaces,
            out DateTime parsedDate);

        return parsedOk ? parsedDate : DateTime.MinValue;
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user