using System.Globalization;
using System.Text.RegularExpressions;

namespace lottery_co_uk_scraper.Utilities
{
    /// <summary>
    /// Text clean-up helpers for turning scraped result headings
    /// (e.g. "Lotto Results Saturday 6th January 2024") into dates.
    /// </summary>
    public class TextRemoval
    {
        /// <summary>Heading prefix that precedes the date and is dropped before parsing.</summary>
        private const string ResultsPrefix = "Lotto Results ";

        /// <summary>Expected date layout once the ordinal suffix is gone, e.g. "Saturday 6 January 2024".</summary>
        private const string DateFormat = "dddd d MMMM yyyy";

        // Built once and reused: matches "st", "nd", "rd" or "th" only when it directly
        // follows a digit and ends at a word boundary, so "6th" is hit but "Saturday" is not.
        private static readonly Regex OrdinalSuffixRegex =
            new Regex(@"(?<=\d)(st|nd|rd|th)\b", RegexOptions.Compiled);

        /// <summary>
        /// Removes English ordinal suffixes that directly follow a digit ("1st" becomes "1").
        /// </summary>
        /// <param name="input">Text that may contain ordinals such as "3rd" or "21st".</param>
        /// <returns><paramref name="input"/> with every digit-attached ordinal suffix removed.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="input"/> is null.</exception>
        public static string RemoveOrdinalSuffix(string input)
        {
            return OrdinalSuffixRegex.Replace(input, string.Empty);
        }

        /// <summary>
        /// Parses a heading such as "Lotto Results Saturday 6th January 2024" into a date.
        /// The "Lotto Results " prefix is optional; surrounding whitespace is ignored.
        /// </summary>
        /// <param name="dateString">The scraped heading or bare date text.</param>
        /// <returns>
        /// The parsed date, or <see cref="DateTime.MinValue"/> when the text is null, blank,
        /// or does not match the "dddd d MMMM yyyy" layout (invariant culture).
        /// </returns>
        public static DateTime ParseDateString(string dateString)
        {
            // Treat missing input like any other unparseable value instead of throwing.
            if (string.IsNullOrWhiteSpace(dateString))
            {
                return DateTime.MinValue;
            }

            // Scraped text frequently carries stray leading/trailing whitespace, which would
            // otherwise defeat both the prefix check and the exact-format parse.
            string candidate = dateString.Trim();

            // Ordinal comparison: the prefix is a fixed literal, not linguistic text, and the
            // Substring below assumes an exact character-for-character match.
            if (candidate.StartsWith(ResultsPrefix, StringComparison.Ordinal))
            {
                candidate = candidate.Substring(ResultsPrefix.Length);
            }

            string cleaned = RemoveOrdinalSuffix(candidate);

            // AllowWhiteSpaces tolerates any whitespace left over after the prefix is removed.
            return DateTime.TryParseExact(
                cleaned,
                DateFormat,
                CultureInfo.InvariantCulture,
                DateTimeStyles.AllowWhiteSpaces,
                out DateTime parsedDate)
                ? parsedDate
                : DateTime.MinValue;
        }
    }
}