From 096a415f3b0326aa022f612b4f8678987391a87d Mon Sep 17 00:00:00 2001 From: Dylan De Faoite Date: Thu, 22 Jan 2026 14:49:01 +0000 Subject: [PATCH] fix datetime from boards.ie not being parsed properly --- connectors/boards_api.py | 4 ++++ requirements.txt | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/connectors/boards_api.py b/connectors/boards_api.py index 60e9805..4a605c4 100644 --- a/connectors/boards_api.py +++ b/connectors/boards_api.py @@ -1,3 +1,4 @@ +import datetime import requests import logging import re @@ -78,6 +79,8 @@ class BoardsAPI: if timestamp_tag: match = re.search(r"\d{2}-\d{2}-\d{4}\s+\d{2}:\d{2}[AP]M", timestamp_tag.get_text()) timestamp = match.group(0) if match else None + # convert to unix epoch + timestamp = datetime.datetime.strptime(timestamp, "%d-%m-%Y %I:%M%p").timestamp() if timestamp else None # Post ID post_num = re.search(r"discussion/(\d+)", post_url) @@ -142,6 +145,7 @@ class BoardsAPI: # Timestamp date_elem = tag.find('span', class_='DateCreated') timestamp = date_elem.get_text(strip=True) if date_elem else None + timestamp = datetime.datetime.strptime(timestamp, "%d-%m-%Y %I:%M%p").timestamp() if timestamp else None # Content message_div = tag.find('div', class_='Message userContent') diff --git a/requirements.txt b/requirements.txt index 7c8454d..96b17d5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,8 @@ idna==3.11 itsdangerous==2.2.0 Jinja2==3.1.6 MarkupSafe==3.0.3 +numpy==2.4.1 +pandas==2.3.3 proto-plus==1.27.0 protobuf==6.33.4 psycopg2==2.9.11 @@ -21,11 +23,15 @@ psycopg2-binary==2.9.11 pyasn1==0.6.2 pyasn1_modules==0.4.2 pyparsing==3.3.1 +python-dateutil==2.9.0.post0 python-dotenv==1.2.1 +pytz==2025.2 requests==2.32.5 rsa==4.9.1 +six==1.17.0 soupsieve==2.8.1 typing_extensions==4.15.0 +tzdata==2025.3 uritemplate==4.2.0 urllib3==2.6.3 vaderSentiment==3.3.2