From 4961ddc3497b4ca16dfa2f87d8bddd55134dbc29 Mon Sep 17 00:00:00 2001 From: Dylan De Faoite Date: Mon, 2 Mar 2026 19:05:56 +0000 Subject: [PATCH] refactor: move db dir into server --- server/app.py | 6 +++--- server/auth.py | 2 +- {db => server/db}/database.py | 19 ++++--------------- {db => server/db}/schema.sql | 5 +---- .../{dataset_processor.py => enrichment.py} | 2 +- server/stat_gen.py | 2 -- server/utils.py | 2 +- 7 files changed, 11 insertions(+), 27 deletions(-) rename {db => server/db}/database.py (90%) rename {db => server/db}/schema.sql (94%) rename server/{dataset_processor.py => enrichment.py} (98%) diff --git a/server/app.py b/server/app.py index 5e63acd..7df12cb 100644 --- a/server/app.py +++ b/server/app.py @@ -12,9 +12,9 @@ from flask_jwt_extended import ( ) from server.stat_gen import StatGen -from server.dataset_processor import DatasetProcessor +from server.enrichment import DatasetEnrichment from server.exceptions import NotAuthorisedException, NotExistentDatasetException -from db.database import PostgresConnector +from server.db.database import PostgresConnector from server.auth import AuthManager from server.utils import get_request_filters, get_dataset_and_validate @@ -130,7 +130,7 @@ def upload_data(): posts_df = pd.read_json(post_file, lines=True, convert_dates=False) topics = json.load(topic_file) - processor = DatasetProcessor(posts_df, topics) + processor = DatasetEnrichment(posts_df, topics) enriched_df = processor.enrich() dataset_id = db.save_dataset_info( current_user, f"dataset_{current_user}", topics diff --git a/server/auth.py b/server/auth.py index 9d62512..2371da9 100644 --- a/server/auth.py +++ b/server/auth.py @@ -1,4 +1,4 @@ -from db.database import PostgresConnector +from server.db.database import PostgresConnector from flask_bcrypt import Bcrypt class AuthManager: diff --git a/db/database.py b/server/db/database.py similarity index 90% rename from db/database.py rename to server/db/database.py index a9c6d41..80cf5b3 100644 --- a/db/database.py +++ b/server/db/database.py @@ -3,7 +3,6 @@ import psycopg2 import pandas as pd from psycopg2.extras import RealDictCursor from psycopg2.extras import execute_batch, Json -from server.exceptions import NotExistentDatasetException class PostgresConnector: @@ -68,7 +67,6 @@ class PostgresConnector: type, parent_id, author, - title, content, timestamp, date, @@ -90,8 +88,7 @@ class PostgresConnector: %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, - %s, %s, %s, %s, %s, - %s + %s, %s, %s, %s, %s ) """ @@ -103,7 +100,6 @@ class PostgresConnector: row["type"], row["parent_id"], row["author"], - row.get("title"), row["content"], row["timestamp"], row["date"], @@ -114,7 +110,7 @@ class PostgresConnector: row["source"], row.get("topic"), row.get("topic_confidence"), - Json(row["entities"]) if row.get("entities") else None, + Json(row["ner_entities"]) if row.get("ner_entities") else None, row.get("emotion_anger"), row.get("emotion_disgust"), row.get("emotion_fear"), @@ -130,19 +126,12 @@ class PostgresConnector: def get_dataset_content(self, dataset_id: int) -> pd.DataFrame: query = "SELECT * FROM events WHERE dataset_id = %s" result = self.execute(query, (dataset_id,), fetch=True) - - if result: - return pd.DataFrame(result) - - raise NotExistentDatasetException("Dataset does not exist") + return pd.DataFrame(result) def get_dataset_info(self, dataset_id: int) -> dict: query = "SELECT * FROM datasets WHERE id = %s" result = self.execute(query, (dataset_id,), fetch=True) - if result: - return result[0] - - raise NotExistentDatasetException("Dataset does not exist") + return result[0] if result else None def close(self): if self.connection: diff --git a/db/schema.sql b/server/db/schema.sql similarity index 94% rename from db/schema.sql rename to server/db/schema.sql index 5a9eaee..693f821 100644 --- a/db/schema.sql +++ b/server/db/schema.sql @@ -30,10 +30,7 @@ CREATE TABLE events ( hour INTEGER NOT NULL, weekday VARCHAR(255) NOT NULL, - /* Posts Only */ - title VARCHAR(255), - - /* Comments Only*/ + /* Comments and Replies */ parent_id VARCHAR(255), reply_to VARCHAR(255), source VARCHAR(255) NOT NULL, diff --git a/server/dataset_processor.py b/server/enrichment.py similarity index 98% rename from server/dataset_processor.py rename to server/enrichment.py index 37e94da..065caea 100644 --- a/server/dataset_processor.py +++ b/server/enrichment.py @@ -2,7 +2,7 @@ import pandas as pd from server.analysis.nlp import NLP -class DatasetProcessor: +class DatasetEnrichment: def __init__(self, df, topics): self.df = self._explode_comments(df) self.topics = topics diff --git a/server/stat_gen.py b/server/stat_gen.py index 2ea5ac1..f9d8344 100644 --- a/server/stat_gen.py +++ b/server/stat_gen.py @@ -1,5 +1,3 @@ -import datetime - import nltk import pandas as pd from nltk.corpus import stopwords diff --git a/server/utils.py b/server/utils.py index 71e593b..5e20706 100644 --- a/server/utils.py +++ b/server/utils.py @@ -3,7 +3,7 @@ import pandas as pd from flask import request from flask_jwt_extended import get_jwt_identity -from db.database import PostgresConnector +from server.db.database import PostgresConnector from server.exceptions import NotAuthorisedException def parse_datetime_filter(value):