Refactor DB classes and management #5

Merged
dylan merged 7 commits from refactor/db-class into main on 2026-03-03 11:17:50 +00:00
7 changed files with 11 additions and 27 deletions
Showing only changes of commit 4961ddc349 - Show all commits

View File

@@ -12,9 +12,9 @@ from flask_jwt_extended import (
) )
from server.stat_gen import StatGen from server.stat_gen import StatGen
from server.dataset_processor import DatasetProcessor from server.enrichment import DatasetEnrichment
from server.exceptions import NotAuthorisedException, NotExistentDatasetException from server.exceptions import NotAuthorisedException, NotExistentDatasetException
from db.database import PostgresConnector from server.db.database import PostgresConnector
from server.auth import AuthManager from server.auth import AuthManager
from server.utils import get_request_filters, get_dataset_and_validate from server.utils import get_request_filters, get_dataset_and_validate
@@ -130,7 +130,7 @@ def upload_data():
posts_df = pd.read_json(post_file, lines=True, convert_dates=False) posts_df = pd.read_json(post_file, lines=True, convert_dates=False)
topics = json.load(topic_file) topics = json.load(topic_file)
processor = DatasetProcessor(posts_df, topics) processor = DatasetEnrichment(posts_df, topics)
enriched_df = processor.enrich() enriched_df = processor.enrich()
dataset_id = db.save_dataset_info( dataset_id = db.save_dataset_info(
current_user, f"dataset_{current_user}", topics current_user, f"dataset_{current_user}", topics

View File

@@ -1,4 +1,4 @@
from db.database import PostgresConnector from server.db.database import PostgresConnector
from flask_bcrypt import Bcrypt from flask_bcrypt import Bcrypt
class AuthManager: class AuthManager:

View File

@@ -3,7 +3,6 @@ import psycopg2
import pandas as pd import pandas as pd
from psycopg2.extras import RealDictCursor from psycopg2.extras import RealDictCursor
from psycopg2.extras import execute_batch, Json from psycopg2.extras import execute_batch, Json
from server.exceptions import NotExistentDatasetException
class PostgresConnector: class PostgresConnector:
@@ -68,7 +67,6 @@ class PostgresConnector:
type, type,
parent_id, parent_id,
author, author,
title,
content, content,
timestamp, timestamp,
date, date,
@@ -90,8 +88,7 @@ class PostgresConnector:
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s
%s
) )
""" """
@@ -103,7 +100,6 @@ class PostgresConnector:
row["type"], row["type"],
row["parent_id"], row["parent_id"],
row["author"], row["author"],
row.get("title"),
row["content"], row["content"],
row["timestamp"], row["timestamp"],
row["date"], row["date"],
@@ -114,7 +110,7 @@ class PostgresConnector:
row["source"], row["source"],
row.get("topic"), row.get("topic"),
row.get("topic_confidence"), row.get("topic_confidence"),
Json(row["entities"]) if row.get("entities") else None, Json(row["ner_entities"]) if row.get("ner_entities") else None,
row.get("emotion_anger"), row.get("emotion_anger"),
row.get("emotion_disgust"), row.get("emotion_disgust"),
row.get("emotion_fear"), row.get("emotion_fear"),
@@ -130,19 +126,12 @@ class PostgresConnector:
def get_dataset_content(self, dataset_id: int) -> pd.DataFrame: def get_dataset_content(self, dataset_id: int) -> pd.DataFrame:
query = "SELECT * FROM events WHERE dataset_id = %s" query = "SELECT * FROM events WHERE dataset_id = %s"
result = self.execute(query, (dataset_id,), fetch=True) result = self.execute(query, (dataset_id,), fetch=True)
return pd.DataFrame(result)
if result:
return pd.DataFrame(result)
raise NotExistentDatasetException("Dataset does not exist")
def get_dataset_info(self, dataset_id: int) -> dict: def get_dataset_info(self, dataset_id: int) -> dict:
query = "SELECT * FROM datasets WHERE id = %s" query = "SELECT * FROM datasets WHERE id = %s"
result = self.execute(query, (dataset_id,), fetch=True) result = self.execute(query, (dataset_id,), fetch=True)
if result: return result[0] if result else None
return result[0]
raise NotExistentDatasetException("Dataset does not exist")
def close(self): def close(self):
if self.connection: if self.connection:

View File

@@ -30,10 +30,7 @@ CREATE TABLE events (
hour INTEGER NOT NULL, hour INTEGER NOT NULL,
weekday VARCHAR(255) NOT NULL, weekday VARCHAR(255) NOT NULL,
/* Posts Only */ /* Comments and Replies */
title VARCHAR(255),
/* Comments Only*/
parent_id VARCHAR(255), parent_id VARCHAR(255),
reply_to VARCHAR(255), reply_to VARCHAR(255),
source VARCHAR(255) NOT NULL, source VARCHAR(255) NOT NULL,

View File

@@ -2,7 +2,7 @@ import pandas as pd
from server.analysis.nlp import NLP from server.analysis.nlp import NLP
class DatasetProcessor: class DatasetEnrichment:
def __init__(self, df, topics): def __init__(self, df, topics):
self.df = self._explode_comments(df) self.df = self._explode_comments(df)
self.topics = topics self.topics = topics

View File

@@ -1,5 +1,3 @@
import datetime
import nltk import nltk
import pandas as pd import pandas as pd
from nltk.corpus import stopwords from nltk.corpus import stopwords

View File

@@ -3,7 +3,7 @@ import pandas as pd
from flask import request from flask import request
from flask_jwt_extended import get_jwt_identity from flask_jwt_extended import get_jwt_identity
from db.database import PostgresConnector from server.db.database import PostgresConnector
from server.exceptions import NotAuthorisedException from server.exceptions import NotAuthorisedException
def parse_datetime_filter(value): def parse_datetime_filter(value):