Compare commits

..

4 Commits

6 changed files with 31 additions and 25 deletions

View File

@@ -23,6 +23,7 @@ services:
container_name: crosspost_flask container_name: crosspost_flask
volumes: volumes:
- .:/app - .:/app
- model_cache:/models
env_file: env_file:
- .env - .env
ports: ports:
@@ -36,6 +37,7 @@ services:
build: . build: .
volumes: volumes:
- .:/app - .:/app
- model_cache:/models
container_name: crosspost_worker container_name: crosspost_worker
env_file: env_file:
- .env - .env
@@ -46,3 +48,6 @@ services:
depends_on: depends_on:
- postgres - postgres
- redis - redis
volumes:
model_cache:

View File

@@ -3,7 +3,7 @@ import pandas as pd
from server.analysis.nlp import NLP from server.analysis.nlp import NLP
class DatasetEnrichment: class DatasetEnrichment:
def __init__(self, df, topics): def __init__(self, df: pd.DataFrame, topics: dict):
self.df = self._explode_comments(df) self.df = self._explode_comments(df)
self.topics = topics self.topics = topics
self.nlp = NLP(self.df, "title", "content", self.topics) self.nlp = NLP(self.df, "title", "content", self.topics)

View File

@@ -16,7 +16,7 @@ from flask_jwt_extended import (
from server.analysis.stat_gen import StatGen from server.analysis.stat_gen import StatGen
from server.analysis.enrichment import DatasetEnrichment from server.analysis.enrichment import DatasetEnrichment
from server.exceptions import NotAuthorisedException, NotExistentDatasetException from server.exceptions import NotAuthorisedException, NonExistentDatasetException
from server.db.database import PostgresConnector from server.db.database import PostgresConnector
from server.core.auth import AuthManager from server.core.auth import AuthManager
from server.core.datasets import DatasetManager from server.core.datasets import DatasetManager
@@ -162,7 +162,7 @@ def get_dataset(dataset_id):
return jsonify(filtered_dataset), 200 return jsonify(filtered_dataset), 200
except NotAuthorisedException: except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403 return jsonify({"error": "User is not authorised to access this content"}), 403
except NotExistentDatasetException: except NonExistentDatasetException:
return jsonify({"error": "Dataset does not exist"}), 404 return jsonify({"error": "Dataset does not exist"}), 404
except Exception: except Exception:
print(traceback.format_exc()) print(traceback.format_exc())

View File

@@ -1,7 +1,7 @@
import pandas as pd import pandas as pd
from server.db.database import PostgresConnector from server.db.database import PostgresConnector
from psycopg2.extras import Json from psycopg2.extras import Json
from server.exceptions import NotAuthorisedException from server.exceptions import NotAuthorisedException, NonExistentDatasetException
class DatasetManager: class DatasetManager:
def __init__(self, db: PostgresConnector): def __init__(self, db: PostgresConnector):
@@ -23,21 +23,20 @@ class DatasetManager:
def get_dataset_info(self, dataset_id: int) -> dict: def get_dataset_info(self, dataset_id: int) -> dict:
query = "SELECT * FROM datasets WHERE id = %s" query = "SELECT * FROM datasets WHERE id = %s"
result = self.db.execute(query, (dataset_id,), fetch=True) result = self.db.execute(query, (dataset_id,), fetch=True)
return result[0] if result else None
if not result:
raise NonExistentDatasetException(f"Dataset {dataset_id} does not exist")
return result[0]
def save_dataset_info(self, user_id: int, dataset_name: str, topics: dict) -> int: def save_dataset_info(self, user_id: int, dataset_name: str, topics: dict) -> int:
query = """ query = """
INSERT INTO datasets (user_id, name, topics) INSERT INTO datasets (user_id, name, topics)
VALUES (%s, %s, %s) VALUES (%s, %s, %s)
RETURNING id RETURNING id
""" """
result = self.db.execute(query, (user_id, dataset_name, Json(topics)), fetch=True) result = self.db.execute(query, (user_id, dataset_name, Json(topics)), fetch=True)
return result[0]["id"] if result else None return result[0]["id"] if result else None
def get_dataset_content(self, dataset_id: int) -> pd.DataFrame:
query = "SELECT * FROM events WHERE dataset_id = %s"
result = self.db.execute(query, (dataset_id,), fetch=True)
return pd.DataFrame(result)
def save_dataset_content(self, dataset_id: int, event_data: pd.DataFrame): def save_dataset_content(self, dataset_id: int, event_data: pd.DataFrame):
if event_data.empty: if event_data.empty:

View File

@@ -27,19 +27,21 @@ class PostgresConnector:
self.connection.autocommit = False self.connection.autocommit = False
def execute(self, query, params=None, fetch=False) -> list: def execute(self, query, params=None, fetch=False) -> list:
with self.connection.cursor(cursor_factory=RealDictCursor) as cursor: try:
cursor.execute(query, params) with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
if fetch: cursor.execute(query, params)
return cursor.fetchall() result = cursor.fetchall() if fetch else None
self.connection.commit() self.connection.commit()
return result
except Exception:
self.connection.rollback()
raise
def execute_batch(self, query, values): def execute_batch(self, query, values):
with self.connection.cursor(cursor_factory=RealDictCursor) as cursor: with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
execute_batch(cursor, query, values) execute_batch(cursor, query, values)
self.connection.commit() self.connection.commit()
## User Management Methods
def close(self): def close(self):
if self.connection: if self.connection:
self.connection.close() self.connection.close()

View File

@@ -1,7 +1,7 @@
class NotAuthorisedException(Exception): class NotAuthorisedException(Exception):
pass pass
class NotExistentDatasetException(Exception): class NonExistentDatasetException(Exception):
pass pass
class DatabaseNotConfiguredException(Exception): class DatabaseNotConfiguredException(Exception):