diff --git a/server/app.py b/server/app.py index 7df12cb..f7ad584 100644 --- a/server/app.py +++ b/server/app.py @@ -16,14 +16,14 @@ from server.enrichment import DatasetEnrichment from server.exceptions import NotAuthorisedException, NotExistentDatasetException from server.db.database import PostgresConnector from server.auth import AuthManager -from server.utils import get_request_filters, get_dataset_and_validate +from server.datasets import DatasetManager +from server.utils import get_request_filters import pandas as pd import traceback import json app = Flask(__name__) -db = PostgresConnector() # Env Variables load_dotenv() @@ -40,11 +40,12 @@ app.config["JWT_ACCESS_TOKEN_EXPIRES"] = jwt_access_token_expires bcrypt = Bcrypt(app) jwt = JWTManager(app) + +db = PostgresConnector() auth_manager = AuthManager(db, bcrypt) - +dataset_manager = DatasetManager(db) stat_gen = StatGen() - @app.route("/register", methods=["POST"]) def register_user(): data = request.get_json() @@ -132,10 +133,8 @@ def upload_data(): processor = DatasetEnrichment(posts_df, topics) enriched_df = processor.enrich() - dataset_id = db.save_dataset_info( - current_user, f"dataset_{current_user}", topics - ) - db.save_dataset_content(dataset_id, enriched_df) + dataset_id = dataset_manager.save_dataset_info(current_user, f"dataset_{current_user}", topics) + dataset_manager.save_dataset_content(dataset_id, enriched_df) return jsonify( { @@ -154,7 +153,8 @@ def upload_data(): @jwt_required() def get_dataset(dataset_id): try: - dataset_content = get_dataset_and_validate(dataset_id, db) + user_id = get_jwt_identity() + dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) filters = get_request_filters() filtered_dataset = stat_gen.filter_dataset(dataset_content, filters) return jsonify(filtered_dataset), 200 @@ -171,7 +171,8 @@ def get_dataset(dataset_id): @jwt_required() def content_endpoint(dataset_id): try: - dataset_content = get_dataset_and_validate(dataset_id, db) + user_id = get_jwt_identity() + dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) filters = get_request_filters() return jsonify(stat_gen.get_content_analysis(dataset_content, filters)), 200 except NotAuthorisedException: @@ -187,7 +188,8 @@ def content_endpoint(dataset_id): @jwt_required() def get_summary(dataset_id): try: - dataset_content = get_dataset_and_validate(dataset_id, db) + user_id = get_jwt_identity() + dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) filters = get_request_filters() return jsonify(stat_gen.summary(dataset_content, filters)), 200 except NotAuthorisedException: @@ -203,7 +205,8 @@ def get_summary(dataset_id): @jwt_required() def get_time_analysis(dataset_id): try: - dataset_content = get_dataset_and_validate(dataset_id, db) + user_id = get_jwt_identity() + dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) filters = get_request_filters() return jsonify(stat_gen.get_time_analysis(dataset_content, filters)), 200 except NotAuthorisedException: @@ -219,7 +222,8 @@ def get_time_analysis(dataset_id): @jwt_required() def get_user_analysis(dataset_id): try: - dataset_content = get_dataset_and_validate(dataset_id, db) + user_id = get_jwt_identity() + dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) filters = get_request_filters() return jsonify(stat_gen.get_user_analysis(dataset_content, filters)), 200 except NotAuthorisedException: @@ -235,7 +239,8 @@ def get_user_analysis(dataset_id): @jwt_required() def get_cultural_analysis(dataset_id): try: - dataset_content = get_dataset_and_validate(dataset_id, db) + user_id = get_jwt_identity() + dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) filters = get_request_filters() return jsonify(stat_gen.get_cultural_analysis(dataset_content, filters)), 200 except NotAuthorisedException: @@ -251,7 +256,8 @@ def get_cultural_analysis(dataset_id): @jwt_required() def get_interaction_analysis(dataset_id): try: - dataset_content = get_dataset_and_validate(dataset_id, db) + user_id = get_jwt_identity() + dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) filters = get_request_filters() return jsonify(stat_gen.get_interactional_analysis(dataset_content, filters)), 200 except NotAuthorisedException: diff --git a/server/auth.py b/server/auth.py index 2371da9..625c3c2 100644 --- a/server/auth.py +++ b/server/auth.py @@ -6,19 +6,28 @@ class AuthManager: self.db = db self.bcrypt = bcrypt + # private + def _save_user(self, username, email, password_hash): + query = """ + INSERT INTO users (username, email, password_hash) + VALUES (%s, %s, %s) + """ + self.db.execute(query, (username, email, password_hash)) + + # public def register_user(self, username, email, password): hashed_password = self.bcrypt.generate_password_hash(password).decode("utf-8") - if self.db.get_user_by_email(email): + if self.get_user_by_email(email): raise ValueError("Email already registered") - if self.db.get_user_by_username(username): + if self.get_user_by_username(username): raise ValueError("Username already taken") - self.db.save_user(username, email, hashed_password) + self._save_user(username, email, hashed_password) def authenticate_user(self, username, password): - user = self.db.get_user_by_username(username) + user = self.get_user_by_username(username) if user and self.bcrypt.check_password_hash(user['password_hash'], password): return user return None @@ -26,4 +35,14 @@ class AuthManager: def get_user_by_id(self, user_id): query = "SELECT id, username, email FROM users WHERE id = %s" result = self.db.execute(query, (user_id,), fetch=True) - return result[0] if result else None \ No newline at end of file + return result[0] if result else None + + def get_user_by_username(self, username) -> dict: + query = "SELECT id, username, email, password_hash FROM users WHERE username = %s" + result = self.db.execute(query, (username,), fetch=True) + return result[0] if result else None + + def get_user_by_email(self, email) -> dict: + query = "SELECT id, username, email, password_hash FROM users WHERE email = %s" + result = self.db.execute(query, (email,), fetch=True) + return result[0] if result else None diff --git a/server/datasets.py b/server/datasets.py new file mode 100644 index 0000000..f741d7d --- /dev/null +++ b/server/datasets.py @@ -0,0 +1,104 @@ +import pandas as pd +from server.db.database import PostgresConnector +from psycopg2.extras import Json +from server.exceptions import NotAuthorisedException +from flask_jwt_extended import get_jwt_identity + +class DatasetManager: + def __init__(self, db: PostgresConnector): + self.db = db + + def get_dataset_and_validate(self, dataset_id: int, user_id: int) -> pd.DataFrame: + dataset_info = self.get_dataset_info(dataset_id) + + if dataset_info.get("user_id") != user_id: + raise NotAuthorisedException("This user is not authorised to access this dataset") + + return self.get_dataset_content(dataset_id) + + def get_dataset_content(self, dataset_id: int) -> pd.DataFrame: + query = "SELECT * FROM events WHERE dataset_id = %s" + result = self.db.execute(query, (dataset_id,), fetch=True) + return pd.DataFrame(result) + + def get_dataset_info(self, dataset_id: int) -> dict: + query = "SELECT * FROM datasets WHERE id = %s" + result = self.db.execute(query, (dataset_id,), fetch=True) + return result[0] if result else None + + def save_dataset_info(self, user_id: int, dataset_name: str, topics: dict) -> int: + query = """ + INSERT INTO datasets (user_id, name, topics) + VALUES (%s, %s, %s) + RETURNING id + """ + result = self.db.execute(query, (user_id, dataset_name, Json(topics)), fetch=True) + return result[0]["id"] if result else None + + def get_dataset_content(self, dataset_id: int) -> pd.DataFrame: + query = "SELECT * FROM events WHERE dataset_id = %s" + result = self.db.execute(query, (dataset_id,), fetch=True) + return pd.DataFrame(result) + + def save_dataset_content(self, dataset_id: int, event_data: pd.DataFrame): + if event_data.empty: + return + + query = """ + INSERT INTO events ( + dataset_id, + type, + parent_id, + author, + content, + timestamp, + date, + dt, + hour, + weekday, + reply_to, + source, + topic, + topic_confidence, + ner_entities, + emotion_anger, + emotion_disgust, + emotion_fear, + emotion_joy, + emotion_sadness + ) + VALUES ( + %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s + ) + """ + + values = [ + ( + dataset_id, + row["type"], + row["parent_id"], + row["author"], + row["content"], + row["timestamp"], + row["date"], + row["dt"], + row["hour"], + row["weekday"], + row.get("reply_to"), + row["source"], + row.get("topic"), + row.get("topic_confidence"), + Json(row["ner_entities"]) if row.get("ner_entities") else None, + row.get("emotion_anger"), + row.get("emotion_disgust"), + row.get("emotion_fear"), + row.get("emotion_joy"), + row.get("emotion_sadness"), + ) + for _, row in event_data.iterrows() + ] + + self.db.execute_batch(query, values) \ No newline at end of file diff --git a/server/db/database.py b/server/db/database.py index 80cf5b3..34a63f2 100644 --- a/server/db/database.py +++ b/server/db/database.py @@ -27,112 +27,13 @@ class PostgresConnector: return cursor.fetchall() self.connection.commit() - def executemany(self, query, param_list) -> list: - with self.connection.cursor(cursor_factory=RealDictCursor) as cursor: - cursor.executemany(query, param_list) - self.connection.commit() - - ## User Management Methods - def save_user(self, username, email, password_hash): - query = """ - INSERT INTO users (username, email, password_hash) - VALUES (%s, %s, %s) - """ - self.execute(query, (username, email, password_hash)) - - def get_user_by_username(self, username) -> dict: - query = "SELECT id, username, email, password_hash FROM users WHERE username = %s" - result = self.execute(query, (username,), fetch=True) - return result[0] if result else None - - def get_user_by_email(self, email) -> dict: - query = "SELECT id, username, email, password_hash FROM users WHERE email = %s" - result = self.execute(query, (email,), fetch=True) - return result[0] if result else None - - # Dataset Management Methods - def save_dataset_info(self, user_id: int, dataset_name: str, topics: dict) -> int: - query = """ - INSERT INTO datasets (user_id, name, topics) - VALUES (%s, %s, %s) - RETURNING id - """ - result = self.execute(query, (user_id, dataset_name, Json(topics)), fetch=True) - return result[0]["id"] if result else None - - def save_dataset_content(self, dataset_id: int, event_data: pd.DataFrame): - query = """ - INSERT INTO events ( - dataset_id, - type, - parent_id, - author, - content, - timestamp, - date, - dt, - hour, - weekday, - reply_to, - source, - topic, - topic_confidence, - ner_entities, - emotion_anger, - emotion_disgust, - emotion_fear, - emotion_joy, - emotion_sadness - ) - VALUES ( - %s, %s, %s, %s, %s, - %s, %s, %s, %s, %s, - %s, %s, %s, %s, %s, - %s, %s, %s, %s, %s - ) - """ - - values = [] - - for _, row in event_data.iterrows(): - values.append(( - dataset_id, - row["type"], - row["parent_id"], - row["author"], - row["content"], - row["timestamp"], - row["date"], - row["dt"], - row["hour"], - row["weekday"], - row.get("reply_to"), - row["source"], - row.get("topic"), - row.get("topic_confidence"), - Json(row["ner_entities"]) if row.get("ner_entities") else None, - row.get("emotion_anger"), - row.get("emotion_disgust"), - row.get("emotion_fear"), - row.get("emotion_joy"), - row.get("emotion_sadness"), - )) - - + def execute_batch(self, query, values): with self.connection.cursor(cursor_factory=RealDictCursor) as cursor: execute_batch(cursor, query, values) self.connection.commit() - def get_dataset_content(self, dataset_id: int) -> pd.DataFrame: - query = "SELECT * FROM events WHERE dataset_id = %s" - result = self.execute(query, (dataset_id,), fetch=True) - return pd.DataFrame(result) - - def get_dataset_info(self, dataset_id: int) -> dict: - query = "SELECT * FROM datasets WHERE id = %s" - result = self.execute(query, (dataset_id,), fetch=True) - return result[0] if result else None + ## User Management Methods def close(self): if self.connection: self.connection.close() \ No newline at end of file diff --git a/server/utils.py b/server/utils.py index 5e20706..815739f 100644 --- a/server/utils.py +++ b/server/utils.py @@ -1,10 +1,5 @@ import datetime -import pandas as pd - from flask import request -from flask_jwt_extended import get_jwt_identity -from server.db.database import PostgresConnector -from server.exceptions import NotAuthorisedException def parse_datetime_filter(value): if not value: @@ -53,12 +48,3 @@ def get_request_filters() -> dict: filters["data_sources"] = data_sources return filters - -def get_dataset_and_validate(dataset_id: int, db: PostgresConnector) -> pd.DataFrame: - current_user = get_jwt_identity() - dataset = db.get_dataset_info(dataset_id) - - if dataset.get("user_id") != int(current_user): - raise NotAuthorisedException("This user is not authorised to access this dataset") - - return db.get_dataset_content(dataset_id)