refactor: move core files into separate dirs

This commit is contained in:
2026-03-03 11:13:33 +00:00
parent 8b8462fd58
commit 87bdc0245a
5 changed files with 4 additions and 4 deletions

272
server/core/app.py Normal file
View File

@@ -0,0 +1,272 @@
import os
import pandas as pd
import traceback
import json
from dotenv import load_dotenv
from flask import Flask, jsonify, request
from flask_cors import CORS
from flask_bcrypt import Bcrypt
from flask_jwt_extended import (
JWTManager,
create_access_token,
jwt_required,
get_jwt_identity,
)
from server.analysis.stat_gen import StatGen
from server.analysis.enrichment import DatasetEnrichment
from server.exceptions import NotAuthorisedException, NotExistentDatasetException
from server.db.database import PostgresConnector
from server.core.auth import AuthManager
from server.core.datasets import DatasetManager
from server.utils import get_request_filters
# Flask application object; every @app.route decorator in this module registers on it.
app = Flask(__name__)
# Env Variables
# load_dotenv() runs before the os.getenv lookups below so values supplied
# through a .env file are visible to them.
load_dotenv()
# Origin allowed by the CORS rule configured below.
frontend_url = os.getenv("FRONTEND_URL", "http://localhost:5173")
# NOTE(review): the fallback value is a hard-coded placeholder secret; tokens
# signed with it are forgeable by anyone who has read this file, so
# JWT_SECRET_KEY should always be set outside local development.
jwt_secret_key = os.getenv("JWT_SECRET_KEY", "super-secret-change-this")
# The env value arrives as a string, hence the int() conversion; a
# non-numeric value raises ValueError at import time.
jwt_access_token_expires = int(
os.getenv("JWT_ACCESS_TOKEN_EXPIRES", 1200)
) # Default to 20 minutes
# Flask Configuration
# CORS is restricted to the single configured frontend origin for all routes.
CORS(app, resources={r"/*": {"origins": frontend_url}})
app.config["JWT_SECRET_KEY"] = jwt_secret_key
app.config["JWT_ACCESS_TOKEN_EXPIRES"] = jwt_access_token_expires
# Extension and service singletons shared by the route handlers below.
bcrypt = Bcrypt(app)
jwt = JWTManager(app)
# NOTE(review): PostgresConnector() is constructed at import time; presumably
# it opens a database connection here — confirm in server.db.database.
db = PostgresConnector()
auth_manager = AuthManager(db, bcrypt)
dataset_manager = DatasetManager(db)
stat_gen = StatGen()
@app.route("/register", methods=["POST"])
def register_user():
    """Create a new user account from a JSON request body.

    The body must be a JSON object with ``username``, ``email`` and
    ``password`` keys.

    Returns:
        ``200`` with a confirmation message on success; ``400`` when the body
        is not a JSON object, a required key is missing, or the auth manager
        rejects the registration with ``ValueError``; ``500`` on any other
        error.
    """
    # silent=True makes an unparsable or non-JSON body come back as None so
    # the client receives the JSON 400 below rather than Flask's default
    # error response.
    data = request.get_json(silent=True)

    # A JSON list/string/number would pass a bare ``in`` test in surprising
    # ways and then crash on the key lookups, so require an object.
    required_keys = ("username", "email", "password")
    if not isinstance(data, dict) or any(key not in data for key in required_keys):
        return jsonify({"error": "Missing username, email, or password"}), 400

    username = data["username"]
    email = data["email"]
    password = data["password"]

    try:
        auth_manager.register_user(username, email, password)
    except ValueError as e:
        # Raised for duplicate email/username; safe to show to the client.
        return jsonify({"error": str(e)}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500

    print(f"Registered new user: {username}")
    return jsonify({"message": f"User '{username}' registered successfully"}), 200
@app.route("/login", methods=["POST"])
def login_user():
    """Authenticate a user and issue a JWT access token.

    The body must be a JSON object with ``username`` and ``password`` keys.

    Returns:
        ``200`` with ``access_token`` on success; ``400`` when the body is not
        a JSON object or a key is missing; ``401`` when the credentials do not
        match; ``500`` on any other error.
    """
    # silent=True turns an unparsable body into None so the JSON 400 below is
    # returned instead of Flask's default error response.
    data = request.get_json(silent=True)

    # Require a JSON object: other JSON types would crash on the key lookups.
    if not isinstance(data, dict) or "username" not in data or "password" not in data:
        return jsonify({"error": "Missing username or password"}), 400

    username = data["username"]
    password = data["password"]

    try:
        user = auth_manager.authenticate_user(username, password)
        if not user:
            return jsonify({"error": "Invalid username or password"}), 401
        # The identity is stored as a string; handlers convert it back with int().
        access_token = create_access_token(identity=str(user["id"]))
        return jsonify({"access_token": access_token}), 200
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/profile", methods=["GET"])
@jwt_required()
def profile():
    """Return the stored user record for the identity in the presented JWT."""
    identity = get_jwt_identity()
    user_record = auth_manager.get_user_by_id(identity)
    response = jsonify(message="Access granted", user=user_record)
    return response, 200
@app.route("/upload", methods=["POST"])
@jwt_required()
def upload_data():
    """Ingest a posts file and a topics file, enrich them and persist the result.

    Expects multipart form data with two files: ``posts`` (a ``.jsonl`` file)
    and ``topics`` (a ``.json`` file).

    Returns:
        ``200`` with the number of stored events and the new ``dataset_id``;
        ``400`` when a file is missing, unnamed, has the wrong extension or
        cannot be parsed; ``500`` on any other error.
    """
    if "posts" not in request.files or "topics" not in request.files:
        return jsonify({"error": "Missing required files or form data"}), 400

    post_file = request.files["posts"]
    topic_file = request.files["topics"]

    if post_file.filename == "" or topic_file.filename == "":
        return jsonify({"error": "Empty filename"}), 400

    if not post_file.filename.endswith(".jsonl") or not topic_file.filename.endswith(
        ".json"
    ):
        return jsonify(
            {"error": "Invalid file type. Only .jsonl and .json files are allowed."}
        ), 400

    try:
        current_user = get_jwt_identity()
        posts_df = pd.read_json(post_file, lines=True, convert_dates=False)

        # Parse the topics file in its own guard so a malformed topics file is
        # not reported as a JSONL problem. JSONDecodeError and
        # UnicodeDecodeError are both ValueError subclasses.
        try:
            topics = json.load(topic_file)
        except ValueError as e:
            return jsonify(
                {"error": f"Failed to read topics JSON file: {str(e)}"}
            ), 400

        processor = DatasetEnrichment(posts_df, topics)
        enriched_df = processor.enrich()

        dataset_id = dataset_manager.save_dataset_info(
            current_user, f"dataset_{current_user}", topics
        )
        dataset_manager.save_dataset_content(dataset_id, enriched_df)

        return jsonify(
            {
                "message": "File uploaded successfully",
                "event_count": len(enriched_df),
                "dataset_id": dataset_id,
            }
        ), 200
    except ValueError as e:
        return jsonify({"error": f"Failed to read JSONL file: {str(e)}"}), 400
    except Exception as e:
        # Log the traceback like the other endpoints do; without it an
        # unexpected failure leaves no server-side trace.
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/dataset/<int:dataset_id>", methods=["GET"])
@jwt_required()
def get_dataset(dataset_id):
    """Return the (optionally filtered) content of a dataset owned by the caller.

    Args:
        dataset_id: Dataset identifier taken from the URL.

    Returns:
        ``200`` with the filtered dataset; ``403`` when the dataset belongs to
        another user; ``404`` when the dataset manager reports that it does
        not exist; ``500`` on any other error.
    """
    try:
        # The JWT identity is issued as a string, so convert it before the
        # ownership comparison done by the dataset manager.
        user_id = get_jwt_identity()
        dataset_content = dataset_manager.get_dataset_and_validate(
            dataset_id, int(user_id)
        )
        filters = get_request_filters()
        filtered_dataset = stat_gen.filter_dataset(dataset_content, filters)
        return jsonify(filtered_dataset), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NotExistentDatasetException:
        return jsonify({"error": "Dataset does not exist"}), 404
    except Exception:
        print(traceback.format_exc())
        return jsonify({"error": "An unexpected error occurred"}), 500
@app.route("/dataset/<int:dataset_id>/content", methods=["GET"])
@jwt_required()
def content_endpoint(dataset_id):
    """Return the content analysis of a dataset owned by the caller.

    Args:
        dataset_id: Dataset identifier taken from the URL.

    Returns:
        ``200`` with the analysis; ``403`` when the dataset belongs to another
        user; ``404`` when the dataset manager reports that it does not exist;
        ``400`` on ``ValueError``; ``500`` on any other error.
    """
    try:
        # The JWT identity is a string; the ownership check compares integers.
        user_id = get_jwt_identity()
        dataset_content = dataset_manager.get_dataset_and_validate(
            dataset_id, int(user_id)
        )
        filters = get_request_filters()
        return jsonify(stat_gen.get_content_analysis(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NotExistentDatasetException:
        # Same 404 mapping as the plain dataset endpoint.
        return jsonify({"error": "Dataset does not exist"}), 404
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/dataset/<int:dataset_id>/summary", methods=["GET"])
@jwt_required()
def get_summary(dataset_id):
    """Return the summary statistics of a dataset owned by the caller.

    Args:
        dataset_id: Dataset identifier taken from the URL.

    Returns:
        ``200`` with the summary; ``403`` when the dataset belongs to another
        user; ``404`` when the dataset manager reports that it does not exist;
        ``400`` on ``ValueError``; ``500`` on any other error.
    """
    try:
        # The JWT identity is a string; the ownership check compares integers.
        user_id = get_jwt_identity()
        dataset_content = dataset_manager.get_dataset_and_validate(
            dataset_id, int(user_id)
        )
        filters = get_request_filters()
        return jsonify(stat_gen.summary(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NotExistentDatasetException:
        # Same 404 mapping as the plain dataset endpoint.
        return jsonify({"error": "Dataset does not exist"}), 404
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/dataset/<int:dataset_id>/time", methods=["GET"])
@jwt_required()
def get_time_analysis(dataset_id):
    """Return the time analysis of a dataset owned by the caller.

    Args:
        dataset_id: Dataset identifier taken from the URL.

    Returns:
        ``200`` with the analysis; ``403`` when the dataset belongs to another
        user; ``404`` when the dataset manager reports that it does not exist;
        ``400`` on ``ValueError``; ``500`` on any other error.
    """
    try:
        # The JWT identity is a string; the ownership check compares integers.
        user_id = get_jwt_identity()
        dataset_content = dataset_manager.get_dataset_and_validate(
            dataset_id, int(user_id)
        )
        filters = get_request_filters()
        return jsonify(stat_gen.get_time_analysis(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NotExistentDatasetException:
        # Same 404 mapping as the plain dataset endpoint.
        return jsonify({"error": "Dataset does not exist"}), 404
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/dataset/<int:dataset_id>/user", methods=["GET"])
@jwt_required()
def get_user_analysis(dataset_id):
    """Return the user analysis of a dataset owned by the caller.

    Args:
        dataset_id: Dataset identifier taken from the URL.

    Returns:
        ``200`` with the analysis; ``403`` when the dataset belongs to another
        user; ``404`` when the dataset manager reports that it does not exist;
        ``400`` on ``ValueError``; ``500`` on any other error.
    """
    try:
        # The JWT identity is a string; the ownership check compares integers.
        user_id = get_jwt_identity()
        dataset_content = dataset_manager.get_dataset_and_validate(
            dataset_id, int(user_id)
        )
        filters = get_request_filters()
        return jsonify(stat_gen.get_user_analysis(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NotExistentDatasetException:
        # Same 404 mapping as the plain dataset endpoint.
        return jsonify({"error": "Dataset does not exist"}), 404
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/dataset/<int:dataset_id>/cultural", methods=["GET"])
@jwt_required()
def get_cultural_analysis(dataset_id):
    """Return the cultural analysis of a dataset owned by the caller.

    Args:
        dataset_id: Dataset identifier taken from the URL.

    Returns:
        ``200`` with the analysis; ``403`` when the dataset belongs to another
        user; ``404`` when the dataset manager reports that it does not exist;
        ``400`` on ``ValueError``; ``500`` on any other error.
    """
    try:
        # The JWT identity is a string; the ownership check compares integers.
        user_id = get_jwt_identity()
        dataset_content = dataset_manager.get_dataset_and_validate(
            dataset_id, int(user_id)
        )
        filters = get_request_filters()
        return jsonify(stat_gen.get_cultural_analysis(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NotExistentDatasetException:
        # Same 404 mapping as the plain dataset endpoint.
        return jsonify({"error": "Dataset does not exist"}), 404
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/dataset/<int:dataset_id>/interaction", methods=["GET"])
@jwt_required()
def get_interaction_analysis(dataset_id):
    """Return the interactional analysis of a dataset owned by the caller.

    Args:
        dataset_id: Dataset identifier taken from the URL.

    Returns:
        ``200`` with the analysis; ``403`` when the dataset belongs to another
        user; ``404`` when the dataset manager reports that it does not exist;
        ``400`` on ``ValueError``; ``500`` on any other error.
    """
    try:
        # The JWT identity is a string; the ownership check compares integers.
        user_id = get_jwt_identity()
        dataset_content = dataset_manager.get_dataset_and_validate(
            dataset_id, int(user_id)
        )
        filters = get_request_filters()
        return jsonify(stat_gen.get_interactional_analysis(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NotExistentDatasetException:
        # Same 404 mapping as the plain dataset endpoint.
        return jsonify({"error": "Dataset does not exist"}), 404
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
if __name__ == "__main__":
    # Debug mode enables the interactive debugger, which allows code execution
    # from the browser, so it can be switched off with FLASK_DEBUG=0. The
    # default stays enabled to keep the existing local-development behaviour.
    debug_enabled = os.getenv("FLASK_DEBUG", "1").strip().lower() in (
        "1",
        "true",
        "yes",
        "on",
    )
    app.run(debug=debug_enabled)

48
server/core/auth.py Normal file
View File

@@ -0,0 +1,48 @@
from server.db.database import PostgresConnector
from flask_bcrypt import Bcrypt
class AuthManager:
    """User registration, lookup and credential checks against the ``users`` table.

    Args:
        db: Connector whose ``execute(query, params, fetch=...)`` runs SQL.
        bcrypt: Flask-Bcrypt instance used to hash and verify passwords.
    """

    def __init__(self, db: PostgresConnector, bcrypt: Bcrypt):
        self.db = db
        self.bcrypt = bcrypt

    # private
    def _save_user(self, username, email, password_hash):
        """Insert a new row into ``users``; values are passed as query parameters."""
        query = """
            INSERT INTO users (username, email, password_hash)
            VALUES (%s, %s, %s)
        """
        self.db.execute(query, (username, email, password_hash))

    # public
    def register_user(self, username, email, password):
        """Store a new user after checking that email and username are unused.

        Raises:
            ValueError: If the email is already registered or the username is
                already taken.
        """
        if self.get_user_by_email(email):
            raise ValueError("Email already registered")

        if self.get_user_by_username(username):
            raise ValueError("Username already taken")

        # Hash only once the registration is known to be acceptable: bcrypt is
        # deliberately expensive, so rejected requests should not pay for it.
        hashed_password = self.bcrypt.generate_password_hash(password).decode("utf-8")
        self._save_user(username, email, hashed_password)

    def authenticate_user(self, username, password):
        """Return the user row when the password matches, otherwise ``None``."""
        user = self.get_user_by_username(username)

        if user and self.bcrypt.check_password_hash(user["password_hash"], password):
            return user

        return None

    def get_user_by_id(self, user_id):
        """Return ``id``, ``username`` and ``email`` for *user_id*, or ``None``."""
        query = "SELECT id, username, email FROM users WHERE id = %s"
        result = self.db.execute(query, (user_id,), fetch=True)
        return result[0] if result else None

    def get_user_by_username(self, username) -> "dict | None":
        """Return the user row (including ``password_hash``) for *username*, or ``None``."""
        query = "SELECT id, username, email, password_hash FROM users WHERE username = %s"
        result = self.db.execute(query, (username,), fetch=True)
        return result[0] if result else None

    def get_user_by_email(self, email) -> "dict | None":
        """Return the user row (including ``password_hash``) for *email*, or ``None``."""
        query = "SELECT id, username, email, password_hash FROM users WHERE email = %s"
        result = self.db.execute(query, (email,), fetch=True)
        return result[0] if result else None

103
server/core/datasets.py Normal file
View File

@@ -0,0 +1,103 @@
import pandas as pd
from psycopg2.extras import Json

from server.db.database import PostgresConnector
from server.exceptions import NotAuthorisedException, NotExistentDatasetException
class DatasetManager:
    """Read and write dataset metadata (``datasets``) and rows (``events``).

    Args:
        db: Connector providing ``execute(query, params, fetch=...)`` and
            ``execute_batch(query, values)``.
    """

    def __init__(self, db: PostgresConnector):
        self.db = db

    def get_dataset_and_validate(self, dataset_id: int, user_id: int) -> pd.DataFrame:
        """Return the dataset's events after checking existence and ownership.

        Raises:
            NotExistentDatasetException: If no dataset has this id.
            NotAuthorisedException: If the dataset belongs to another user.
        """
        dataset_info = self.get_dataset_info(dataset_id)

        # get_dataset_info returns None for an unknown id; raise the dedicated
        # exception instead of failing on ``None.get`` with AttributeError.
        if dataset_info is None:
            raise NotExistentDatasetException("This dataset does not exist")

        if dataset_info.get("user_id") != user_id:
            raise NotAuthorisedException("This user is not authorised to access this dataset")

        return self.get_dataset_content(dataset_id)

    def get_dataset_content(self, dataset_id: int) -> pd.DataFrame:
        """Return every ``events`` row of the dataset as a DataFrame."""
        query = "SELECT * FROM events WHERE dataset_id = %s"
        result = self.db.execute(query, (dataset_id,), fetch=True)
        return pd.DataFrame(result)

    def get_dataset_info(self, dataset_id: int) -> "dict | None":
        """Return the ``datasets`` row for *dataset_id*, or ``None`` if absent."""
        query = "SELECT * FROM datasets WHERE id = %s"
        result = self.db.execute(query, (dataset_id,), fetch=True)

        return result[0] if result else None

    def save_dataset_info(self, user_id: int, dataset_name: str, topics: dict) -> int:
        """Insert a ``datasets`` row and return its generated id.

        Returns ``None`` when the connector yields no row for the insert.
        """
        query = """
            INSERT INTO datasets (user_id, name, topics)
            VALUES (%s, %s, %s)
            RETURNING id
        """
        # Json(...) wraps the topics so they are stored as a JSON value.
        result = self.db.execute(query, (user_id, dataset_name, Json(topics)), fetch=True)
        return result[0]["id"] if result else None

    def save_dataset_content(self, dataset_id: int, event_data: pd.DataFrame):
        """Batch-insert every row of *event_data* into ``events``.

        Does nothing for an empty frame. Columns read with ``row[...]`` must be
        present; those read with ``row.get(...)`` fall back to ``None``.
        """
        if event_data.empty:
            return

        query = """
            INSERT INTO events (
                dataset_id,
                type,
                parent_id,
                author,
                content,
                timestamp,
                date,
                dt,
                hour,
                weekday,
                reply_to,
                source,
                topic,
                topic_confidence,
                ner_entities,
                emotion_anger,
                emotion_disgust,
                emotion_fear,
                emotion_joy,
                emotion_sadness
            )
            VALUES (
                %s, %s, %s, %s, %s,
                %s, %s, %s, %s, %s,
                %s, %s, %s, %s, %s,
                %s, %s, %s, %s, %s
            )
        """

        # Tuple order must match the column list in the INSERT above.
        values = [
            (
                dataset_id,
                row["type"],
                row["parent_id"],
                row["author"],
                row["content"],
                row["timestamp"],
                row["date"],
                row["dt"],
                row["hour"],
                row["weekday"],
                row.get("reply_to"),
                row["source"],
                row.get("topic"),
                row.get("topic_confidence"),
                # Missing or falsy entities are stored as NULL rather than JSON.
                Json(row["ner_entities"]) if row.get("ner_entities") else None,
                row.get("emotion_anger"),
                row.get("emotion_disgust"),
                row.get("emotion_fear"),
                row.get("emotion_joy"),
                row.get("emotion_sadness"),
            )
            for _, row in event_data.iterrows()
        ]

        self.db.execute_batch(query, values)