At the moment only allows for the updating of the name. Which seems to be the only editable part of dataset metadata.
365 lines
14 KiB
Python
365 lines
14 KiB
Python
import os
|
|
import pandas as pd
|
|
import traceback
|
|
import json
|
|
|
|
from dotenv import load_dotenv
|
|
from flask import Flask, jsonify, request
|
|
from flask_cors import CORS
|
|
from flask_bcrypt import Bcrypt
|
|
from flask_jwt_extended import (
|
|
JWTManager,
|
|
create_access_token,
|
|
jwt_required,
|
|
get_jwt_identity,
|
|
)
|
|
|
|
from server.analysis.stat_gen import StatGen
|
|
from server.exceptions import NotAuthorisedException, NonExistentDatasetException
|
|
from server.db.database import PostgresConnector
|
|
from server.core.auth import AuthManager
|
|
from server.core.datasets import DatasetManager
|
|
from server.utils import get_request_filters
|
|
from server.queue.tasks import process_dataset
|
|
|
|
app = Flask(__name__)
|
|
|
|
# Env Variables
|
|
load_dotenv()
|
|
frontend_url = os.getenv("FRONTEND_URL", "http://localhost:5173")
|
|
jwt_secret_key = os.getenv("JWT_SECRET_KEY", "super-secret-change-this")
|
|
jwt_access_token_expires = int(
|
|
os.getenv("JWT_ACCESS_TOKEN_EXPIRES", 1200)
|
|
) # Default to 20 minutes
|
|
|
|
# Flask Configuration
|
|
CORS(app, resources={r"/*": {"origins": frontend_url}})
|
|
app.config["JWT_SECRET_KEY"] = jwt_secret_key
|
|
app.config["JWT_ACCESS_TOKEN_EXPIRES"] = jwt_access_token_expires
|
|
|
|
bcrypt = Bcrypt(app)
|
|
jwt = JWTManager(app)
|
|
|
|
db = PostgresConnector()
|
|
auth_manager = AuthManager(db, bcrypt)
|
|
dataset_manager = DatasetManager(db)
|
|
stat_gen = StatGen()
|
|
|
|
|
|
@app.route("/register", methods=["POST"])
|
|
def register_user():
|
|
data = request.get_json()
|
|
|
|
if (
|
|
not data
|
|
or "username" not in data
|
|
or "email" not in data
|
|
or "password" not in data
|
|
):
|
|
return jsonify({"error": "Missing username, email, or password"}), 400
|
|
|
|
username = data["username"]
|
|
email = data["email"]
|
|
password = data["password"]
|
|
|
|
try:
|
|
auth_manager.register_user(username, email, password)
|
|
except ValueError as e:
|
|
return jsonify({"error": str(e)}), 400
|
|
except Exception as e:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
|
|
|
print(f"Registered new user: {username}")
|
|
return jsonify({"message": f"User '{username}' registered successfully"}), 200
|
|
|
|
|
|
@app.route("/login", methods=["POST"])
|
|
def login_user():
|
|
data = request.get_json()
|
|
|
|
if not data or "username" not in data or "password" not in data:
|
|
return jsonify({"error": "Missing username or password"}), 400
|
|
|
|
username = data["username"]
|
|
password = data["password"]
|
|
|
|
try:
|
|
user = auth_manager.authenticate_user(username, password)
|
|
if user:
|
|
access_token = create_access_token(identity=str(user["id"]))
|
|
return jsonify({"access_token": access_token}), 200
|
|
else:
|
|
return jsonify({"error": "Invalid username or password"}), 401
|
|
except Exception as e:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
|
|
|
|
|
@app.route("/profile", methods=["GET"])
|
|
@jwt_required()
|
|
def profile():
|
|
current_user = get_jwt_identity()
|
|
|
|
return jsonify(
|
|
message="Access granted", user=auth_manager.get_user_by_id(current_user)
|
|
), 200
|
|
|
|
@app.route("/user/datasets")
|
|
@jwt_required()
|
|
def get_user_datasets():
|
|
current_user = int(get_jwt_identity())
|
|
return jsonify(dataset_manager.get_user_datasets(current_user)), 200
|
|
|
|
@app.route("/upload", methods=["POST"])
|
|
@jwt_required()
|
|
def upload_data():
|
|
if "posts" not in request.files or "topics" not in request.files:
|
|
return jsonify({"error": "Missing required files or form data"}), 400
|
|
|
|
post_file = request.files["posts"]
|
|
topic_file = request.files["topics"]
|
|
dataset_name = (request.form.get("name") or "").strip()
|
|
|
|
if not dataset_name:
|
|
return jsonify({"error": "Missing required dataset name"}), 400
|
|
|
|
if post_file.filename == "" or topic_file.filename == "":
|
|
return jsonify({"error": "Empty filename"}), 400
|
|
|
|
if not post_file.filename.endswith(".jsonl") or not topic_file.filename.endswith(
|
|
".json"
|
|
):
|
|
return jsonify(
|
|
{"error": "Invalid file type. Only .jsonl and .json files are allowed."}
|
|
), 400
|
|
|
|
try:
|
|
current_user = int(get_jwt_identity())
|
|
|
|
posts_df = pd.read_json(post_file, lines=True, convert_dates=False)
|
|
topics = json.load(topic_file)
|
|
dataset_id = dataset_manager.save_dataset_info(current_user, dataset_name, topics)
|
|
|
|
process_dataset.delay(dataset_id, posts_df.to_dict(orient="records"), topics)
|
|
|
|
return jsonify(
|
|
{
|
|
"message": "Dataset queued for processing",
|
|
"dataset_id": dataset_id,
|
|
"status": "processing",
|
|
}
|
|
), 202
|
|
except ValueError as e:
|
|
return jsonify({"error": f"Failed to read JSONL file: {str(e)}"}), 400
|
|
except Exception as e:
|
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
|
|
|
@app.route("/dataset/<int:dataset_id>", methods=["GET"])
|
|
@jwt_required()
|
|
def get_dataset(dataset_id):
|
|
try:
|
|
user_id = int(get_jwt_identity())
|
|
|
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
|
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
|
|
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
|
filters = get_request_filters()
|
|
filtered_dataset = stat_gen.filter_dataset(dataset_content, filters)
|
|
return jsonify(filtered_dataset), 200
|
|
except NotAuthorisedException:
|
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
|
except NonExistentDatasetException:
|
|
return jsonify({"error": "Dataset does not exist"}), 404
|
|
except Exception:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": "An unexpected error occured"}), 500
|
|
|
|
@app.route("/dataset/<int:dataset_id>", methods=["PATCH"])
|
|
@jwt_required()
|
|
def update_dataset(dataset_id):
|
|
try:
|
|
user_id = int(get_jwt_identity())
|
|
|
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
|
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
|
|
|
body = request.get_json()
|
|
new_name = body.get("name")
|
|
|
|
if not new_name or not new_name.strip():
|
|
return jsonify({"error": "A valid name must be provided"}), 400
|
|
|
|
dataset_manager.update_dataset_name(dataset_id, new_name.strip())
|
|
return jsonify({"message": f"Dataset {dataset_id} renamed to '{new_name.strip()}'"}), 200
|
|
except NotAuthorisedException:
|
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
|
except NonExistentDatasetException:
|
|
return jsonify({"error": "Dataset does not exist"}), 404
|
|
except Exception:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": "An unexpected error occurred"}), 500
|
|
|
|
@app.route("/dataset/<int:dataset_id>", methods=["DELETE"])
|
|
@jwt_required()
|
|
def delete_dataset(dataset_id):
|
|
try:
|
|
user_id = int(get_jwt_identity())
|
|
|
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
|
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
|
|
|
dataset_manager.delete_dataset_info(dataset_id)
|
|
dataset_manager.delete_dataset_content(dataset_id)
|
|
return jsonify({"message": f"Dataset {dataset_id} metadata and content successfully deleted"}), 200
|
|
except NotAuthorisedException:
|
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
|
except NonExistentDatasetException:
|
|
return jsonify({"error": "Dataset does not exist"}), 404
|
|
except Exception:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": "An unexpected error occured"}), 500
|
|
|
|
@app.route("/dataset/<int:dataset_id>/status", methods=["GET"])
|
|
@jwt_required()
|
|
def get_dataset_status(dataset_id):
|
|
try:
|
|
user_id = int(get_jwt_identity())
|
|
|
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
|
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
|
|
|
dataset_status = dataset_manager.get_dataset_status(dataset_id)
|
|
return jsonify(dataset_status), 200
|
|
except NotAuthorisedException:
|
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
|
except NonExistentDatasetException:
|
|
return jsonify({"error": "Dataset does not exist"}), 404
|
|
except Exception:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": "An unexpected error occured"}), 500
|
|
|
|
@app.route("/dataset/<int:dataset_id>/content", methods=["GET"])
|
|
@jwt_required()
|
|
def content_endpoint(dataset_id):
|
|
try:
|
|
user_id = int(get_jwt_identity())
|
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
|
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
|
|
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
|
filters = get_request_filters()
|
|
return jsonify(stat_gen.get_content_analysis(dataset_content, filters)), 200
|
|
except NotAuthorisedException:
|
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
|
except ValueError as e:
|
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
|
except Exception as e:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
|
|
|
|
|
@app.route("/dataset/<int:dataset_id>/summary", methods=["GET"])
|
|
@jwt_required()
|
|
def get_summary(dataset_id):
|
|
try:
|
|
user_id = int(get_jwt_identity())
|
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
|
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
|
|
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
|
filters = get_request_filters()
|
|
return jsonify(stat_gen.summary(dataset_content, filters)), 200
|
|
except NotAuthorisedException:
|
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
|
except ValueError as e:
|
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
|
except Exception as e:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
|
|
|
|
|
@app.route("/dataset/<int:dataset_id>/time", methods=["GET"])
|
|
@jwt_required()
|
|
def get_time_analysis(dataset_id):
|
|
try:
|
|
user_id = int(get_jwt_identity())
|
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
|
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
|
|
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
|
filters = get_request_filters()
|
|
return jsonify(stat_gen.get_time_analysis(dataset_content, filters)), 200
|
|
except NotAuthorisedException:
|
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
|
except ValueError as e:
|
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
|
except Exception as e:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
|
|
|
|
|
@app.route("/dataset/<int:dataset_id>/user", methods=["GET"])
|
|
@jwt_required()
|
|
def get_user_analysis(dataset_id):
|
|
try:
|
|
user_id = int(get_jwt_identity())
|
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
|
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
|
|
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
|
filters = get_request_filters()
|
|
return jsonify(stat_gen.get_user_analysis(dataset_content, filters)), 200
|
|
except NotAuthorisedException:
|
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
|
except ValueError as e:
|
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
|
except Exception as e:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
|
|
|
|
|
@app.route("/dataset/<int:dataset_id>/cultural", methods=["GET"])
|
|
@jwt_required()
|
|
def get_cultural_analysis(dataset_id):
|
|
try:
|
|
user_id = int(get_jwt_identity())
|
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
|
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
|
|
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
|
filters = get_request_filters()
|
|
return jsonify(stat_gen.get_cultural_analysis(dataset_content, filters)), 200
|
|
except NotAuthorisedException:
|
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
|
except ValueError as e:
|
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
|
except Exception as e:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
|
|
|
|
|
@app.route("/dataset/<int:dataset_id>/interaction", methods=["GET"])
|
|
@jwt_required()
|
|
def get_interaction_analysis(dataset_id):
|
|
try:
|
|
user_id = int(get_jwt_identity())
|
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
|
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
|
|
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
|
filters = get_request_filters()
|
|
return jsonify(stat_gen.get_interactional_analysis(dataset_content, filters)), 200
|
|
except NotAuthorisedException:
|
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
|
except ValueError as e:
|
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
|
except Exception as e:
|
|
print(traceback.format_exc())
|
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
|
|
|
|
|
if __name__ == "__main__":
|
|
app.run(debug=True)
|