diff --git a/server/app.py b/server/app.py index 900469f..a92f4a7 100644 --- a/server/app.py +++ b/server/app.py @@ -126,7 +126,7 @@ def upload_data(): ), 400 try: - current_user = get_jwt_identity() + current_user = int(get_jwt_identity()) posts_df = pd.read_json(post_file, lines=True, convert_dates=False) topics = json.load(topic_file) @@ -150,13 +150,16 @@ def upload_data(): except Exception as e: return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500 - @app.route("/dataset/", methods=["GET"]) @jwt_required() def get_dataset(dataset_id): try: - user_id = get_jwt_identity() - dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) + user_id = int(get_jwt_identity()) + + if not dataset_manager.authorize_user_dataset(dataset_id, user_id): + raise NotAuthorisedException("This user is not authorised to access this dataset") + + dataset_content = dataset_manager.get_dataset_content(dataset_id) filters = get_request_filters() filtered_dataset = stat_gen.filter_dataset(dataset_content, filters) return jsonify(filtered_dataset), 200 @@ -168,13 +171,34 @@ def get_dataset(dataset_id): print(traceback.format_exc()) return jsonify({"error": "An unexpected error occured"}), 500 +@app.route("/dataset//status", methods=["GET"]) +@jwt_required() +def get_dataset_status(dataset_id): + try: + user_id = int(get_jwt_identity()) + + if not dataset_manager.authorize_user_dataset(dataset_id, user_id): + raise NotAuthorisedException("This user is not authorised to access this dataset") + + dataset_status = dataset_manager.get_dataset_status(dataset_id) + return jsonify(dataset_status), 200 + except NotAuthorisedException: + return jsonify({"error": "User is not authorised to access this content"}), 403 + except NonExistentDatasetException: + return jsonify({"error": "Dataset does not exist"}), 404 + except Exception: + print(traceback.format_exc()) + return jsonify({"error": "An unexpected error occured"}), 500 @app.route("/dataset//content", methods=["GET"]) @jwt_required() def content_endpoint(dataset_id): try: - user_id = get_jwt_identity() - dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) + user_id = int(get_jwt_identity()) + if not dataset_manager.authorize_user_dataset(dataset_id, user_id): + raise NotAuthorisedException("This user is not authorised to access this dataset") + + dataset_content = dataset_manager.get_dataset_content(dataset_id) filters = get_request_filters() return jsonify(stat_gen.get_content_analysis(dataset_content, filters)), 200 except NotAuthorisedException: @@ -190,8 +214,11 @@ def content_endpoint(dataset_id): @jwt_required() def get_summary(dataset_id): try: - user_id = get_jwt_identity() - dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) + user_id = int(get_jwt_identity()) + if not dataset_manager.authorize_user_dataset(dataset_id, user_id): + raise NotAuthorisedException("This user is not authorised to access this dataset") + + dataset_content = dataset_manager.get_dataset_content(dataset_id) filters = get_request_filters() return jsonify(stat_gen.summary(dataset_content, filters)), 200 except NotAuthorisedException: @@ -207,8 +234,11 @@ def get_summary(dataset_id): @jwt_required() def get_time_analysis(dataset_id): try: - user_id = get_jwt_identity() - dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) + user_id = int(get_jwt_identity()) + if not dataset_manager.authorize_user_dataset(dataset_id, user_id): + raise NotAuthorisedException("This user is not authorised to access this dataset") + + dataset_content = dataset_manager.get_dataset_content(dataset_id) filters = get_request_filters() return jsonify(stat_gen.get_time_analysis(dataset_content, filters)), 200 except NotAuthorisedException: @@ -224,8 +254,11 @@ def get_time_analysis(dataset_id): @jwt_required() def get_user_analysis(dataset_id): try: - user_id = get_jwt_identity() - dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) + user_id = int(get_jwt_identity()) + if not dataset_manager.authorize_user_dataset(dataset_id, user_id): + raise NotAuthorisedException("This user is not authorised to access this dataset") + + dataset_content = dataset_manager.get_dataset_content(dataset_id) filters = get_request_filters() return jsonify(stat_gen.get_user_analysis(dataset_content, filters)), 200 except NotAuthorisedException: @@ -241,8 +274,11 @@ def get_user_analysis(dataset_id): @jwt_required() def get_cultural_analysis(dataset_id): try: - user_id = get_jwt_identity() - dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) + user_id = int(get_jwt_identity()) + if not dataset_manager.authorize_user_dataset(dataset_id, user_id): + raise NotAuthorisedException("This user is not authorised to access this dataset") + + dataset_content = dataset_manager.get_dataset_content(dataset_id) filters = get_request_filters() return jsonify(stat_gen.get_cultural_analysis(dataset_content, filters)), 200 except NotAuthorisedException: @@ -258,8 +294,11 @@ def get_cultural_analysis(dataset_id): @jwt_required() def get_interaction_analysis(dataset_id): try: - user_id = get_jwt_identity() - dataset_content = dataset_manager.get_dataset_and_validate(dataset_id, int(user_id)) + user_id = int(get_jwt_identity()) + if not dataset_manager.authorize_user_dataset(dataset_id, user_id): + raise NotAuthorisedException("This user is not authorised to access this dataset") + + dataset_content = dataset_manager.get_dataset_content(dataset_id) filters = get_request_filters() return jsonify(stat_gen.get_interactional_analysis(dataset_content, filters)), 200 except NotAuthorisedException: diff --git a/server/core/datasets.py b/server/core/datasets.py index a1835e6..541db5d 100644 --- a/server/core/datasets.py +++ b/server/core/datasets.py @@ -7,13 +7,16 @@ class DatasetManager: def __init__(self, db: PostgresConnector): self.db = db - def get_dataset_and_validate(self, dataset_id: int, user_id: int) -> pd.DataFrame: + def authorize_user_dataset(self, dataset_id: int, user_id: int) -> bool: dataset_info = self.get_dataset_info(dataset_id) + if dataset_info.get("user_id", None) == None: + return False + if dataset_info.get("user_id") != user_id: - raise NotAuthorisedException("This user is not authorised to access this dataset") + return False - return self.get_dataset_content(dataset_id) + return True def get_dataset_content(self, dataset_id: int) -> pd.DataFrame: query = "SELECT * FROM events WHERE dataset_id = %s" @@ -102,4 +105,36 @@ class DatasetManager: for _, row in event_data.iterrows() ] - self.db.execute_batch(query, values) \ No newline at end of file + self.db.execute_batch(query, values) + + def set_dataset_status(self, dataset_id: int, status: str, status_message: str | None = None): + if status not in ["processing", "complete", "error"]: + raise ValueError("Invalid status") + + query = """ + UPDATE datasets + SET status = %s, + status_message = %s, + completed_at = CASE + WHEN %s = 'complete' THEN NOW() + ELSE NULL + END + WHERE id = %s + """ + + self.db.execute(query, (status, status_message, status, dataset_id)) + + def get_dataset_status(self, dataset_id: int): + query = """ + SELECT status, status_message, completed_at + FROM datasets + WHERE id = %s + """ + + result = self.db.execute(query, (dataset_id, ), fetch=True) + + if not result: + print(result) + raise NonExistentDatasetException(f"Dataset {dataset_id} does not exist") + + return result[0] \ No newline at end of file diff --git a/server/queue/tasks.py b/server/queue/tasks.py index 6076581..f2f3268 100644 --- a/server/queue/tasks.py +++ b/server/queue/tasks.py @@ -5,15 +5,20 @@ from server.analysis.enrichment import DatasetEnrichment @celery.task(bind=True, max_retries=3) def process_dataset(self, dataset_id: int, posts: list, topics: dict): - from server.db.database import PostgresConnector - from server.core.datasets import DatasetManager - db = PostgresConnector() - dataset_manager = DatasetManager(db) + try: + from server.db.database import PostgresConnector + from server.core.datasets import DatasetManager - df = pd.DataFrame(posts) + db = PostgresConnector() + dataset_manager = DatasetManager(db) - processor = DatasetEnrichment(df, topics) - enriched_df = processor.enrich() + df = pd.DataFrame(posts) - dataset_manager.save_dataset_content(dataset_id, enriched_df) \ No newline at end of file + processor = DatasetEnrichment(df, topics) + enriched_df = processor.enrich() + + dataset_manager.save_dataset_content(dataset_id, enriched_df) + dataset_manager.set_dataset_status(dataset_id, "complete", "NLP Processing Completed Successfully") + except Exception as e: + dataset_manager.set_dataset_status(dataset_id, "error", f"An error occurred: {e}") \ No newline at end of file