Compare commits
13 Commits
c11b4bb85b
...
auth-test
| Author | SHA1 | Date | |
|---|---|---|---|
| 058f3ae702 | |||
| be6ab1f929 | |||
| 3165bf1aa9 | |||
| 29a4e5bb22 | |||
| dc919681fd | |||
| 0589b2c8a5 | |||
| 96a5bcc9e8 | |||
| 66f1b26cc8 | |||
| 257eb80de7 | |||
| 3a23b1f0c8 | |||
| 8c76476cd3 | |||
| 397986dc89 | |||
| 04b7094036 |
52
db/database.py
Normal file
52
db/database.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import os
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
|
||||||
|
|
||||||
|
class PostgresConnector:
    """
    Simple PostgreSQL connector (single connection).

    Connection parameters are read from POSTGRES_* environment variables,
    falling back to local-development defaults. Rows are returned as dicts
    via RealDictCursor.
    """

    def __init__(self):
        # Dev-friendly defaults; real deployments must override via env.
        self.connection = psycopg2.connect(
            host=os.getenv("POSTGRES_HOST", "localhost"),
            port=os.getenv("POSTGRES_PORT", 5432),
            user=os.getenv("POSTGRES_USER", "postgres"),
            password=os.getenv("POSTGRES_PASSWORD", "postgres"),
            database=os.getenv("POSTGRES_DB", "postgres"),
        )
        # Explicit transaction control: every statement below must be
        # committed or rolled back.
        self.connection.autocommit = False

    def execute(self, query, params=None, fetch=False):
        """Run one statement; return fetched rows when fetch=True, else None.

        Commits on success and rolls back on failure. The rollback is the
        important fix: without it a failed statement leaves this shared
        connection in an aborted transaction and psycopg2 rejects every
        subsequent query ("current transaction is aborted"). Committing on
        the fetch path as well ends read transactions instead of holding
        their snapshot/locks open indefinitely.
        """
        try:
            with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
                cursor.execute(query, params)
                rows = cursor.fetchall() if fetch else None
            self.connection.commit()
            return rows
        except Exception:
            self.connection.rollback()
            raise

    def executemany(self, query, param_list):
        """Run the statement once per parameter tuple, atomically.

        All rows commit together; any failure rolls the whole batch back.
        """
        try:
            with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
                cursor.executemany(query, param_list)
            self.connection.commit()
        except Exception:
            self.connection.rollback()
            raise

    def save_user(self, username, email, password_hash):
        """Insert a new user row (password must already be hashed)."""
        query = """
            INSERT INTO users (username, email, password_hash)
            VALUES (%s, %s, %s)
        """
        self.execute(query, (username, email, password_hash))

    def get_user_by_username(self, username) -> dict:
        """Return the user row (incl. password_hash) for *username*, or None."""
        query = "SELECT id, username, email, password_hash FROM users WHERE username = %s"
        result = self.execute(query, (username,), fetch=True)
        return result[0] if result else None

    def get_user_by_email(self, email) -> dict:
        """Return the user row (incl. password_hash) for *email*, or None."""
        query = "SELECT id, username, email, password_hash FROM users WHERE email = %s"
        result = self.execute(query, (email,), fetch=True)
        return result[0] if result else None

    def close(self):
        """Close the underlying connection if one is open."""
        if self.connection:
            self.connection.close()
|
||||||
35
db/schema.sql
Normal file
35
db/schema.sql
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
-- Accounts that can log in to the application.
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    username VARCHAR(255) NOT NULL UNIQUE,
    email VARCHAR(255) NOT NULL UNIQUE,
    password_hash VARCHAR(255) NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Imported posts. Created before has_access/comments, both of which
-- reference posts(id).
CREATE TABLE posts (
    id SERIAL PRIMARY KEY,
    author VARCHAR(255) NOT NULL,
    title VARCHAR(255) NOT NULL,
    content TEXT NOT NULL,
    created_at TIMESTAMP NOT NULL,
    source VARCHAR(255) NOT NULL
);

-- Join table: which users may access which posts.
-- NOTE: this table was originally declared before posts, so its
-- FOREIGN KEY to posts(id) failed when the script ran top-to-bottom
-- (e.g. via docker-entrypoint-initdb.d); it must come after posts.
CREATE TABLE has_access (
    id SERIAL PRIMARY KEY,
    user_id INTEGER NOT NULL,
    post_id INTEGER NOT NULL,
    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
    FOREIGN KEY (post_id) REFERENCES posts(id) ON DELETE CASCADE
);

-- Comments attached to posts. reply_to is a free-form reference
-- (VARCHAR) — presumably another comment's external id; confirm against
-- the importer before adding a constraint.
CREATE TABLE comments (
    id SERIAL PRIMARY KEY,
    post_id INTEGER NOT NULL,
    author VARCHAR(255) NOT NULL,
    content TEXT NOT NULL,
    created_at TIMESTAMP NOT NULL,
    reply_to VARCHAR(255),
    source VARCHAR(255) NOT NULL,
    FOREIGN KEY (post_id) REFERENCES posts(id) ON DELETE CASCADE
);
|
||||||
15
docker-compose.yml
Normal file
15
docker-compose.yml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
services:
  postgres:
    image: postgres:16
    container_name: postgres_db
    restart: unless-stopped
    env_file:
      - .env
    ports:
      - "5432:5432"
    volumes:
      # Data directory is a host bind mount so the database survives
      # container re-creation.
      - ./db/postgres_vol:/var/lib/postgresql/data
      # Applied automatically by the postgres image on FIRST
      # initialisation only (ignored once the data dir exists).
      - ./db/schema.sql:/docker-entrypoint-initdb.d/schema.sql

# NOTE: the previous top-level `volumes: postgres_data:` declaration was
# removed — it was never referenced by any service (the data dir above is
# a bind mount, not a named volume), so it was dead configuration.
|
||||||
@@ -34,7 +34,7 @@ function ApiToGraphData(apiData: InteractionGraph) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const InteractionStats = (props: { data: UserAnalysisResponse }) => {
|
const UserStats = (props: { data: UserAnalysisResponse }) => {
|
||||||
const graphData = ApiToGraphData(props.data.interaction_graph);
|
const graphData = ApiToGraphData(props.data.interaction_graph);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -44,7 +44,7 @@ const InteractionStats = (props: { data: UserAnalysisResponse }) => {
|
|||||||
This graph visualizes interactions between users based on comments and replies.
|
This graph visualizes interactions between users based on comments and replies.
|
||||||
Nodes represent users, and edges represent interactions (e.g., comments or replies) between them.
|
Nodes represent users, and edges represent interactions (e.g., comments or replies) between them.
|
||||||
</p>
|
</p>
|
||||||
<div style={{ height: "600px", border: "1px solid #ccc", borderRadius: 8, marginTop: 16 }}>
|
<div>
|
||||||
<ForceGraph3D
|
<ForceGraph3D
|
||||||
graphData={graphData}
|
graphData={graphData}
|
||||||
nodeAutoColorBy="id"
|
nodeAutoColorBy="id"
|
||||||
@@ -58,4 +58,4 @@ const InteractionStats = (props: { data: UserAnalysisResponse }) => {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export default InteractionStats;
|
export default UserStats;
|
||||||
@@ -3,7 +3,7 @@ import axios from "axios";
|
|||||||
import StatsStyling from "../styles/stats_styling";
|
import StatsStyling from "../styles/stats_styling";
|
||||||
import SummaryStats from "../components/SummaryStats";
|
import SummaryStats from "../components/SummaryStats";
|
||||||
import EmotionalStats from "../components/EmotionalStats";
|
import EmotionalStats from "../components/EmotionalStats";
|
||||||
import InteractionStats from "../components/InteractionStats";
|
import InteractionStats from "../components/UserStats";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
type SummaryResponse,
|
type SummaryResponse,
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
beautifulsoup4==4.14.3
|
beautifulsoup4==4.14.3
|
||||||
Flask==3.1.2
|
Flask==3.1.3
|
||||||
flask_cors==6.0.2
|
flask_cors==6.0.2
|
||||||
google_api_python_client==2.188.0
|
google_api_python_client==2.188.0
|
||||||
keybert==0.9.0
|
|
||||||
nltk==3.9.2
|
nltk==3.9.2
|
||||||
pandas==3.0.0
|
numpy==2.4.2
|
||||||
|
pandas==3.0.1
|
||||||
|
# NOTE(review): psycopg2 and psycopg2_binary ship the same `psycopg2`
# module; pinning both is a duplicate/conflicting requirement — keep one.
psycopg2_binary==2.9.11
|
||||||
python-dotenv==1.2.1
|
python-dotenv==1.2.1
|
||||||
Requests==2.32.5
|
Requests==2.32.5
|
||||||
sentence_transformers==5.2.2
|
sentence_transformers==5.2.2
|
||||||
|
|||||||
@@ -123,4 +123,86 @@ class InteractionAnalysis:
|
|||||||
|
|
||||||
interactions[a][b] = interactions[a].get(b, 0) + 1
|
interactions[a][b] = interactions[a].get(b, 0) + 1
|
||||||
|
|
||||||
return interactions
|
return interactions
|
||||||
|
|
||||||
|
def average_thread_depth(self):
    """Average number of reply hops from each message up to its thread root.

    A root message has depth 0, a direct reply 1, and so on. Returns 0 for
    an empty dataset; otherwise the mean depth rounded to 2 decimals.
    """
    id_to_reply = self.df.set_index("id")["reply_to"].to_dict()

    depths = []
    for _, row in self.df.iterrows():
        depth = 0
        current_id = row["id"]
        # Cycle guard — mirrors average_thread_length_by_emotion. The
        # original had none, so a malformed reply cycle (a -> b -> a)
        # spun this loop forever.
        seen = set()

        while True:
            if current_id in seen:
                break
            seen.add(current_id)

            reply_to = id_to_reply.get(current_id)
            # Chain ends at a root (no parent, NaN, or empty string) or
            # at a parent id missing from the dataset (get() -> None).
            if reply_to is None or pd.isna(reply_to) or reply_to == "":
                break

            depth += 1
            current_id = reply_to

        depths.append(depth)

    if not depths:
        return 0

    return round(sum(depths) / len(depths), 2)
|
||||||
|
|
||||||
|
def average_thread_length_by_emotion(self):
    """Average reply-chain length grouped by each message's dominant emotion.

    For every message, walks the reply_to chain toward the root to get a
    thread length (the message itself counts as 1), then buckets that
    length under the message's highest-scoring emotion_* column
    (excluding neutral/surprise). Returns {emotion: mean length rounded
    to 2 decimals}; emotions that are never dominant are absent.
    """
    emotion_exclusions = {"emotion_neutral", "emotion_surprise"}

    emotion_cols = [
        c for c in self.df.columns
        if c.startswith("emotion_") and c not in emotion_exclusions
    ]

    id_to_reply = self.df.set_index("id")["reply_to"].to_dict()
    # Memoizes chain lengths so shared suffixes are walked only once.
    length_cache = {}

    def thread_length_from(start_id):
        # Number of messages on the path start_id -> root, memoized.
        if start_id in length_cache:
            return length_cache[start_id]

        seen = set()
        length = 1
        current = start_id

        while True:
            if current in seen:
                # infinite loop shouldn't happen, but just in case
                break
            seen.add(current)

            reply_to = id_to_reply.get(current)

            # Root reached: missing parent (None), NaN float, or "".
            if reply_to is None or (isinstance(reply_to, float) and pd.isna(reply_to)) or reply_to == "":
                break

            length += 1
            current = reply_to

            # Parent's length is already known: extend by its cached
            # remainder (minus 1 so the shared node isn't double-counted).
            if current in length_cache:
                length += (length_cache[current] - 1)
                break

        # NOTE(review): only the starting id is cached, not the
        # intermediate nodes visited on the walk — confirm intended.
        length_cache[start_id] = length
        return length

    emotion_to_lengths = {}

    # Fill NaNs in emotion cols to avoid max() issues
    emo_df = self.df[["id"] + emotion_cols].copy()
    emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)

    for _, row in emo_df.iterrows():
        msg_id = row["id"]
        length = thread_length_from(msg_id)

        # Dominant emotion = arg-max over the (NaN-filled) scores.
        emotions = {c: row[c] for c in emotion_cols}
        dominant = max(emotions, key=emotions.get)

        emotion_to_lengths.setdefault(dominant, []).append(length)

    return {
        emotion: round(sum(lengths) / len(lengths), 2)
        for emotion, lengths in emotion_to_lengths.items()
    }
|
||||||
@@ -9,6 +9,10 @@ class LinguisticAnalysis:
|
|||||||
self.df = df
|
self.df = df
|
||||||
self.word_exclusions = word_exclusions
|
self.word_exclusions = word_exclusions
|
||||||
|
|
||||||
|
def _tokenize(self, text: str):
    """Extract candidate word tokens from *text*.

    Matches runs of three or more lowercase letters and drops any token
    listed in self.word_exclusions.
    """
    excluded = self.word_exclusions
    return [word for word in re.findall(r"\b[a-z]{3,}\b", text) if word not in excluded]
|
||||||
|
|
||||||
def _clean_text(self, text: str) -> str:
|
def _clean_text(self, text: str) -> str:
|
||||||
text = re.sub(r"http\S+", "", text) # remove URLs
|
text = re.sub(r"http\S+", "", text) # remove URLs
|
||||||
text = re.sub(r"www\S+", "", text)
|
text = re.sub(r"www\S+", "", text)
|
||||||
@@ -65,4 +69,45 @@ class LinguisticAnalysis:
|
|||||||
.sort_values("count", ascending=False)
|
.sort_values("count", ascending=False)
|
||||||
.head(limit)
|
.head(limit)
|
||||||
.to_dict(orient="records")
|
.to_dict(orient="records")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def identity_markers(self):
    """Count in-group vs out-group pronoun usage across all content.

    In-group = first-person-plural pronouns ("we", "us", ...); out-group
    = third-person-plural ("they", "them", ...). Returns raw counts plus
    each count as a ratio of all tokens (0 when there are no tokens).
    """
    df = self.df.copy()
    df["content"] = df["content"].fillna("").astype(str).str.lower()

    in_group_words = {"we", "us", "our", "ourselves"}
    out_group_words = {"they", "them", "their", "themselves"}

    in_count = 0
    out_count = 0
    total = 0

    # Iterate the content column directly. The original looped
    # `for post in df:` which iterates column *names* (strings), so
    # `post["content"]` raised TypeError on the first iteration.
    for text in df["content"]:
        # 2+ letters (not the 3+ used by _tokenize) so the short
        # pronouns "we"/"us" are captured.
        tokens = re.findall(r"\b[a-z]{2,}\b", text)
        total += len(tokens)
        in_count += sum(t in in_group_words for t in tokens)
        out_count += sum(t in out_group_words for t in tokens)

    # NOTE: the original also built per-emotion accumulators
    # (in_emotions/out_emotions) and a debug print, but never used them
    # in the return value — removed as dead code.
    return {
        "in_group_usage": in_count,
        "out_group_usage": out_count,
        "in_group_ratio": round(in_count / max(total, 1), 5),
        "out_group_ratio": round(out_count / max(total, 1), 5),
    }
|
||||||
125
server/app.py
125
server/app.py
@@ -1,21 +1,102 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
from flask import Flask, jsonify, request
|
from flask import Flask, jsonify, request
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
|
from flask_bcrypt import Bcrypt
|
||||||
|
from flask_jwt_extended import (
|
||||||
|
JWTManager,
|
||||||
|
create_access_token,
|
||||||
|
jwt_required,
|
||||||
|
get_jwt_identity
|
||||||
|
)
|
||||||
|
|
||||||
from server.stat_gen import StatGen
|
from server.stat_gen import StatGen
|
||||||
|
from db.database import PostgresConnector
|
||||||
|
from server.auth import AuthManager
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import traceback
|
import traceback
|
||||||
import json
|
import json
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
db = PostgresConnector()
|
||||||
|
|
||||||
# Allow for CORS from localhost:5173
|
# Env Variables
|
||||||
CORS(app, resources={r"/*": {"origins": "http://localhost:5173"}})
|
load_dotenv()
|
||||||
|
frontend_url = os.getenv("FRONTEND_URL", "http://localhost:5173")
|
||||||
|
jwt_secret_key = os.getenv("JWT_SECRET_KEY", "super-secret-change-this")
|
||||||
|
jwt_access_token_expires = int(os.getenv("JWT_ACCESS_TOKEN_EXPIRES", 1200)) # Default to 20 minutes
|
||||||
|
|
||||||
|
# Flask Configuration
|
||||||
|
CORS(app, resources={r"/*": {"origins": frontend_url}})
|
||||||
|
app.config["JWT_SECRET_KEY"] = jwt_secret_key
|
||||||
|
app.config["JWT_ACCESS_TOKEN_EXPIRES"] = jwt_access_token_expires
|
||||||
|
|
||||||
|
bcrypt = Bcrypt(app)
|
||||||
|
jwt = JWTManager(app)
|
||||||
|
auth_manager = AuthManager(db, bcrypt)
|
||||||
|
|
||||||
# Global State
|
# Global State
|
||||||
posts_df = pd.read_json('small.jsonl', lines=True)
|
# posts_df = pd.read_json('small.jsonl', lines=True)
|
||||||
with open("topic_buckets.json", "r", encoding="utf-8") as f:
|
# with open("topic_buckets.json", "r", encoding="utf-8") as f:
|
||||||
domain_topics = json.load(f)
|
# domain_topics = json.load(f)
|
||||||
stat_obj = StatGen(posts_df, domain_topics)
|
# stat_obj = StatGen(posts_df, domain_topics)
|
||||||
|
stat_obj = None
|
||||||
|
|
||||||
|
@app.route('/register', methods=['POST'])
def register_user():
    """Register a new account from a JSON body {username, email, password}.

    Responds 400 on missing fields or on a duplicate username/email
    (surfaced as ValueError from AuthManager), 500 on unexpected
    failures, 200 on success.
    """
    data = request.get_json()

    if not data or "username" not in data or "email" not in data or "password" not in data:
        return jsonify({"error": "Missing username, email, or password"}), 400

    username = data["username"]
    email = data["email"]
    password = data["password"]

    try:
        auth_manager.register_user(username, email, password)
    except ValueError as e:
        # Duplicate username/email — a client error, not a server fault.
        return jsonify({"error": str(e)}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500

    print(f"Registered new user: {username}")
    return jsonify({"message": f"User '{username}' registered successfully"}), 200
|
||||||
|
|
||||||
|
@app.route('/login', methods=['POST'])
def login_user():
    """Log a user in from a JSON body {username, password}.

    On success returns a JWT access token. 400 on missing fields,
    401 on bad credentials, 500 on unexpected errors.
    """
    data = request.get_json()

    if not data or "username" not in data or "password" not in data:
        return jsonify({"error": "Missing username or password"}), 400

    username = data["username"]
    password = data["password"]

    try:
        user = auth_manager.authenticate_user(username, password)
        if user:
            # Token identity is the user id as a string — presumably
            # required by the JWT layer; /profile reads it back.
            access_token = create_access_token(identity=str(user['id']))
            return jsonify({"access_token": access_token}), 200
        else:
            # Deliberately vague: don't reveal whether the username exists.
            return jsonify({"error": "Invalid username or password"}), 401
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||||
|
|
||||||
|
@app.route("/profile", methods=["GET"])
|
||||||
|
@jwt_required()
|
||||||
|
def profile():
|
||||||
|
current_user = get_jwt_identity()
|
||||||
|
|
||||||
|
return jsonify(
|
||||||
|
message="Access granted",
|
||||||
|
user=auth_manager.get_user_by_id(current_user)
|
||||||
|
), 200
|
||||||
|
|
||||||
|
|
||||||
@app.route('/upload', methods=['POST'])
|
@app.route('/upload', methods=['POST'])
|
||||||
def upload_data():
|
def upload_data():
|
||||||
@@ -55,7 +136,7 @@ def word_frequencies():
|
|||||||
return jsonify({"error": "No data uploaded"}), 400
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return jsonify(stat_obj.content_analysis()), 200
|
return jsonify(stat_obj.get_content_analysis()), 200
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -80,7 +161,7 @@ def get_time_analysis():
|
|||||||
return jsonify({"error": "No data uploaded"}), 400
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return jsonify(stat_obj.time_analysis()), 200
|
return jsonify(stat_obj.get_time_analysis()), 200
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -93,13 +174,39 @@ def get_user_analysis():
|
|||||||
return jsonify({"error": "No data uploaded"}), 400
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return jsonify(stat_obj.user_analysis()), 200
|
return jsonify(stat_obj.get_user_analysis()), 200
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||||
|
|
||||||
|
@app.route("/stats/cultural", methods=["GET"])
|
||||||
|
def get_cultural_analysis():
|
||||||
|
if stat_obj is None:
|
||||||
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
|
try:
|
||||||
|
return jsonify(stat_obj.get_cultural_analysis()), 200
|
||||||
|
except ValueError as e:
|
||||||
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
|
except Exception as e:
|
||||||
|
print(traceback.format_exc())
|
||||||
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||||
|
|
||||||
|
@app.route("/stats/interaction", methods=["GET"])
|
||||||
|
def get_interaction_analysis():
|
||||||
|
if stat_obj is None:
|
||||||
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
|
try:
|
||||||
|
return jsonify(stat_obj.get_interactional_analysis()), 200
|
||||||
|
except ValueError as e:
|
||||||
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
|
except Exception as e:
|
||||||
|
print(traceback.format_exc())
|
||||||
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||||
|
|
||||||
@app.route('/filter/search', methods=["POST"])
|
@app.route('/filter/search', methods=["POST"])
|
||||||
def search_dataset():
|
def search_dataset():
|
||||||
if stat_obj is None:
|
if stat_obj is None:
|
||||||
|
|||||||
29
server/auth.py
Normal file
29
server/auth.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
from db.database import PostgresConnector
|
||||||
|
from flask_bcrypt import Bcrypt
|
||||||
|
|
||||||
|
class AuthManager:
    """User registration and credential verification on top of PostgresConnector."""

    def __init__(self, db: PostgresConnector, bcrypt: Bcrypt):
        self.db = db
        self.bcrypt = bcrypt

    def register_user(self, username, email, password):
        """Create a new user.

        Raises ValueError when the email or username is already taken.
        """
        # Check duplicates *before* hashing: bcrypt is deliberately slow,
        # so don't pay its cost for a request that will be rejected.
        if self.db.get_user_by_email(email):
            raise ValueError("Email already registered")

        if self.db.get_user_by_username(username):
            raise ValueError("Username already taken")

        hashed_password = self.bcrypt.generate_password_hash(password).decode("utf-8")
        # NOTE(review): check-then-insert is racy under concurrency; the
        # UNIQUE constraints in the schema are the real guarantee, and a
        # concurrent duplicate will surface as a DB error from save_user.
        self.db.save_user(username, email, hashed_password)

    def authenticate_user(self, username, password):
        """Return the user row when username/password match, else None."""
        user = self.db.get_user_by_username(username)
        if user and self.bcrypt.check_password_hash(user['password_hash'], password):
            return user
        return None

    def get_user_by_id(self, user_id):
        """Fetch id/username/email for *user_id*, or None when absent."""
        query = "SELECT id, username, email FROM users WHERE id = %s"
        result = self.db.execute(query, (user_id,), fetch=True)
        return result[0] if result else None
|
||||||
@@ -62,13 +62,18 @@ class StatGen:
|
|||||||
self.nlp.add_ner_cols()
|
self.nlp.add_ner_cols()
|
||||||
|
|
||||||
## Public
|
## Public
|
||||||
def time_analysis(self) -> pd.DataFrame:
|
|
||||||
|
|
||||||
|
# topics over time
|
||||||
|
# emotions over time
|
||||||
|
def get_time_analysis(self) -> pd.DataFrame:
|
||||||
return {
|
return {
|
||||||
"events_per_day": self.temporal_analysis.posts_per_day(),
|
"events_per_day": self.temporal_analysis.posts_per_day(),
|
||||||
"weekday_hour_heatmap": self.temporal_analysis.heatmap()
|
"weekday_hour_heatmap": self.temporal_analysis.heatmap()
|
||||||
}
|
}
|
||||||
|
|
||||||
def content_analysis(self) -> dict:
|
# average topic duration
|
||||||
|
def get_content_analysis(self) -> dict:
|
||||||
return {
|
return {
|
||||||
"word_frequencies": self.linguistic_analysis.word_frequencies(),
|
"word_frequencies": self.linguistic_analysis.word_frequencies(),
|
||||||
"common_two_phrases": self.linguistic_analysis.ngrams(),
|
"common_two_phrases": self.linguistic_analysis.ngrams(),
|
||||||
@@ -77,13 +82,31 @@ class StatGen:
|
|||||||
"reply_time_by_emotion": self.temporal_analysis.avg_reply_time_per_emotion()
|
"reply_time_by_emotion": self.temporal_analysis.avg_reply_time_per_emotion()
|
||||||
}
|
}
|
||||||
|
|
||||||
def user_analysis(self) -> dict:
|
# average emotion per user
|
||||||
|
# average chain length
|
||||||
|
def get_user_analysis(self) -> dict:
|
||||||
return {
|
return {
|
||||||
"top_users": self.interaction_analysis.top_users(),
|
"top_users": self.interaction_analysis.top_users(),
|
||||||
"users": self.interaction_analysis.per_user_analysis(),
|
"users": self.interaction_analysis.per_user_analysis(),
|
||||||
"interaction_graph": self.interaction_analysis.interaction_graph()
|
"interaction_graph": self.interaction_analysis.interaction_graph()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# average / max thread depth
|
||||||
|
# high engagment threads based on volume
|
||||||
|
|
||||||
|
def get_interactional_analysis(self) -> dict:
    """Interaction-level stats: average thread depth and average thread
    length grouped by dominant emotion."""
    return {
        "average_thread_depth": self.interaction_analysis.average_thread_depth(),
        "average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion()
    }
|
||||||
|
|
||||||
|
# detect community jargon
|
||||||
|
# in-group and out-group linguistic markers
|
||||||
|
def get_cultural_analysis(self) -> dict:
    """Cultural stats: in-group vs out-group identity-marker usage."""
    return {
        "identity_markers": self.linguistic_analysis.identity_markers()
    }
|
||||||
|
|
||||||
def summary(self) -> dict:
|
def summary(self) -> dict:
|
||||||
total_posts = (self.df["type"] == "post").sum()
|
total_posts = (self.df["type"] == "post").sum()
|
||||||
total_comments = (self.df["type"] == "comment").sum()
|
total_comments = (self.df["type"] == "comment").sum()
|
||||||
|
|||||||
Reference in New Issue
Block a user