Merge branch 'main' of github:ThisBirchWood/ethnograph-view

2026-01-15 12:43:53 +00:00
parent b5624035ec b0e079599a
commit 47e71113f6
6 changed files with 46 additions and 95 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 .vscode
 __pycache__/
 *.pyc
 *.json
 *.code-workspace
--- a/connectors/reddit_api.py
+++ b/connectors/reddit_api.py
@@ -30,12 +30,30 @@ class RedditAPI:
        return self._parse_posts(data)
    def get_new_subreddit_posts(self, subreddit: str, limit: int = 10) -> list[Post]:
-        params = {
+        
-            'limit': limit
+        posts = []
-        }
+        after = None
        url = f"r/{subreddit}/new.json"
        while len(posts) < limit:
            batch_limit = min(100, limit - len(posts))
            params = {
                'limit': batch_limit,
                'after': after
            }
            data = self._fetch_data(url, params)
-        return self._parse_posts(data)
+            batch = self._parse_posts(data)
            if not batch:
                break
            posts.extend(batch)
            after = data['data'].get('after')
            if not after:
                break
        return posts
    def get_user(self, username: str) -> User:
        data = self._fetch_data(f"user/{username}/about.json", {})
--- a/create_dataset.py
+++ b/create_dataset.py
@@ -0,0 +1,19 @@
 import json
 from connectors.reddit_api import RedditAPI
 # Path the JSON export is written to by main().
 data_file = 'data/reddit_posts.json'
 # Module-level Reddit client that main() uses to fetch posts.
 reddit_connector = RedditAPI()
 def remove_empty_posts(posts):
    """Return a new list with the posts whose ``content`` is not blank.

    A post is dropped when its ``content`` is empty or consists only of
    whitespace. The input order is preserved and ``posts`` is not modified.
    """
    kept = []
    for candidate in posts:
        # An all-whitespace string strips down to "", which is falsy.
        if candidate.content.strip():
            kept.append(candidate)
    return kept
 def main():
    """Fetch new r/cork posts, drop the empty ones and dump them to ``data_file``.

    Up to 1000 posts are requested from the Reddit connector. Posts with blank
    content are filtered out before the count is printed, and each remaining
    post is serialised through its ``__dict__`` as indented JSON.
    """
    # Function-scope import so the module's import block stays untouched.
    from pathlib import Path
    posts = reddit_connector.get_new_subreddit_posts('cork', limit=1000)
    posts = remove_empty_posts(posts)
    print(f"Fetched {len(posts)} posts from r/cork")
    # open(..., 'w') does not create missing directories; without this the
    # whole fetch is lost to FileNotFoundError when data/ does not exist yet.
    Path(data_file).parent.mkdir(parents=True, exist_ok=True)
    with open(data_file, 'w') as f:
        json.dump([post.__dict__ for post in posts], f, indent=4)
 # Run the export only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
--- a/db/database.py
+++ b/db/database.py
@@ -1,34 +0,0 @@
 # To connect to PostgreSQL database
 import psycopg2
 from psycopg2.extras import RealDictCursor
 from typing import Optional
 class Database:
    """Thin wrapper around a single psycopg2 connection to PostgreSQL.

    The connection is opened in ``__init__`` with autocommit switched on, so
    no explicit commit is issued anywhere in this class. Instances can be used
    as context managers: ``with Database(...) as db:`` closes the connection
    when the block exits.
    """
    def __init__(self, db_name: str, user: str, password: str, host: str = 'localhost', port: int = 5432):
        """Connect to the database.

        Args:
            db_name: Name of the database to connect to.
            user: Role used to authenticate.
            password: Password for ``user``.
            host: Server host name; defaults to ``'localhost'``.
            port: Server port; defaults to ``5432``.
        """
        self.connection = psycopg2.connect(
            dbname=db_name,
            user=user,
            password=password,
            host=host,
            port=port
        )
        self.connection.autocommit = True
    def execute_query(self, query: str, params: Optional[tuple] = None):
        """Execute one statement and return its rows.

        Args:
            query: SQL text, with placeholders if ``params`` is given.
            params: Values bound to the placeholders, or ``None``.

        Returns:
            The fetched rows (produced by ``RealDictCursor``) when the
            statement yields a result set, otherwise an empty list.
        """
        with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(query, params)
            # description is only set for statements that produce rows;
            # calling fetchall() after e.g. an INSERT would raise.
            if cursor.description:
                return cursor.fetchall()
            return []
    def execute_many(self, query: str, params_list: list[tuple]):
        """Execute ``query`` once for every parameter tuple in ``params_list``."""
        with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.executemany(query, params_list)
    def close(self):
        """Close the underlying connection and print a confirmation."""
        self.connection.close()
        print("Database connection closed.")
    def __enter__(self):
        """Return this instance so ``with Database(...) as db`` binds it."""
        # __exit__ alone is not enough: the with statement needs both methods.
        return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the connection on leaving the ``with`` block.

        Returns ``None``, so an exception raised inside the block propagates.
        """
        self.close()
--- a/db/sql/01_schema.sql
+++ b/db/sql/01_schema.sql
@@ -1,16 +0,0 @@
 -- Namespace that holds every table created below.
 CREATE SCHEMA IF NOT EXISTS ethnograph;
 -- One row per user; usernames are unique and required, karma may be NULL.
 CREATE TABLE IF NOT EXISTS ethnograph.users (
    id SERIAL PRIMARY KEY,
    username VARCHAR(255) UNIQUE NOT NULL,
    -- NOTE(review): TIMESTAMP stores no time zone; the column name suggests UTC - confirm writers normalise to UTC.
    created_utc TIMESTAMP NOT NULL,
    karma INTEGER
 );
 -- Collected posts. author_username is nullable and has no foreign key to ethnograph.users.
 CREATE TABLE IF NOT EXISTS ethnograph.posts (
    id SERIAL PRIMARY KEY,
    title TEXT NOT NULL,
    content TEXT NOT NULL,
    author_username VARCHAR(255),
    created_utc TIMESTAMP NOT NULL
 );
--- a/server/app.py
+++ b/server/app.py
@@ -1,52 +1,14 @@
 from flask import Flask
 from db.database import Database
 from connectors.reddit_api import RedditAPI
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 # Flask application object; the route decorators in this module register on it.
 app = Flask(__name__)
 # Module-level database handle, created when this module is imported.
 # NOTE(review): the credentials are hard-coded here - consider reading them from the environment or a config file.
 db = Database(db_name='ethnograph', user='ethnograph_user', password='ethnograph_pass')
 # Module-level Reddit client used by the route handlers.
 reddit_connector = RedditAPI()
@app.route('/fetch_subreddit/<string:subreddit>/<int:limit>', methods=['GET'])
 def fetch_subreddit(subreddit, limit = 10):
    posts = reddit_connector.get_top_subreddit_posts(subreddit, limit=limit, timeframe='all')
-
+    return {"status": "success", "posts": [post.__dict__ for post in posts]}
    db.execute_many(
        """INSERT INTO ethnograph.posts (title, content, author_username, created_utc)
           VALUES (%s, %s, %s, to_timestamp(%s));""",
        [(post.title, post.content, post.author, post.timestamp) for post in posts]
    )
    return {"status": "success", "inserted_posts": len(posts)}
@app.route('/sentiment', methods=['GET'])
 def sentiment_analysis():
    """Return the mean compound sentiment score over stored post titles.

    Every row of ``ethnograph.posts`` is loaded. Rows with a missing or empty
    title are skipped. Only ``title`` is scored; ``id`` and ``content`` are
    selected by the query but not used here.

    Returns:
        dict with ``"status"``, ``"average_sentiment"`` (``0.0`` when no title
        was scored) and ``"posts_analyzed"`` (number of titles scored).
    """
    rows = db.execute_query(
        "SELECT id, title, content FROM ethnograph.posts;"
    )
    scorer = SentimentIntensityAnalyzer()
    titles = [row.get("title") for row in rows]
    compound_scores = [
        scorer.polarity_scores(title)["compound"] for title in titles if title
    ]
    scored = len(compound_scores)
    # Seed with 0.0 so the total is accumulated as floats, in row order.
    running_total = sum(compound_scores, 0.0)
    if scored:
        mean_score = running_total / scored
    else:
        mean_score = 0.0
    return {
        "status": "success",
        "average_sentiment": mean_score,
        "posts_analyzed": scored
    }
 # Start the Flask development server only when this file is run directly.
 # NOTE(review): debug=True enables the interactive debugger - confirm this entry point is never used in production.
 if __name__ == "__main__":
    app.run(debug=True)