Merge branch 'main' of github:ThisBirchWood/ethnograph-view

This commit is contained in:
2026-01-15 12:43:53 +00:00
6 changed files with 46 additions and 95 deletions

4
.gitignore vendored
View File

@@ -1,3 +1,5 @@
.vscode .vscode
__pycache__/ __pycache__/
*.pyc *.pyc
*.json
*.code-workspace

View File

@@ -30,12 +30,30 @@ class RedditAPI:
return self._parse_posts(data) return self._parse_posts(data)
def get_new_subreddit_posts(self, subreddit: str, limit: int = 10) -> list[Post]: def get_new_subreddit_posts(self, subreddit: str, limit: int = 10) -> list[Post]:
params = {
'limit': limit posts = []
} after = None
url = f"r/{subreddit}/new.json" url = f"r/{subreddit}/new.json"
data = self._fetch_data(url, params)
return self._parse_posts(data) while len(posts) < limit:
batch_limit = min(100, limit - len(posts))
params = {
'limit': batch_limit,
'after': after
}
data = self._fetch_data(url, params)
batch = self._parse_posts(data)
if not batch:
break
posts.extend(batch)
after = data['data'].get('after')
if not after:
break
return posts
def get_user(self, username: str) -> User: def get_user(self, username: str) -> User:
data = self._fetch_data(f"user/{username}/about.json", {}) data = self._fetch_data(f"user/{username}/about.json", {})

19
create_dataset.py Normal file
View File

@@ -0,0 +1,19 @@
import json
from connectors.reddit_api import RedditAPI
data_file = 'data/reddit_posts.json'
reddit_connector = RedditAPI()
def remove_empty_posts(posts):
    """Return only the posts whose body text is non-blank.

    A post is kept when its ``content`` attribute contains at least one
    non-whitespace character; order of the surviving posts is preserved.
    """
    kept = []
    for post in posts:
        if post.content.strip():
            kept.append(post)
    return kept
def main():
    """Fetch recent r/cork submissions, drop blank ones, and dump to JSON.

    Pulls up to 1000 newest posts via the module-level Reddit connector,
    filters out posts with empty bodies, and writes the remainder to
    ``data_file`` as an indented JSON array of post attribute dicts.
    """
    posts = remove_empty_posts(
        reddit_connector.get_new_subreddit_posts('cork', limit=1000)
    )
    print(f"Fetched {len(posts)} posts from r/cork")
    # Posts are plain attribute containers, so vars(post) (== post.__dict__)
    # is assumed JSON-serializable — TODO confirm against the Post class.
    with open(data_file, 'w') as f:
        json.dump([vars(post) for post in posts], f, indent=4)


if __name__ == "__main__":
    main()

View File

@@ -1,34 +0,0 @@
# To connect to PostgreSQL database
import psycopg2
from psycopg2.extras import RealDictCursor
from typing import Optional
class Database:
    """Thin wrapper around a psycopg2 PostgreSQL connection.

    Autocommit is enabled, so every statement is committed immediately.
    Usable as a context manager: the connection is closed on exit.
    """

    def __init__(self, db_name: str, user: str, password: str, host: str = 'localhost', port: int = 5432):
        """Open a connection to *db_name* with the given credentials."""
        self.connection = psycopg2.connect(
            dbname=db_name,
            user=user,
            password=password,
            host=host,
            port=port
        )
        # Commit each statement immediately; callers manage no transactions.
        self.connection.autocommit = True

    def execute_query(self, query: str, params: Optional[tuple] = None):
        """Run one statement; return rows as dicts, or [] for non-row-returning statements."""
        with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(query, params)
            # cursor.description is only set when the statement produced a result set.
            if cursor.description:
                return cursor.fetchall()
            return []

    def execute_many(self, query: str, params_list: list[tuple]):
        """Execute *query* once per tuple in *params_list* (e.g. bulk INSERT)."""
        with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.executemany(query, params_list)

    def close(self):
        """Close the underlying connection."""
        self.connection.close()
        print("Database connection closed.")

    def __enter__(self):
        # BUG FIX: the class defined __exit__ but not __enter__, so using it
        # in a `with` statement raised at runtime. Returning self completes
        # the context-manager protocol without changing any other behavior.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

View File

@@ -1,16 +0,0 @@
-- Namespace all project tables under a dedicated schema.
CREATE SCHEMA IF NOT EXISTS ethnograph;
-- Reddit accounts seen while collecting posts.
CREATE TABLE IF NOT EXISTS ethnograph.users (
id SERIAL PRIMARY KEY,
username VARCHAR(255) UNIQUE NOT NULL,
created_utc TIMESTAMP NOT NULL,
-- Nullable: karma may be unknown at collection time.
karma INTEGER
);
-- Collected submissions. NOTE(review): author_username has no foreign key
-- to ethnograph.users(username) and is nullable — confirm this is intended
-- (e.g. for deleted authors or posts ingested before their author).
CREATE TABLE IF NOT EXISTS ethnograph.posts (
id SERIAL PRIMARY KEY,
title TEXT NOT NULL,
content TEXT NOT NULL,
author_username VARCHAR(255),
created_utc TIMESTAMP NOT NULL
);

View File

@@ -1,52 +1,14 @@
from flask import Flask from flask import Flask
from db.database import Database
from connectors.reddit_api import RedditAPI from connectors.reddit_api import RedditAPI
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
app = Flask(__name__) app = Flask(__name__)
db = Database(db_name='ethnograph', user='ethnograph_user', password='ethnograph_pass')
reddit_connector = RedditAPI() reddit_connector = RedditAPI()
@app.route('/fetch_subreddit/<string:subreddit>/<int:limit>', methods=['GET']) @app.route('/fetch_subreddit/<string:subreddit>/<int:limit>', methods=['GET'])
def fetch_subreddit(subreddit, limit = 10): def fetch_subreddit(subreddit, limit = 10):
posts = reddit_connector.get_top_subreddit_posts(subreddit, limit=limit, timeframe='all') posts = reddit_connector.get_top_subreddit_posts(subreddit, limit=limit, timeframe='all')
return {"status": "success", "posts": [post.__dict__ for post in posts]}
db.execute_many(
"""INSERT INTO ethnograph.posts (title, content, author_username, created_utc)
VALUES (%s, %s, %s, to_timestamp(%s));""",
[(post.title, post.content, post.author, post.timestamp) for post in posts]
)
return {"status": "success", "inserted_posts": len(posts)}
@app.route('/sentiment', methods=['GET'])
def sentiment_analysis():
posts = db.execute_query(
"SELECT id, title, content FROM ethnograph.posts;"
)
analyzer = SentimentIntensityAnalyzer()
total_sentiment = 0.0
count = 0
for post in posts:
content = post.get("title")
if not content:
continue
score = analyzer.polarity_scores(content)["compound"]
total_sentiment += score
count += 1
average_sentiment = total_sentiment / count if count else 0.0
return {
"status": "success",
"average_sentiment": average_sentiment,
"posts_analyzed": count
}
if __name__ == "__main__": if __name__ == "__main__":
app.run(debug=True) app.run(debug=True)