Merge branch 'main' of github:ThisBirchWood/ethnograph-view
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,3 +1,5 @@
|
||||
.vscode
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.json
|
||||
*.code-workspace
|
||||
@@ -30,12 +30,30 @@ class RedditAPI:
|
||||
return self._parse_posts(data)
|
||||
|
||||
def get_new_subreddit_posts(self, subreddit: str, limit: int = 10) -> list[Post]:
    """Fetch up to *limit* newest posts from a subreddit, paginating as needed.

    Reddit caps a single listing request at 100 items, so posts are
    collected in batches, following the listing's ``after`` cursor until
    *limit* posts are gathered or the listing is exhausted.

    Args:
        subreddit: Subreddit name without the "r/" prefix.
        limit: Maximum number of posts to return (default 10).

    Returns:
        A list of parsed Post objects; may be shorter than *limit* if the
        subreddit has fewer posts.
    """
    posts: list[Post] = []
    after = None
    url = f"r/{subreddit}/new.json"

    while len(posts) < limit:
        # Never request more than the API's 100-item page cap.
        batch_limit = min(100, limit - len(posts))
        params = {
            'limit': batch_limit,
            'after': after
        }

        data = self._fetch_data(url, params)
        # BUG FIX: a stray `return self._parse_posts(data)` here returned
        # after the first request, making the pagination loop dead code.
        batch = self._parse_posts(data)

        if not batch:
            break

        posts.extend(batch)
        after = data['data'].get('after')
        if not after:
            # No further pages available.
            break

    return posts
|
||||
|
||||
def get_user(self, username: str) -> User:
|
||||
data = self._fetch_data(f"user/{username}/about.json", {})
|
||||
|
||||
19
create_dataset.py
Normal file
19
create_dataset.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import json
|
||||
from connectors.reddit_api import RedditAPI
|
||||
|
||||
# Destination file for the scraped posts (parent directory must exist).
data_file = 'data/reddit_posts.json'
# Shared connector instance used by main(); constructed at import time.
reddit_connector = RedditAPI()
|
||||
def remove_empty_posts(posts):
    """Return only the posts whose content is non-empty after stripping whitespace."""
    kept = []
    for candidate in posts:
        if candidate.content.strip():
            kept.append(candidate)
    return kept
|
||||
|
||||
def main():
    """Fetch recent posts from r/cork, drop empty ones, and save them as JSON."""
    posts = reddit_connector.get_new_subreddit_posts('cork', limit=1000)
    posts = remove_empty_posts(posts)

    print(f"Fetched {len(posts)} posts from r/cork")

    # Robustness fix: create the output directory if missing — open() alone
    # raises FileNotFoundError when the parent directory does not exist.
    from pathlib import Path
    Path(data_file).parent.mkdir(parents=True, exist_ok=True)

    with open(data_file, 'w') as f:
        json.dump([post.__dict__ for post in posts], f, indent=4)


if __name__ == "__main__":
    main()
|
||||
@@ -1,34 +0,0 @@
|
||||
# To connect to PostgreSQL database
|
||||
import psycopg2
|
||||
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from typing import Optional
|
||||
|
||||
class Database:
    """Thin wrapper around a psycopg2 PostgreSQL connection.

    Runs in autocommit mode and returns rows as dicts (RealDictCursor).
    Usable as a context manager: the connection is closed on exit.
    """

    def __init__(self, db_name: str, user: str, password: str, host: str = 'localhost', port: int = 5432):
        self.connection = psycopg2.connect(
            dbname=db_name,
            user=user,
            password=password,
            host=host,
            port=port
        )
        # Autocommit so each statement is applied immediately; no explicit
        # transaction management is exposed by this wrapper.
        self.connection.autocommit = True

    def execute_query(self, query: str, params: Optional[tuple] = None):
        """Execute a single statement; return all rows as dicts, or [] for
        statements that produce no result set (e.g. INSERT/UPDATE)."""
        with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(query, params)
            # cursor.description is None when the statement returned no rows.
            if cursor.description:
                return cursor.fetchall()
            return []

    def execute_many(self, query: str, params_list: list[tuple]):
        """Execute the same statement once per parameter tuple."""
        with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.executemany(query, params_list)

    def close(self):
        """Close the underlying connection."""
        self.connection.close()
        print("Database connection closed.")

    def __enter__(self):
        # BUG FIX: __exit__ was defined without __enter__, so using the
        # class in a `with` statement raised AttributeError. Return self to
        # complete the context-manager protocol.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
|
||||
@@ -1,16 +0,0 @@
|
||||
-- Schema for the ethnograph application: Reddit users and posts.
CREATE SCHEMA IF NOT EXISTS ethnograph;

-- Reddit accounts seen by the scraper; one row per unique username.
CREATE TABLE IF NOT EXISTS ethnograph.users (
id SERIAL PRIMARY KEY,
username VARCHAR(255) UNIQUE NOT NULL,
created_utc TIMESTAMP NOT NULL,
-- Nullable: karma may be unavailable when the profile is fetched.
karma INTEGER
);

-- Scraped posts; author_username is stored denormalized (no FK to users).
CREATE TABLE IF NOT EXISTS ethnograph.posts (
id SERIAL PRIMARY KEY,
title TEXT NOT NULL,
content TEXT NOT NULL,
-- Nullable: deleted accounts have no author.
author_username VARCHAR(255),
created_utc TIMESTAMP NOT NULL
);
|
||||
@@ -1,52 +1,14 @@
|
||||
from flask import Flask
|
||||
from db.database import Database
|
||||
from connectors.reddit_api import RedditAPI
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
|
||||
app = Flask(__name__)
# NOTE(review): credentials are hard-coded — consider environment variables.
db = Database(db_name='ethnograph', user='ethnograph_user', password='ethnograph_pass')

# Shared Reddit client used by all route handlers.
reddit_connector = RedditAPI()
|
||||
|
||||
@app.route('/fetch_subreddit/<string:subreddit>/<int:limit>', methods=['GET'])
def fetch_subreddit(subreddit, limit = 10):
    """Fetch a subreddit's all-time top posts and insert them into ethnograph.posts."""
    posts = reddit_connector.get_top_subreddit_posts(subreddit, limit=limit, timeframe='all')

    # Build the parameter tuples first, then bulk-insert in one call.
    rows = [
        (post.title, post.content, post.author, post.timestamp)
        for post in posts
    ]
    db.execute_many(
        """INSERT INTO ethnograph.posts (title, content, author_username, created_utc)
VALUES (%s, %s, %s, to_timestamp(%s));""",
        rows
    )

    return {"status": "success", "inserted_posts": len(posts)}
|
||||
|
||||
@app.route('/sentiment', methods=['GET'])
def sentiment_analysis():
    """Compute the average VADER compound sentiment across all stored posts.

    Returns a JSON object with the average compound score and the number of
    posts that were actually scored (posts with an empty title are skipped).
    """
    posts = db.execute_query(
        "SELECT id, title, content FROM ethnograph.posts;"
    )

    analyzer = SentimentIntensityAnalyzer()

    total_sentiment = 0.0
    count = 0

    for post in posts:
        # NOTE(review): only the *title* is scored even though the query also
        # selects `content` — confirm whether the post body should be
        # analyzed as well.
        content = post.get("title")
        if not content:
            continue

        score = analyzer.polarity_scores(content)["compound"]
        total_sentiment += score
        count += 1

    # Guard against division by zero when nothing was scored.
    average_sentiment = total_sentiment / count if count else 0.0

    return {
        "status": "success",
        "average_sentiment": average_sentiment,
        "posts_analyzed": count
    }
    # BUG FIX: removed an unreachable second `return {"status": ..., "posts":
    # [post.__dict__ ...]}` after the return above — dead code (and `posts`
    # holds dicts here, which have no meaningful __dict__ to serialize).
|
||||
|
||||
if __name__ == "__main__":
    # Development server only; debug=True must not be used in production.
    app.run(debug=True)
|
||||
Reference in New Issue
Block a user