Merge branch 'main' of github:ThisBirchWood/ethnograph-view
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,3 +1,5 @@
|
|||||||
.vscode
|
.vscode
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
|
*.json
|
||||||
|
*.code-workspace
|
||||||
@@ -30,12 +30,30 @@ class RedditAPI:
|
|||||||
return self._parse_posts(data)
|
return self._parse_posts(data)
|
||||||
|
|
||||||
def get_new_subreddit_posts(self, subreddit: str, limit: int = 10) -> "list[Post]":
    """Return up to ``limit`` of the newest posts in ``subreddit``.

    Results are fetched page by page from ``r/<subreddit>/new.json``. Each
    request asks for at most 100 posts and passes the ``after`` cursor
    returned by the previous page.

    Args:
        subreddit: Subreddit name without the ``r/`` prefix.
        limit: Maximum number of posts to return. Values <= 0 yield ``[]``
            without issuing any request.

    Returns:
        The parsed posts in the order the pages returned them, never more
        than ``limit`` items.
    """
    posts = []
    after = None
    url = f"r/{subreddit}/new.json"

    while len(posts) < limit:
        # Ask only for what is still missing, capped at 100 per request.
        params = {'limit': min(100, limit - len(posts))}
        # Send the cursor only once there is one, so the first request
        # never carries a literal None value.
        if after:
            params['after'] = after

        data = self._fetch_data(url, params)
        batch = self._parse_posts(data)

        # An empty page means there is nothing further to read.
        if not batch:
            break

        posts.extend(batch)

        # No cursor in the response means this was the last page.
        after = data['data'].get('after')
        if not after:
            break

    # Guard against a page that returned more items than requested.
    return posts[:limit]
|
||||||
|
|
||||||
def get_user(self, username: str) -> User:
|
def get_user(self, username: str) -> User:
|
||||||
data = self._fetch_data(f"user/{username}/about.json", {})
|
data = self._fetch_data(f"user/{username}/about.json", {})
|
||||||
|
|||||||
19
create_dataset.py
Normal file
19
create_dataset.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
import json
|
||||||
|
from connectors.reddit_api import RedditAPI
|
||||||
|
|
||||||
|
data_file = 'data/reddit_posts.json'
|
||||||
|
reddit_connector = RedditAPI()
|
||||||
|
def remove_empty_posts(posts):
    """Return the posts whose ``content`` has non-whitespace text.

    Posts whose ``content`` is ``None``, empty, or whitespace-only are
    dropped. The input iterable is not modified and order is preserved.
    """
    # `or ""` lets a missing (None) body count as empty instead of raising.
    return [post for post in posts if (post.content or "").strip()]
|
||||||
|
|
||||||
|
def main():
    """Fetch the newest r/cork posts, drop empty ones, and dump them as JSON.

    Requests up to 1000 posts through ``reddit_connector``, filters them
    with ``remove_empty_posts``, prints how many remain, and writes each
    post's ``__dict__`` to ``data_file``.
    """
    import os

    posts = reddit_connector.get_new_subreddit_posts('cork', limit=1000)
    posts = remove_empty_posts(posts)

    print(f"Fetched {len(posts)} posts from r/cork")

    # The output directory may not exist yet; create it so open() cannot
    # fail with FileNotFoundError.
    target_dir = os.path.dirname(data_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Explicit encoding keeps the output independent of the platform default.
    with open(data_file, 'w', encoding='utf-8') as f:
        json.dump([post.__dict__ for post in posts], f, indent=4)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
# To connect to PostgreSQL database
|
|
||||||
import psycopg2
|
|
||||||
|
|
||||||
from psycopg2.extras import RealDictCursor
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
class Database:
    """Thin wrapper around a single psycopg2 connection in autocommit mode.

    Can be used as a context manager; leaving the ``with`` block closes the
    connection.
    """

    def __init__(self, db_name: str, user: str, password: str, host: str = 'localhost', port: int = 5432):
        """Open the connection and enable autocommit.

        Args:
            db_name: Database name, passed to psycopg2 as ``dbname``.
            user: Login role.
            password: Password for ``user``.
            host: Server host.
            port: Server port.
        """
        self.connection = psycopg2.connect(
            dbname=db_name,
            user=user,
            password=password,
            host=host,
            port=port
        )
        self.connection.autocommit = True

    def __enter__(self):
        """Return this instance so ``with Database(...) as db:`` works."""
        return self

    def execute_query(self, query: str, params: Optional[tuple] = None):
        """Run one statement and return its rows.

        Args:
            query: SQL text, with placeholders for ``params`` if any.
            params: Values bound to the placeholders, or ``None``.

        Returns:
            ``cursor.fetchall()`` when the statement produced a result set
            (``cursor.description`` is set), otherwise an empty list.
        """
        with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(query, params)
            # description is only populated for statements that return rows.
            if cursor.description:
                return cursor.fetchall()
            return []

    def execute_many(self, query: str, params_list: list[tuple]):
        """Run ``query`` once per parameter tuple in ``params_list``."""
        with self.connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.executemany(query, params_list)

    def close(self):
        """Close the underlying connection and print a confirmation."""
        self.connection.close()
        print("Database connection closed.")

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the connection on leaving a ``with`` block.

        Returns ``None``, so an exception raised inside the block propagates.
        """
        self.close()
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
CREATE SCHEMA IF NOT EXISTS ethnograph;
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ethnograph.users (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
username VARCHAR(255) UNIQUE NOT NULL,
|
|
||||||
created_utc TIMESTAMP NOT NULL,
|
|
||||||
karma INTEGER
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ethnograph.posts (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
title TEXT NOT NULL,
|
|
||||||
content TEXT NOT NULL,
|
|
||||||
author_username VARCHAR(255),
|
|
||||||
created_utc TIMESTAMP NOT NULL
|
|
||||||
);
|
|
||||||
@@ -1,52 +1,14 @@
|
|||||||
from flask import Flask
|
from flask import Flask
|
||||||
from db.database import Database
|
|
||||||
from connectors.reddit_api import RedditAPI
|
from connectors.reddit_api import RedditAPI
|
||||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
db = Database(db_name='ethnograph', user='ethnograph_user', password='ethnograph_pass')
|
|
||||||
|
|
||||||
reddit_connector = RedditAPI()
|
reddit_connector = RedditAPI()
|
||||||
|
|
||||||
# The route without a limit segment makes the `limit = 10` default reachable;
# the original single route always required an explicit limit.
@app.route('/fetch_subreddit/<string:subreddit>', methods=['GET'])
@app.route('/fetch_subreddit/<string:subreddit>/<int:limit>', methods=['GET'])
def fetch_subreddit(subreddit, limit = 10):
    """Return the all-time top posts of ``subreddit`` as a JSON-ready dict.

    Args:
        subreddit: Subreddit name taken from the URL.
        limit: Number of posts to request; 10 when the URL omits it.

    Returns:
        ``{"status": "success", "posts": [...]}`` where each entry is the
        ``__dict__`` of a post returned by the connector.
    """
    posts = reddit_connector.get_top_subreddit_posts(subreddit, limit=limit, timeframe='all')
    return {"status": "success", "posts": [post.__dict__ for post in posts]}
|
|
||||||
|
|
||||||
@app.route('/sentiment', methods=['GET'])
def sentiment_analysis():
    """Report the mean VADER compound score over stored post titles.

    Reads every row from ``ethnograph.posts`` and scores the ``title``
    column only; rows with a missing or empty title are skipped.

    Returns:
        A dict with ``status``, ``average_sentiment`` (0.0 when nothing was
        scored) and ``posts_analyzed`` (number of titles scored).
    """
    rows = db.execute_query(
        "SELECT id, title, content FROM ethnograph.posts;"
    )
    scorer = SentimentIntensityAnalyzer()

    running_total = 0.0
    scored = 0
    for row in rows:
        title = row.get("title")
        if title:
            running_total += scorer.polarity_scores(title)["compound"]
            scored += 1

    # Avoid dividing by zero when no title could be scored.
    if scored:
        mean_score = running_total / scored
    else:
        mean_score = 0.0

    return {
        "status": "success",
        "average_sentiment": mean_score,
        "posts_analyzed": scored
    }
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run(debug=True)
|
app.run(debug=True)
|
||||||
Reference in New Issue
Block a user