Add boards.ie to dataset creation & add logging config
This commit is contained in:
@@ -1,16 +1,25 @@
|
|||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
from connectors.reddit_api import RedditAPI
|
from connectors.reddit_api import RedditAPI
|
||||||
|
from connectors.boards_api import BoardsAPI
|
||||||
|
|
||||||
data_file = 'data/reddit_posts.json'
|
data_file = 'data/reddit_posts.json'
|
||||||
reddit_connector = RedditAPI()
|
reddit_connector = RedditAPI()
|
||||||
|
boards_connector = BoardsAPI()
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
logging.getLogger("urllib3").setLevel(logging.WARNING)
|
||||||
|
|
||||||
def remove_empty_posts(posts):
    """Return the posts whose ``content`` holds non-whitespace text.

    Args:
        posts: Iterable of post objects exposing a ``content`` attribute.

    Returns:
        A new list, in the original order, without the posts whose content
        is ``None``, empty, or whitespace-only.
    """
    # ``content or ""`` lets a missing body (None) count as empty instead of
    # raising AttributeError on ``.strip()``.
    return [post for post in posts if (post.content or "").strip()]
|
||||||
|
|
||||||
def main():
    """Fetch recent Cork posts from boards.ie and Reddit and save them as JSON.

    Requests up to 500 posts from the boards.ie ``cork-city`` category and up
    to 500 from the ``cork`` subreddit, drops posts with empty content from
    both sources, and writes the combined list to ``data_file`` as an
    indented JSON array built from each post's ``__dict__``.
    """
    boards_posts = boards_connector.get_new_category_posts('cork-city', limit=500)
    # Apply the same empty-content filter to both sources so the dataset is
    # consistent.
    # NOTE(review): assumes boards.ie posts expose the same ``content``
    # attribute as Reddit posts; confirm against BoardsAPI.
    boards_posts = remove_empty_posts(boards_posts)

    reddit_posts = reddit_connector.get_new_subreddit_posts('cork', limit=500)
    reddit_posts = remove_empty_posts(reddit_posts)

    # Report what was collected through the logging setup configured at
    # module level (replaces the progress message the old version printed).
    logging.info(
        "Fetched %d posts from boards.ie and %d posts from r/cork",
        len(boards_posts),
        len(reddit_posts),
    )

    posts = boards_posts + reddit_posts

    # Explicit encoding keeps the output independent of the platform default.
    with open(data_file, 'w', encoding='utf-8') as f:
        json.dump([post.__dict__ for post in posts], f, indent=4)
|
||||||
|
|||||||
Reference in New Issue
Block a user