Refactor post fetching to use ThreadPoolExecutor for improved concurrency
This commit is contained in:
@@ -4,6 +4,7 @@ import re
|
|||||||
|
|
||||||
from dto.post import Post
|
from dto.post import Post
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -43,15 +44,21 @@ class BoardsAPI:
|
|||||||
# Fetch post details for each URL and create Post objects
|
# Fetch post details for each URL and create Post objects
|
||||||
posts = []
|
posts = []
|
||||||
|
|
||||||
for index, post_url in enumerate(urls):
|
def fetch_and_parse(post_url):
|
||||||
logger.debug(f"Fetching Post {index + 1} / {len(urls)} details from URL: {post_url}")
|
|
||||||
|
|
||||||
html = self._fetch_page(post_url)
|
html = self._fetch_page(post_url)
|
||||||
post = self._parse_thread(html, post_url)
|
return self._parse_thread(html, post_url)
|
||||||
posts.append(post)
|
|
||||||
|
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||||
|
futures = {executor.submit(fetch_and_parse, url): url for url in urls}
|
||||||
|
|
||||||
|
for i, future in enumerate(as_completed(futures)):
|
||||||
|
post_url = futures[future]
|
||||||
|
logger.debug(f"Fetching Post {i + 1} / {len(urls)} details from URL: {post_url}")
|
||||||
|
posts.append(future.result())
|
||||||
|
|
||||||
return posts
|
return posts
|
||||||
|
|
||||||
|
|
||||||
def _fetch_page(self, url: str) -> str:
|
def _fetch_page(self, url: str) -> str:
|
||||||
response = requests.get(url, headers=HEADERS)
|
response = requests.get(url, headers=HEADERS)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|||||||
Reference in New Issue
Block a user