Refactor post fetching to use ThreadPoolExecutor for improved concurrency

2026-01-17 16:05:37 +00:00
parent d44b247bda
commit ed3d89fd27
1 changed file with 12 additions and 5 deletions
--- a/connectors/boards_api.py
+++ b/connectors/boards_api.py
@@ -4,6 +4,7 @@ import re
 from dto.post import Post
 from bs4 import BeautifulSoup
 from concurrent.futures import ThreadPoolExecutor, as_completed
 logger = logging.getLogger(__name__)
@@ -43,15 +44,21 @@ class BoardsAPI:
        # Fetch post details for each URL and create Post objects
        posts = []
-        for index, post_url in enumerate(urls):
+        def fetch_and_parse(post_url):
            logger.debug(f"Fetching Post {index + 1} / {len(urls)} details from URL: {post_url}")
            html = self._fetch_page(post_url)
-            post = self._parse_thread(html, post_url)
+            return self._parse_thread(html, post_url)
-            posts.append(post)
+
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = {executor.submit(fetch_and_parse, url): url for url in urls}
            for i, future in enumerate(as_completed(futures)):
                post_url = futures[future]
                logger.debug(f"Fetching Post {i + 1} / {len(urls)} details from URL: {post_url}")
                posts.append(future.result())
        return posts
    def _fetch_page(self, url: str) -> str:
        response = requests.get(url, headers=HEADERS)
        response.raise_for_status()