From bc901cb1a0b65278dc1d6578f53f0c664f202ccb Mon Sep 17 00:00:00 2001
From: Julien Pontoire
Date: Fri, 20 Dec 2024 13:52:36 +0100
Subject: [PATCH] Optimization with yield

---
 minet/cli/reddit/posts.py |  4 ----
 minet/reddit/scraper.py   | 11 +++--------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/minet/cli/reddit/posts.py b/minet/cli/reddit/posts.py
index 777f88813d..68dd9d68cb 100644
--- a/minet/cli/reddit/posts.py
+++ b/minet/cli/reddit/posts.py
@@ -41,10 +41,6 @@ def action(cli_args, enricher, loading_bar):
             )
             continue
 
-        list_posts = []
         for post in posts:
-            list_posts.append(post)
-
-        for post in list_posts:
             loading_bar.nested_advance()
             enricher.writerow(row, post)
diff --git a/minet/reddit/scraper.py b/minet/reddit/scraper.py
index b3e2a78397..04100ef091 100644
--- a/minet/reddit/scraper.py
+++ b/minet/reddit/scraper.py
@@ -94,7 +94,6 @@ def get_childs_l500(self, url, list_comments, parent_id):
         return list_comments
 
     def get_posts(self, url: str, add_text: bool, nb_post=25):
-        list_posts = []
         nb_pages = ceil(int(nb_post) / 25)
         old_url = get_old_url(get_url_from_subreddit(url))
         n_crawled = 0
@@ -151,15 +150,12 @@ def get_posts(self, url: str, add_text: bool, nb_post=25):
                     published_date=published_date,
                     link=resolve_relative_url(link),
                 )
-
-                list_posts.append(data)
+                yield data
                 n_crawled += 1
 
             old_url = soup.scrape("span[class='next-button'] a", "href")[0]
-        return list(list_posts)
 
     def get_comments(self, url: str, all):
-        list_return = []
         m_comments = []
         old_url = get_old_url(url)
         url_limit = old_url + "?limit=500"
@@ -173,7 +169,7 @@ def get_comments(self, url: str, all):
             current_id = get_current_id(com)
             comment_url = com.scrape_one("a[class='bylink']", 'href')
             try_author = com.scrape_one("a[class^='author']", 'href')
-            author = try_author.get_text() if try_author else "Deleted"
+            author = try_author if try_author else "Deleted"
             com_points = com.scrape_one("span[class='score unvoted']")
             match = re.search(r"-?\d+\s+point(?:s)?", com_points)
             com_points = int(re.search(r"-?\d+", match.group()).group())
@@ -223,5 +219,4 @@ def get_comments(self, url: str, all):
                 comment=com.scrape_one("div[class='md']:not(div.child a)"),
             )
             if data.id != "":
-                list_return.append(data)
-        return list_return
+                yield data