Optimization with yield
jpontoire committed Dec 20, 2024
1 parent 3ab4b42 commit bc901cb
Showing 2 changed files with 3 additions and 12 deletions.
4 changes: 0 additions & 4 deletions minet/cli/reddit/posts.py
@@ -41,10 +41,6 @@ def action(cli_args, enricher, loading_bar):
             )
             continue
 
-        list_posts = []
         for post in posts:
-            list_posts.append(post)
-
-        for post in list_posts:
             loading_bar.nested_advance()
             enricher.writerow(row, post)
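The removed buffer was a verbatim copy of `posts` made before the write loop; a single pass over an iterable needs no such copy, and materializing it would also defeat the streaming behaviour introduced in the scraper below. A minimal sketch of the consuming pattern, with a hypothetical `posts_source` standing in for the scraper:

from typing import Iterator


def posts_source() -> Iterator[str]:
    # Hypothetical stand-in for RedditScraper.get_posts, which now yields.
    yield from ("post-1", "post-2", "post-3")


# One pass, no intermediate list: each item is written as it arrives.
for post in posts_source():
    print("writing", post)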
11 changes: 3 additions & 8 deletions minet/reddit/scraper.py
@@ -94,7 +94,6 @@ def get_childs_l500(self, url, list_comments, parent_id):
         return list_comments
 
     def get_posts(self, url: str, add_text: bool, nb_post=25):
-        list_posts = []
         nb_pages = ceil(int(nb_post) / 25)
         old_url = get_old_url(get_url_from_subreddit(url))
         n_crawled = 0
@@ -151,15 +150,12 @@ def get_posts(self, url: str, add_text: bool, nb_post=25):
                     published_date=published_date,
                     link=resolve_relative_url(link),
                 )
-
-                list_posts.append(data)
+                yield data
                 n_crawled += 1
             old_url = soup.scrape("span[class='next-button'] a", "href")[0]
-        return list(list_posts)
 
 
     def get_comments(self, url: str, all):
-        list_return = []
         m_comments = []
         old_url = get_old_url(url)
         url_limit = old_url + "?limit=500"
@@ -173,7 +169,7 @@ def get_comments(self, url: str, all):
             current_id = get_current_id(com)
             comment_url = com.scrape_one("a[class='bylink']", 'href')
             try_author = com.scrape_one("a[class^='author']", 'href')
-            author = try_author.get_text() if try_author else "Deleted"
+            author = try_author if try_author else "Deleted"
             com_points = com.scrape_one("span[class='score unvoted']")
             match = re.search(r"-?\d+\s+point(?:s)?", com_points)
             com_points = int(re.search(r"-?\d+", match.group()).group())
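This hunk reads as a bugfix folded into the commit: `scrape_one(..., 'href')` presumably returns the scraped string (or None), not an element, so calling `.get_text()` on it would fail whenever an author matched; the value is now used directly, with "Deleted" as the fallback. A trivial sketch of the fixed fallback, with hypothetical values:

# scrape_one returns a plain string or None here, so the value is used as-is.
try_author = None  # no author link matched, e.g. a deleted account
author = try_author if try_author else "Deleted"
assert author == "Deleted"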
@@ -223,5 +219,4 @@ def get_comments(self, url: str, all):
                     comment=com.scrape_one("div[class='md']:not(div.child a)"),
                 )
                 if data.id != "":
-                    list_return.append(data)
-        return list_return
+                    yield data
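Taken together, the scraper changes replace list accumulation with generators. A minimal sketch of the before/after pattern, assuming a hypothetical `scrape_page` helper: the list version holds every post in memory and returns only once the whole crawl is done, while the `yield` version streams each post to the caller as soon as it is scraped.

from typing import Iterator, List


def scrape_page(page: int) -> List[str]:
    # Hypothetical stand-in for fetching and parsing one page of 25 posts.
    return [f"post-{page}-{i}" for i in range(25)]


def get_posts_list(nb_pages: int) -> List[str]:
    # Before: everything is buffered; the caller waits for the full crawl.
    posts = []
    for page in range(nb_pages):
        posts.extend(scrape_page(page))
    return posts


def get_posts_gen(nb_pages: int) -> Iterator[str]:
    # After: each post is handed to the caller immediately, in constant
    # memory, and the caller can stop early without crawling further pages.
    for page in range(nb_pages):
        yield from scrape_page(page)


for post in get_posts_gen(2):
    print(post)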
