Optimization with yield
jpontoire committed Dec 20, 2024
1 parent 3ab4b42 commit bc901cb
Showing 2 changed files with 3 additions and 12 deletions.
4 changes: 0 additions & 4 deletions minet/cli/reddit/posts.py
@@ -41,10 +41,6 @@ def action(cli_args, enricher, loading_bar):
             )
             continue
 
-        list_posts = []
         for post in posts:
-            list_posts.append(post)
-
-        for post in list_posts:
             loading_bar.nested_advance()
             enricher.writerow(row, post)
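The removed buffer was a verbatim copy of `posts` made before the write loop; a single pass over an iterable needs no such copy, and materializing it would also defeat the streaming behaviour introduced in the scraper below. A minimal sketch of the consuming pattern, with a hypothetical `posts_source` standing in for the scraper:

from typing import Iterator


def posts_source() -> Iterator[str]:
    # Hypothetical stand-in for RedditScraper.get_posts, which now yields.
    yield from ("post-1", "post-2", "post-3")


# One pass, no intermediate list: each item is written as it arrives.
for post in posts_source():
    print("writing", post)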
11 changes: 3 additions & 8 deletions minet/reddit/scraper.py
@@ -94,7 +94,6 @@ def get_childs_l500(self, url, list_comments, parent_id):
         return list_comments
 
     def get_posts(self, url: str, add_text: bool, nb_post=25):
-        list_posts = []
         nb_pages = ceil(int(nb_post) / 25)
         old_url = get_old_url(get_url_from_subreddit(url))
         n_crawled = 0
@@ -151,15 +150,12 @@ def get_posts(self, url: str, add_text: bool, nb_post=25):
                     published_date=published_date,
                     link=resolve_relative_url(link),
                 )
-
-                list_posts.append(data)
+                yield data
                 n_crawled += 1
             old_url = soup.scrape("span[class='next-button'] a", "href")[0]
-        return list(list_posts)
 
 
     def get_comments(self, url: str, all):
-        list_return = []
         m_comments = []
         old_url = get_old_url(url)
         url_limit = old_url + "?limit=500"
@@ -173,7 +169,7 @@ def get_comments(self, url: str, all):
             current_id = get_current_id(com)
             comment_url = com.scrape_one("a[class='bylink']", 'href')
             try_author = com.scrape_one("a[class^='author']", 'href')
-            author = try_author.get_text() if try_author else "Deleted"
+            author = try_author if try_author else "Deleted"
             com_points = com.scrape_one("span[class='score unvoted']")
             match = re.search(r"-?\d+\s+point(?:s)?", com_points)
             com_points = int(re.search(r"-?\d+", match.group()).group())
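This hunk reads as a bugfix folded into the commit: `scrape_one(..., 'href')` presumably returns the scraped string (or None), not an element, so calling `.get_text()` on it would fail whenever an author matched; the value is now used directly, with "Deleted" as the fallback. A trivial sketch of the fixed fallback, with hypothetical values:

# scrape_one returns a plain string or None here, so the value is used as-is.
try_author = None  # no author link matched, e.g. a deleted account
author = try_author if try_author else "Deleted"
assert author == "Deleted"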
@@ -223,5 +219,4 @@ def get_comments(self, url: str, all):
                     comment=com.scrape_one("div[class='md']:not(div.child a)"),
                 )
                 if data.id != "":
-                    list_return.append(data)
-        return list_return
+                    yield data
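Taken together, the scraper changes replace list accumulation with generators. A minimal sketch of the before/after pattern, assuming a hypothetical `scrape_page` helper: the list version holds every post in memory and returns only once the whole crawl is done, while the `yield` version streams each post to the caller as soon as it is scraped.

from typing import Iterator, List


def scrape_page(page: int) -> List[str]:
    # Hypothetical stand-in for fetching and parsing one page of 25 posts.
    return [f"post-{page}-{i}" for i in range(25)]


def get_posts_list(nb_pages: int) -> List[str]:
    # Before: everything is buffered; the caller waits for the full crawl.
    posts = []
    for page in range(nb_pages):
        posts.extend(scrape_page(page))
    return posts


def get_posts_gen(nb_pages: int) -> Iterator[str]:
    # After: each post is handed to the caller immediately, in constant
    # memory, and the caller can stop early without crawling further pages.
    for page in range(nb_pages):
        yield from scrape_page(page)


for post in get_posts_gen(2):
    print(post)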
