-
Notifications
You must be signed in to change notification settings - Fork 870
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reduce deadlocks by inserting contributors in batches #3036
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -274,6 +274,17 @@ def facade_bulk_insert_commits(logger, records): | |
raise e | ||
|
||
|
||
def batch_insert_contributors(logger, data: Union[List[dict], dict], batch_size: int = 1000) -> Optional[List[dict]]:
    """Insert contributor records into the Contributor table in batches.

    The records are split into consecutive slices of at most ``batch_size``
    items and each slice is handed to ``bulk_insert_dicts`` with ``cntrb_id``
    as the natural key. Smaller inserts are intended to reduce database
    deadlocks compared with one very large insert.

    Args:
        logger: Logger forwarded unchanged to ``bulk_insert_dicts``.
        data: A list of contributor dicts, or a single contributor dict.
        batch_size: Maximum number of records per insert (default 1000).

    Returns:
        Always ``None``; nothing is collected from the individual inserts.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    # A zero step makes range() raise, and a negative step makes it empty,
    # which would silently skip every insert; reject both up front.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # The signature permits a single dict, but a dict cannot be sliced;
    # wrap it so the batching loop below treats it as one record.
    if isinstance(data, dict):
        data = [data]

    for start in range(0, len(data), batch_size):
        batch = data[start:start + batch_size]
        bulk_insert_dicts(logger, batch, Contributor, ['cntrb_id'])

    return None
|
||
|
||
|
||
def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
|
||
if isinstance(data, list) is False: | ||
|
@@ -383,7 +394,7 @@ def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys | |
|
||
else: | ||
logger.error("Unable to insert data in 10 attempts") | ||
return None | ||
raise Exception("Unable to insert and return data in 10 attempts") | ||
|
||
if deadlock_detected is True: | ||
logger.error("Made it through even though Deadlock was detected") | ||
|
@@ -421,7 +432,7 @@ def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys | |
|
||
else: | ||
logger.error("Unable to insert and return data in 10 attempts") | ||
return None | ||
raise Exception("Unable to insert and return data in 10 attempts") | ||
|
||
if deadlock_detected is True: | ||
logger.error("Made it through even though Deadlock was detected") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ | |
from augur.tasks.github.util.util import get_owner_repo | ||
from augur.tasks.util.worker_util import remove_duplicate_dicts | ||
from augur.application.db.models import PullRequestEvent, IssueEvent, Contributor, Repo | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_issues_by_repo_id, get_pull_requests_by_repo_id, update_issue_closed_cntrbs_by_repo_id, get_session, get_engine, get_core_data_last_collected | ||
from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_issues_by_repo_id, get_pull_requests_by_repo_id, update_issue_closed_cntrbs_by_repo_id, get_session, get_engine, get_core_data_last_collected, batch_insert_contributors | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
|
||
|
||
platform_id = 1 | ||
|
@@ -82,7 +82,7 @@ def _insert_pr_events(self, events): | |
bulk_insert_dicts(self._logger, events, PullRequestEvent, pr_event_natural_keys) | ||
|
||
def _insert_contributors(self, contributors): | ||
bulk_insert_dicts(self._logger, contributors, Contributor, ["cntrb_id"]) | ||
batch_insert_contributors(self._logger, contributors) | ||
|
||
def _process_github_event_contributors(self, event): | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ | |
from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth | ||
from augur.application.db.models import Contributor | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.tasks.github.facade_github.core import * | ||
from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git | ||
from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors | ||
from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram import * | ||
|
||
|
||
|
@@ -127,8 +127,7 @@ def process_commit_metadata(logger, auth, contributorQueue, repo_id, platform_id | |
|
||
#Executes an upsert with sqlalchemy | ||
cntrb_natural_keys = ['cntrb_id'] | ||
|
||
bulk_insert_dicts(logger, cntrb,Contributor,cntrb_natural_keys) | ||
batch_insert_contributors(logger, [cntrb]) | ||
|
||
try: | ||
# Update alias after insertion. Insertion needs to happen first so we can get the autoincrementkey | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ | |
from augur.tasks.util.worker_util import remove_duplicate_dicts | ||
from augur.application.db.models import Issue, IssueLabel, IssueAssignee, Contributor | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.application.config import get_development_flag | ||
from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected | ||
from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected, batch_insert_contributors | ||
|
||
|
||
development = get_development_flag() | ||
|
@@ -130,7 +130,7 @@ def process_issues(issues, task_name, repo_id, logger) -> None: | |
|
||
# insert contributors from these issues | ||
logger.info(f"{task_name}: Inserting {len(contributors)} contributors") | ||
bulk_insert_dicts(logger, contributors, Contributor, ["cntrb_id"]) | ||
batch_insert_contributors(logger, contributors) | ||
|
||
|
||
# insert the issues into the issues table. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
|
||
from augur.application.db.data_parse import * | ||
from augur.application.db.session import DatabaseSession | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.application.db.lib import bulk_insert_dicts | ||
from augur.application.db.lib import bulk_insert_dicts, batch_insert_contributors | ||
from augur.tasks.github.util.util import add_key_value_pair_to_dicts | ||
from augur.tasks.util.worker_util import remove_duplicate_dicts | ||
from augur.application.db.models import PullRequest, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, Contributor | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
|
@@ -144,7 +144,7 @@ def insert_pr_contributors(contributors: List[dict], logger, task_name: str) -> | |
|
||
# insert contributors from these prs | ||
logger.info(f"{task_name}: Inserting {len(contributors)} contributors") | ||
bulk_insert_dicts(logger, contributors, Contributor, ["cntrb_id"]) | ||
batch_insert_contributors(logger, contributors) | ||
|
||
|
||
def insert_prs(pr_dicts: List[dict], logger, task_name: str) -> Optional[List[dict]]: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ | |
from augur.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo | ||
from augur.tasks.github.util.github_task_session import GithubTaskManifest | ||
from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth | ||
from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id | ||
from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.application.db.util import execute_session_query | ||
from ..messages import process_github_comment_contributors | ||
from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_core_data_last_collected | ||
|
@@ -260,7 +260,7 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - | |
contributors.append(contributor) | ||
|
||
logger.info(f"{owner}/{repo} Pr review messages: Inserting {len(contributors)} contributors") | ||
bulk_insert_dicts(logger, contributors, Contributor, ["cntrb_id"]) | ||
batch_insert_contributors(logger, contributors) | ||
|
||
|
||
pr_review_comment_dicts = [] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ | |
from augur.tasks.github.util.util import get_gitlab_repo_identifier, add_key_value_pair_to_dicts | ||
from augur.application.db.models import Issue, IssueLabel, IssueAssignee, IssueMessageRef, Message, Contributor, Repo | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.tasks.util.worker_util import remove_duplicate_dicts | ||
from augur.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_session | ||
from augur.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_session, batch_insert_contributors | ||
from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth | ||
|
||
platform_id = 2 | ||
|
@@ -140,7 +140,7 @@ def process_issues(issues, task_name, repo_id, logger) -> None: | |
|
||
# insert contributors from these issues | ||
logger.info(f"{task_name}: Inserting {len(contributors)} contributors") | ||
bulk_insert_dicts(logger, contributors, Contributor, ["cntrb_id"]) | ||
batch_insert_contributors(logger, contributors) | ||
|
||
logger.info(f"{task_name}: Inserting {len(issue_dicts)} gitlab issues") | ||
issue_natural_keys = ["repo_id", "gh_issue_id"] | ||
|
@@ -325,7 +325,7 @@ def process_gitlab_issue_messages(data, task_name, repo_id, logger, session): | |
contributors = remove_duplicate_dicts(contributors) | ||
|
||
logger.info(f"{task_name}: Inserting {len(contributors)} contributors") | ||
bulk_insert_dicts(logger, contributors, Contributor, ["cntrb_id"]) | ||
batch_insert_contributors(logger, contributors) | ||
|
||
logger.info(f"{task_name}: Inserting {len(message_dicts)} messages") | ||
message_natural_keys = ["platform_msg_id", "pltfrm_id"] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
from augur.application.db.models import PullRequest, PullRequestLabel, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message, Contributor, PullRequestAssignee | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
from augur.tasks.gitlab.gitlab_random_key_auth import GitlabRandomKeyAuth | ||
from augur.tasks.util.worker_util import remove_duplicate_dicts | ||
from augur.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_session | ||
from augur.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_session, batch_insert_contributors | ||
|
||
platform_id = 2 | ||
|
||
|
@@ -125,7 +125,7 @@ def process_merge_requests(data, task_name, repo_id, logger): | |
contributors = remove_duplicate_dicts(contributors) | ||
|
||
logger.info(f"{task_name}: Inserting {len(contributors)} contributors") | ||
bulk_insert_dicts(logger, contributors, Contributor, ["cntrb_id"]) | ||
batch_insert_contributors(logger, contributors) | ||
|
||
logger.info(f"{task_name}: Inserting mrs of length: {len(merge_requests)}") | ||
pr_natural_keys = ["repo_id", "pr_src_id"] | ||
|
@@ -250,7 +250,7 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, session): | |
contributors = remove_duplicate_dicts(contributors) | ||
|
||
logger.info(f"{task_name}: Inserting {len(contributors)} mr message contributors") | ||
bulk_insert_dicts(logger, contributors, Contributor, ["cntrb_id"]) | ||
batch_insert_contributors(logger, contributors) | ||
|
||
logger.info(f"{task_name}: Inserting {len(message_dicts)} mr messages") | ||
message_natural_keys = ["platform_msg_id", "pltfrm_id"] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[pylint] reported by reviewdog 🐶
W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name)