Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: reduce goroutines and add debug logging #394

Merged
merged 1 commit into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func (c *Crawler) CrawlPublishers(publishers []common.Publisher) error {
// Process every item in publishers.
for _, publisher := range publishers {
c.publishersWg.Add(1)
- go c.ScanPublisher(publisher)
+ c.ScanPublisher(publisher)
}

// Close the repositories channel when all the publisher goroutines are done
Expand All @@ -151,11 +151,15 @@ func (c *Crawler) crawl() error {

// Get cpus number
numCPUs := runtime.NumCPU()
log.Debugf("CPUs #: %d", numCPUs)

// Process the repositories in order to retrieve the files.
for i := 0; i < numCPUs; i++ {
c.repositoriesWg.Add(1)
- go c.ProcessRepositories(reposChan)
+ go func(id int) {
+ log.Debugf("Starting ProcessRepositories() goroutine (#%d)", id)
+ c.ProcessRepositories(reposChan)
+ }(i)
}

for repo := range c.repositories {
Expand Down
4 changes: 4 additions & 0 deletions scanner/bitbucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ func NewBitBucketScanner() Scanner {
func (scanner BitBucketScanner) ScanGroupOfRepos(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("BitBucketScanner.ScanGroupOfRepos(%s)", url.String())

splitted := strings.Split(strings.Trim(url.Path, "/"), "/")

if len(splitted) != 1 {
Expand Down Expand Up @@ -82,6 +84,8 @@ func (scanner BitBucketScanner) ScanGroupOfRepos(
func (scanner BitBucketScanner) ScanRepo(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("BitBucketScanner.ScanRepo(%s)", url.String())

splitted := strings.Split(strings.Trim(url.Path, "/"), "/")
if len(splitted) != 2 {
return fmt.Errorf("bitbucket URL %s doesn't look like a repo", url.String())
Expand Down
4 changes: 4 additions & 0 deletions scanner/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ func NewGitHubScanner() Scanner {
func (scanner GitHubScanner) ScanGroupOfRepos(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("GitHubScanner.ScanGroupOfRepos(%s)", url.String())

opt := &github.RepositoryListByOrgOptions{}

splitted := strings.Split(strings.Trim(url.Path, "/"), "/")
Expand Down Expand Up @@ -128,6 +130,8 @@ func (scanner GitHubScanner) ScanGroupOfRepos(
func (scanner GitHubScanner) ScanRepo(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("GitHubScanner.ScanRepo(%s)", url.String())

splitted := strings.Split(strings.Trim(url.Path, "/"), "/")
if len(splitted) != 2 {
return fmt.Errorf("doesn't look like a GitHub repo %s", url.String())
Expand Down
5 changes: 5 additions & 0 deletions scanner/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strings"

"github.com/italia/publiccode-crawler/v4/common"
log "github.com/sirupsen/logrus"
"github.com/xanzy/go-gitlab"
)

Expand All @@ -21,6 +22,8 @@ func NewGitLabScanner() Scanner {
func (scanner GitLabScanner) ScanGroupOfRepos(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("GitLabScanner.ScanGroupOfRepos(%s)", url.String())

apiURL, _ := url.Parse("/api/v4")
git, err := gitlab.NewClient(os.Getenv("GITLAB_TOKEN"), gitlab.WithBaseURL(apiURL.String()))
if err != nil {
Expand Down Expand Up @@ -68,6 +71,8 @@ func (scanner GitLabScanner) ScanGroupOfRepos(
func (scanner GitLabScanner) ScanRepo(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("GitLabScanner.ScanRepo(%s)", url.String())

apiURL, _ := url.Parse("/api/v4")
git, err := gitlab.NewClient(os.Getenv("GITLAB_TOKEN"), gitlab.WithBaseURL(apiURL.String()))
if err != nil {
Expand Down