Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(engine): Add file checking to filter unwanted files to be parsed #2506 #3045

Merged
merged 13 commits into from
Apr 30, 2021
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions internal/console/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,17 +381,19 @@ func createInspector(t engine.Tracker, querySource source.QueriesSource) (*engin
return inspector, nil
}

func analyzePaths(paths, types []string) ([]string, error) {
func analyzePaths(paths, types, exclude []string) (typesRes, excludeRes []string, errRes error) {
var err error
exc := make([]string, 0)
if types[0] == "" { // if '--type' flag was given skip file analyzing
types, err = analyzer.Analyze(paths)
types, exc, err = analyzer.Analyze(paths)
if err != nil {
log.Err(err)
return []string{}, err
return []string{}, []string{}, err
}
log.Info().Msgf("Loading queries of type: %s", strings.Join(types, ", "))
}
return types, nil
exclude = append(exclude, exc...)
return types, exclude, nil
}

func createService(inspector *engine.Inspector,
Expand Down Expand Up @@ -463,7 +465,7 @@ func scan(changedDefaultQueryPath bool) error {
}
}

if types, err = analyzePaths(path, types); err != nil {
if types, excludePath, err = analyzePaths(path, types, excludePath); err != nil {
return err
}

Expand Down
55 changes: 36 additions & 19 deletions pkg/analyzer/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,10 @@ const (

// Analyze will go through the paths given and determine what type of queries to load
// based on the extension of the file and the content
func Analyze(paths []string) ([]string, error) {
func Analyze(paths []string) (typesRes, excludeRes []string, errRes error) {
// start metrics for file analyzer
metrics.Metric.Start("file_type_analyzer")

availableTypes := make([]string, 0)
var files []string
var wg sync.WaitGroup
// results is the channel shared by the workers that contains the types found
Expand All @@ -51,7 +50,7 @@ func Analyze(paths []string) ([]string, error) {
// get all the files inside the given paths
for _, path := range paths {
if _, err := os.Stat(path); err != nil {
return []string{}, errors.Wrap(err, "failed to analyze path")
return []string{}, []string{}, errors.Wrap(err, "failed to analyze path")
}
if err := filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
if !info.IsDir() {
Expand All @@ -63,33 +62,36 @@ func Analyze(paths []string) ([]string, error) {
}
}

// unwanted is the channel shared by the workers that contains the unwanted files to remove from the parser
unwanted := make(chan string, len(files))

for _, file := range files {
wg.Add(1)
// analyze the files concurrently
go worker(file, results, &wg)
go worker(file, results, unwanted, &wg)
}

go func() {
// close channel results when worker has finished writing into it
defer close(results)
// close the unwanted and results channels once all workers have finished writing into them
defer func() {
close(unwanted)
close(results)
}()
wg.Wait()
}()

for i := range results {
// read channel results and if type received is not in return slice
// add it
if !contains(availableTypes, i) {
availableTypes = append(availableTypes, i)
}
}
availableTypes := createSlice(results)
unwantedPaths := createSlice(unwanted)

// stop metrics for file analyzer
metrics.Metric.Stop()
return availableTypes, nil
return availableTypes, unwantedPaths, nil
}

// worker determines the type of the file by ext (dockerfile and terraform)/content and
// writes the answer to the results channel
func worker(path string, results chan<- string, wg *sync.WaitGroup) {
// writes the answer to the results channel;
// if no type was found, worker will write the path of the file into the unwanted channel
func worker(path string, results, unwanted chan<- string, wg *sync.WaitGroup) {
defer wg.Done()
ext := filepath.Ext(path)
if ext == "" {
Expand All @@ -104,7 +106,7 @@ func worker(path string, results chan<- string, wg *sync.WaitGroup) {
results <- "terraform"
// Cloud Formation, Ansible, OpenAPI
case yaml, yml, ".json":
checkContent(path, results, ext)
checkContent(path, results, unwanted, ext)
}
}

Expand Down Expand Up @@ -138,12 +140,13 @@ var types = map[string]regexSlice{
}

// checkContent will determine the file type by content when worker was unable to
// determine by ext
func checkContent(path string, results chan<- string, ext string) {
// determine by ext, if no type was determined checkContent adds it to unwanted channel
func checkContent(path string, results, unwanted chan<- string, ext string) {
// get file content
content, err := os.ReadFile(path)
if err != nil {
log.Error().Msgf("failed to analyze file: %s", err)
return
}

returnType := ""
Expand Down Expand Up @@ -177,7 +180,21 @@ func checkContent(path string, results chan<- string, ext string) {
// Since Ansible as no defining property
// and no other type was found for YAML assume its Ansible
results <- "ansible"
} else {
// No type was determined (ignore on parser)
unwanted <- path
}
}

// createSlice drains ch until it is closed and returns the distinct values
// it received, in the order in which each value was first seen.
//
// The returned slice is never nil: if the channel yields no values, an empty
// non-nil slice is returned.
func createSlice(ch chan string) []string {
	unique := make([]string, 0)
	// seen records the values already collected, so each duplicate check is a
	// single map lookup rather than a scan over everything gathered so far.
	seen := make(map[string]struct{})
	for value := range ch {
		if _, dup := seen[value]; dup {
			continue
		}
		seen[value] = struct{}{}
		unique = append(unique, value)
	}
	return unique
}

// contains is a simple method to check if a slice
Expand Down
60 changes: 39 additions & 21 deletions pkg/analyzer/analyzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,57 +10,75 @@ import (

func TestAnalyzer_Analyze(t *testing.T) {
tests := []struct {
name string
paths []string
want []string
wantErr bool
name string
paths []string
wantTypes []string
wantExclude []string
wantErr bool
}{
{
name: "analyze_test_dir_single_path",
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test")},
want: []string{"dockerfile", "cloudformation", "kubernetes", "openapi", "terraform", "ansible"},
wantErr: false,
name: "analyze_test_dir_single_path",
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test")},
wantTypes: []string{"dockerfile", "cloudformation", "kubernetes", "openapi", "terraform", "ansible"},
wantExclude: []string{},
wantErr: false,
},
{
name: "analyze_test_helm_single_path",
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test/helm")},
want: []string{"kubernetes", "ansible"}, // ansible is added because of unknown type in values.yaml
wantErr: false,
name: "analyze_test_helm_single_path",
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test/helm")},
wantTypes: []string{"kubernetes", "ansible"}, // ansible is added because of unknown type in values.yaml
wantExclude: []string{},
wantErr: false,
},
{
name: "analyze_test_multiple_path",
paths: []string{
filepath.FromSlash("../../test/fixtures/analyzer_test/Dockerfile"),
filepath.FromSlash("../../test/fixtures/analyzer_test/terraform.tf")},
want: []string{"dockerfile", "terraform"}, // ansible is added because of unknown type in values.yaml
wantErr: false,
wantTypes: []string{"dockerfile", "terraform"}, // ansible is added because of unknown type in values.yaml
wantExclude: []string{},
wantErr: false,
},
{
name: "analyze_test_mult_checks_path",
paths: []string{
filepath.FromSlash("../../test/fixtures/analyzer_test/openAPI_test")},
want: []string{"kubernetes"}, // ansible is added because of unknown type in values.yaml
wantErr: false,
wantTypes: []string{"kubernetes"}, // ansible is added because of unknown type in values.yaml
wantExclude: []string{}, // ansible is added because of unknown type in values.yaml
wantErr: false,
},
{
name: "analyze_test_error_path",
paths: []string{
filepath.FromSlash("../../test/fixtures/analyzer_test/Dockserfile"),
filepath.FromSlash("../../test/fixtures/analyzer_test/terraform.tf")},
want: []string{}, // ansible is added because of unknown type in values.yaml
wantErr: true,
wantTypes: []string{},
wantExclude: []string{},
wantErr: true,
},
{
name: "analyze_test_unwanted_path",
paths: []string{
filepath.FromSlash("../../test/fixtures/type-test01/template01/metadata.json"),
},
wantTypes: []string{},
wantExclude: []string{filepath.FromSlash("../../test/fixtures/type-test01/template01/metadata.json")},
wantErr: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := Analyze(tt.paths)
got, exc, err := Analyze(tt.paths)
if (err != nil) != tt.wantErr {
t.Errorf("Analyze = %v, wantErr = %v", err, tt.wantErr)
}
sort.Strings(tt.want)
sort.Strings(tt.wantTypes)
sort.Strings(tt.wantExclude)
sort.Strings(got)
require.Equal(t, tt.want, got)
sort.Strings(exc)
require.Equal(t, tt.wantTypes, got, "wrong types from analyzer")
require.Equal(t, tt.wantExclude, exc, "wrong excludes from analyzer")
})
}
}