Skip to content

Commit

Permalink
feat(engine): Add file checking to filter unwanted files to be parsed (
Browse files Browse the repository at this point in the history
…#2506)  (#3045)

Signed-off-by: João Reigota <[email protected]>
  • Loading branch information
cx-joao-reigota authored Apr 30, 2021
1 parent 02584c1 commit 3d13a00
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 48 deletions.
15 changes: 10 additions & 5 deletions internal/console/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,17 +381,22 @@ func createInspector(t engine.Tracker, querySource source.QueriesSource) (*engin
return inspector, nil
}

func analyzePaths(paths, types []string) ([]string, error) {
// analyzePaths will analyze the paths to scan to determine which type of queries to load
// and which files should be ignored, it then updates the types and exclude flags variables
// with the results found
func analyzePaths(paths, types, exclude []string) (typesRes, excludeRes []string, errRes error) {
var err error
exc := make([]string, 0)
if types[0] == "" { // if '--type' flag was given skip file analyzing
types, err = analyzer.Analyze(paths)
types, exc, err = analyzer.Analyze(paths)
if err != nil {
log.Err(err)
return []string{}, err
return []string{}, []string{}, err
}
log.Info().Msgf("Loading queries of type: %s", strings.Join(types, ", "))
}
return types, nil
exclude = append(exclude, exc...)
return types, exclude, nil
}

func createService(inspector *engine.Inspector,
Expand Down Expand Up @@ -463,7 +468,7 @@ func scan(changedDefaultQueryPath bool) error {
}
}

if types, err = analyzePaths(path, types); err != nil {
if types, excludePath, err = analyzePaths(path, types, excludePath); err != nil {
return err
}

Expand Down
61 changes: 39 additions & 22 deletions pkg/analyzer/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,12 @@ const (
yaml = ".yaml"
)

// Analyze will go through the paths given and determine what type of queries to load
// based on the extension of the file and the content
func Analyze(paths []string) ([]string, error) {
// Analyze will go through the slice of paths given and determine what type of queries
// should be loaded based on the extension of the file and the content
func Analyze(paths []string) (typesRes, excludeRes []string, errRes error) {
// start metrics for file analyzer
metrics.Metric.Start("file_type_analyzer")

availableTypes := make([]string, 0)
var files []string
var wg sync.WaitGroup
// results is the channel shared by the workers that contains the types found
Expand All @@ -51,7 +50,7 @@ func Analyze(paths []string) ([]string, error) {
// get all the files inside the given paths
for _, path := range paths {
if _, err := os.Stat(path); err != nil {
return []string{}, errors.Wrap(err, "failed to analyze path")
return []string{}, []string{}, errors.Wrap(err, "failed to analyze path")
}
if err := filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
if !info.IsDir() {
Expand All @@ -63,33 +62,36 @@ func Analyze(paths []string) ([]string, error) {
}
}

// unwanted is the channel shared by the workers that contains the unwanted files that the parser will ignore
unwanted := make(chan string, len(files))

for _, file := range files {
wg.Add(1)
// analyze the files concurrently
go worker(file, results, &wg)
go worker(file, results, unwanted, &wg)
}

go func() {
// close channel results when worker has finished writing into it
defer close(results)
// close the unwanted and results channels once all workers have finished writing into them
defer func() {
close(unwanted)
close(results)
}()
wg.Wait()
}()

for i := range results {
// read channel results and if type received is not in return slice
// add it
if !contains(availableTypes, i) {
availableTypes = append(availableTypes, i)
}
}
availableTypes := createSlice(results)
unwantedPaths := createSlice(unwanted)

// stop metrics for file analyzer
metrics.Metric.Stop()
return availableTypes, nil
return availableTypes, unwantedPaths, nil
}

// worker determines the type of the file by ext (dockerfile and terraform)/content and
// writes the answer to the results channel
func worker(path string, results chan<- string, wg *sync.WaitGroup) {
// if no types were found, the worker will write the path of the file in the unwanted channel
func worker(path string, results, unwanted chan<- string, wg *sync.WaitGroup) {
defer wg.Done()
ext := filepath.Ext(path)
if ext == "" {
Expand All @@ -104,7 +106,7 @@ func worker(path string, results chan<- string, wg *sync.WaitGroup) {
results <- "terraform"
// Cloud Formation, Ansible, OpenAPI
case yaml, yml, ".json":
checkContent(path, results, ext)
checkContent(path, results, unwanted, ext)
}
}

Expand Down Expand Up @@ -138,12 +140,13 @@ var types = map[string]regexSlice{
}

// checkContent will determine the file type by content when worker was unable to
// determine by ext
func checkContent(path string, results chan<- string, ext string) {
// determine by ext; if no type was determined, checkContent writes the file path to the unwanted channel
func checkContent(path string, results, unwanted chan<- string, ext string) {
// get file content
content, err := os.ReadFile(path)
if err != nil {
log.Error().Msgf("failed to analyze file: %s", err)
return
}

returnType := ""
Expand Down Expand Up @@ -174,10 +177,24 @@ func checkContent(path string, results chan<- string, ext string) {
// write to channel type of file
results <- returnType
} else if ext == yaml || ext == yml {
// Since Ansible as no defining property
// and no other type was found for YAML assume its Ansible
// Since Ansible has no defining property
// and no other type matched for YAML file extension, assume the file type is Ansible
results <- "ansible"
} else {
// No type was determined (ignore on parser)
unwanted <- path
}
}

// createSlice drains the given channel and returns the distinct values it
// received, in order of first arrival.
//
// It blocks until the channel is closed, so the sender must close it once all
// values have been written. The returned slice is never nil: an empty,
// non-nil slice is returned when nothing was received, so callers comparing
// against []string{} keep working.
func createSlice(chanel chan string) []string {
	// seen provides constant-time duplicate detection instead of rescanning
	// the result slice for every value read from the channel.
	seen := make(map[string]struct{})
	slice := make([]string, 0)
	for i := range chanel {
		if _, ok := seen[i]; ok {
			continue
		}
		seen[i] = struct{}{}
		slice = append(slice, i)
	}
	return slice
}

// contains is a simple method to check if a slice
Expand Down
60 changes: 39 additions & 21 deletions pkg/analyzer/analyzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,57 +10,75 @@ import (

func TestAnalyzer_Analyze(t *testing.T) {
tests := []struct {
name string
paths []string
want []string
wantErr bool
name string
paths []string
wantTypes []string
wantExclude []string
wantErr bool
}{
{
name: "analyze_test_dir_single_path",
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test")},
want: []string{"dockerfile", "cloudformation", "kubernetes", "openapi", "terraform", "ansible"},
wantErr: false,
name: "analyze_test_dir_single_path",
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test")},
wantTypes: []string{"dockerfile", "cloudformation", "kubernetes", "openapi", "terraform", "ansible"},
wantExclude: []string{},
wantErr: false,
},
{
name: "analyze_test_helm_single_path",
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test/helm")},
want: []string{"kubernetes", "ansible"}, // ansible is added because of unknown type in values.yaml
wantErr: false,
name: "analyze_test_helm_single_path",
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test/helm")},
wantTypes: []string{"kubernetes", "ansible"}, // ansible is added because of unknown type in values.yaml
wantExclude: []string{},
wantErr: false,
},
{
name: "analyze_test_multiple_path",
paths: []string{
filepath.FromSlash("../../test/fixtures/analyzer_test/Dockerfile"),
filepath.FromSlash("../../test/fixtures/analyzer_test/terraform.tf")},
want: []string{"dockerfile", "terraform"}, // ansible is added because of unknown type in values.yaml
wantErr: false,
wantTypes: []string{"dockerfile", "terraform"}, // ansible is added because of unknown type in values.yaml
wantExclude: []string{},
wantErr: false,
},
{
name: "analyze_test_mult_checks_path",
paths: []string{
filepath.FromSlash("../../test/fixtures/analyzer_test/openAPI_test")},
want: []string{"kubernetes"}, // ansible is added because of unknown type in values.yaml
wantErr: false,
wantTypes: []string{"kubernetes"}, // ansible is added because of unknown type in values.yaml
wantExclude: []string{}, // ansible is added because of unknown type in values.yaml
wantErr: false,
},
{
name: "analyze_test_error_path",
paths: []string{
filepath.FromSlash("../../test/fixtures/analyzer_test/Dockserfile"),
filepath.FromSlash("../../test/fixtures/analyzer_test/terraform.tf")},
want: []string{}, // ansible is added because of unknown type in values.yaml
wantErr: true,
wantTypes: []string{},
wantExclude: []string{},
wantErr: true,
},
{
name: "analyze_test_unwanted_path",
paths: []string{
filepath.FromSlash("../../test/fixtures/type-test01/template01/metadata.json"),
},
wantTypes: []string{},
wantExclude: []string{filepath.FromSlash("../../test/fixtures/type-test01/template01/metadata.json")},
wantErr: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := Analyze(tt.paths)
got, exc, err := Analyze(tt.paths)
if (err != nil) != tt.wantErr {
t.Errorf("Analyze = %v, wantErr = %v", err, tt.wantErr)
}
sort.Strings(tt.want)
sort.Strings(tt.wantTypes)
sort.Strings(tt.wantExclude)
sort.Strings(got)
require.Equal(t, tt.want, got)
sort.Strings(exc)
require.Equal(t, tt.wantTypes, got, "wrong types from analyzer")
require.Equal(t, tt.wantExclude, exc, "wrong excludes from analyzer")
})
}
}

0 comments on commit 3d13a00

Please sign in to comment.