From 55e2b38583ba6305be30f9dc21278f8433c13a73 Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 13:01:49 +0800 Subject: [PATCH 01/11] feat(healthcheck/nonascii): ignore content in gitattributes --- cmd/repo-health-checker/main.go | 3 ++- go.mod | 2 +- pkg/healthcheck/nonascii.go | 41 ++++++++++++++++----------------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/cmd/repo-health-checker/main.go b/cmd/repo-health-checker/main.go index bba0991..5396fd4 100644 --- a/cmd/repo-health-checker/main.go +++ b/cmd/repo-health-checker/main.go @@ -43,6 +43,7 @@ func main() { showVersion := flag.Bool("version", false, "print current version") rootDir := flag.String("root", ".", "root dir for forbidden files check") repoSize := flag.Float64("repoSize", 2, "maximum size of the repo in MiB") + // TODO: remove gitWhitelist, it is only for backward compatibility now localList := flag.String("localList", "", "local file list for non-ascii file check") checkFileNameList := flag.String("checkFileNameList", "", "comma-separated list of files to check") checkFileSumList := flag.String("checkFileSumList", "", "comma-separated list of expected checksums") @@ -77,7 +78,7 @@ func main() { if err != nil { fmt.Printf("### Forbidden File Check Failed:\n%s\n", err.Error()) } - err = healthcheck.NonAsciiFiles(*rootDir, *localList) + err = healthcheck.NonAsciiFiles(*rootDir) if err != nil { fmt.Printf("### Non-ASCII Characters File Check Failed:\n%s\n", err.Error()) } diff --git a/go.mod b/go.mod index 6c81d72..62e166a 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.23.1 require ( github.com/criyle/go-judge v1.8.5 github.com/denormal/go-gitignore v0.0.0-20180930084346-ae8ad1d07817 + github.com/go-git/go-billy/v5 v5.5.0 github.com/go-git/go-git/v5 v5.12.0 github.com/jinzhu/copier v0.4.0 github.com/koding/multiconfig v0.0.0-20171124222453-69c27309b2d7 @@ -27,7 +28,6 @@ require ( github.com/fatih/camelcase v1.0.0 // indirect github.com/fatih/structs v1.1.0 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect - github.com/go-git/go-billy/v5 v5.5.0 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect diff --git a/pkg/healthcheck/nonascii.go b/pkg/healthcheck/nonascii.go index 0204c3c..07bc436 100644 --- a/pkg/healthcheck/nonascii.go +++ b/pkg/healthcheck/nonascii.go @@ -8,38 +8,33 @@ import ( "path/filepath" "strings" "unicode" + + "github.com/go-git/go-billy/v5/memfs" + "github.com/go-git/go-git/v5/plumbing/format/gitattributes" ) // getNonAscii retrieves a list of files in the specified root directory that contain non-ASCII characters. // It searches for non-ASCII characters in each file's content and returns a list of paths to files containing non-ASCII characters. -func getNonAscii(root string, localList string) ([]string, error) { +func getNonAscii(root string) ([]string, error) { var nonAscii []string + fs := memfs.New() + rootPath := []string{"./"} + var emptyStr []string - var dirs []string - - if localList != "" { - file, err := os.Open(localList) - if err != nil { - return nil, fmt.Errorf("Failed to open file %s: %v\n", localList, err) - } - defer file.Close() - - scanner := bufio.NewScanner(file) - for scanner.Scan() { - dirs = append(dirs, scanner.Text()) - } - if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("Error reading file %s: %v\n", localList, err) - } + attribute, err := gitattributes.ReadPatterns(fs, rootPath) + if err != nil { + return nil, err } - err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + matcher := gitattributes.NewMatcher(attribute) + + err = filepath.Walk(root, func(path string, info os.FileInfo, err error) error { if err != nil { return err } if info.IsDir() { - if info.Name() == ".git" || info.Name() == ".gitea" || info.Name() == "ci" || (localList != "" && inString(info.Name(), dirs)) { + if info.Name() == ".git" || info.Name() == ".gitea" { return filepath.SkipDir } else { return nil @@ -50,6 +45,10 @@ func getNonAscii(root string, localList string) ([]string, error) { return nil } + if _, ret := matcher.Match(rootPath, append(emptyStr, info.Name())); ret { + return nil + } + file, err := os.Open(path) if err != nil { return err @@ -79,8 +78,8 @@ func getNonAscii(root string, localList string) ([]string, error) { // nonAsciiFiles checks for non-ASCII characters in files within the specified root directory. // It prints a message with the paths to files containing non-ASCII characters, if any. -func NonAsciiFiles(root string, localList string) error { - nonAscii, err := getNonAscii(root, localList) +func NonAsciiFiles(root string) error { + nonAscii, err := getNonAscii(root) if err != nil { slog.Error("getting non-ascii", "err", err) return fmt.Errorf("error getting non-ascii: %w", err) -- 2.30.2 From 348eaa247c1fe3b107302c4ce3714d5aaa8c6183 Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 15:18:43 +0800 Subject: [PATCH 02/11] feat(healthcheck/nonascii): ignore content in gitattributes --- pkg/healthcheck/nonascii.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pkg/healthcheck/nonascii.go b/pkg/healthcheck/nonascii.go index 07bc436..f56ae10 100644 --- a/pkg/healthcheck/nonascii.go +++ b/pkg/healthcheck/nonascii.go @@ -9,7 +9,6 @@ import ( "strings" "unicode" - "github.com/go-git/go-billy/v5/memfs" "github.com/go-git/go-git/v5/plumbing/format/gitattributes" ) @@ -17,11 +16,13 @@ import ( // It searches for non-ASCII characters in each file's content and returns a list of paths to files containing non-ASCII characters. func getNonAscii(root string) ([]string, error) { var nonAscii []string - fs := memfs.New() - rootPath := []string{"./"} - var emptyStr []string + fs := os.DirFS(".") + f, err := fs.Open(".gitattributes") + if err != nil { + return nil, err + } - attribute, err := gitattributes.ReadPatterns(fs, rootPath) + attribute, err := gitattributes.ReadAttributes(f, nil, true) if err != nil { return nil, err } @@ -45,7 +46,13 @@ func getNonAscii(root string) ([]string, error) { return nil } - if _, ret := matcher.Match(rootPath, append(emptyStr, info.Name())); ret { + relPath, err := filepath.Rel(root, path) + if err != nil { + return err + } + str := strings.Split(relPath, "/") + _, ret := matcher.Match(str, nil) + if ret { return nil } -- 2.30.2 From c8f377245fb654e06d005a137853a037946552cb Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 15:26:07 +0800 Subject: [PATCH 03/11] fix(healthcheck/utils): unused function --- pkg/healthcheck/utils.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pkg/healthcheck/utils.go b/pkg/healthcheck/utils.go index 005379a..8939f22 100644 --- a/pkg/healthcheck/utils.go +++ b/pkg/healthcheck/utils.go @@ -5,15 +5,6 @@ import ( "regexp" ) -func inString(str1 string, strList []string) bool { - for _, str := range strList { - if str1 == str { - return true - } - } - return false -} - // addExt appends the specified extension to each file name in the given fileList. // It modifies the original fileList in place. func addExt(fileList []string, ext string) { -- 2.30.2 From decadeaef1d43c40b1b7bd135a4d5dc5d2c7f360 Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 15:42:20 +0800 Subject: [PATCH 04/11] fix(healthcheck/nonasciifile): no gitattributes --- go.mod | 2 +- pkg/healthcheck/nonascii.go | 43 +++++++++++++++++++++++-------------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/go.mod b/go.mod index 62e166a..6c81d72 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,6 @@ go 1.23.1 require ( github.com/criyle/go-judge v1.8.5 github.com/denormal/go-gitignore v0.0.0-20180930084346-ae8ad1d07817 - github.com/go-git/go-billy/v5 v5.5.0 github.com/go-git/go-git/v5 v5.12.0 github.com/jinzhu/copier v0.4.0 github.com/koding/multiconfig v0.0.0-20171124222453-69c27309b2d7 @@ -28,6 +27,7 @@ require ( github.com/fatih/camelcase v1.0.0 // indirect github.com/fatih/structs v1.1.0 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect + github.com/go-git/go-billy/v5 v5.5.0 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect diff --git a/pkg/healthcheck/nonascii.go b/pkg/healthcheck/nonascii.go index f56ae10..4ef7800 100644 --- a/pkg/healthcheck/nonascii.go +++ b/pkg/healthcheck/nonascii.go @@ -16,18 +16,27 @@ import ( // It searches for non-ASCII characters in each file's content and returns a list of paths to files containing non-ASCII characters. func getNonAscii(root string) ([]string, error) { var nonAscii []string - fs := os.DirFS(".") - f, err := fs.Open(".gitattributes") - if err != nil { + noAttri := false + var matcher gitattributes.Matcher + _, err := os.Stat(".gitattributes") + if os.IsNotExist(err) { + noAttri = true return nil, err } - attribute, err := gitattributes.ReadAttributes(f, nil, true) - if err != nil { - return nil, err - } + if !noAttri { + fs := os.DirFS(".") + f, err := fs.Open(".gitattributes") + if err != nil { + return nil, err + } - matcher := gitattributes.NewMatcher(attribute) + attribute, err := gitattributes.ReadAttributes(f, nil, true) + if err != nil { + return nil, err + } + matcher = gitattributes.NewMatcher(attribute) + } err = filepath.Walk(root, func(path string, info os.FileInfo, err error) error { if err != nil { @@ -46,14 +55,16 @@ func getNonAscii(root string) ([]string, error) { return nil } - relPath, err := filepath.Rel(root, path) - if err != nil { - return err - } - str := strings.Split(relPath, "/") - _, ret := matcher.Match(str, nil) - if ret { - return nil + if !noAttri { + relPath, err := filepath.Rel(root, path) + if err != nil { + return err + } + str := strings.Split(relPath, "/") + _, ret := matcher.Match(str, nil) + if ret { + return nil + } } file, err := os.Open(path) -- 2.30.2 From 373d31cb5cc36ec58ff2f502cdcf92ca9afd606a Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 15:47:38 +0800 Subject: [PATCH 05/11] fix(healthcheck/nonasciifile): no gitattributes --- pkg/healthcheck/nonascii.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/healthcheck/nonascii.go b/pkg/healthcheck/nonascii.go index 4ef7800..8dff2cd 100644 --- a/pkg/healthcheck/nonascii.go +++ b/pkg/healthcheck/nonascii.go @@ -21,7 +21,6 @@ func getNonAscii(root string) ([]string, error) { _, err := os.Stat(".gitattributes") if os.IsNotExist(err) { noAttri = true - return nil, err } if !noAttri { -- 2.30.2 From 68e4c8e01edaa31844d59b32606bf347fa040774 Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 16:21:12 +0800 Subject: [PATCH 06/11] fix(healthcheck/nonasciifile): comment, code style and check range --- cmd/repo-health-checker/main.go | 4 ++-- pkg/healthcheck/nonascii.go | 18 ++++++------------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/cmd/repo-health-checker/main.go b/cmd/repo-health-checker/main.go index 5396fd4..aedcb16 100644 --- a/cmd/repo-health-checker/main.go +++ b/cmd/repo-health-checker/main.go @@ -43,12 +43,12 @@ func main() { showVersion := flag.Bool("version", false, "print current version") rootDir := flag.String("root", ".", "root dir for forbidden files check") repoSize := flag.Float64("repoSize", 2, "maximum size of the repo in MiB") - // TODO: remove gitWhitelist, it is only for backward compatibility now + // TODO: remove git whitelist, it is only for backward compatibility now localList := flag.String("localList", "", "local file list for non-ascii file check") checkFileNameList := flag.String("checkFileNameList", "", "comma-separated list of files to check") checkFileSumList := flag.String("checkFileSumList", "", "comma-separated list of expected checksums") parseMultiValueFlag(&metaFile, "meta", "meta files to check") - // TODO: remove gitWhitelist, it is only for backward compatibility now + // TODO: remove git whitelist, it is only for backward compatibility now var gitWhitelist []string parseMultiValueFlag(&gitWhitelist, "whitelist", "[DEPRECATED] will be ignored") flag.Parse() diff --git a/pkg/healthcheck/nonascii.go b/pkg/healthcheck/nonascii.go index 8dff2cd..838069c 100644 --- a/pkg/healthcheck/nonascii.go +++ b/pkg/healthcheck/nonascii.go @@ -16,14 +16,14 @@ import ( // It searches for non-ASCII characters in each file's content and returns a list of paths to files containing non-ASCII characters. func getNonAscii(root string) ([]string, error) { var nonAscii []string - noAttri := false + gitattrExist := true var matcher gitattributes.Matcher _, err := os.Stat(".gitattributes") if os.IsNotExist(err) { - noAttri = true + gitattrExist = false } - if !noAttri { + if gitattrExist { fs := os.DirFS(".") f, err := fs.Open(".gitattributes") if err != nil { @@ -43,25 +43,19 @@ func getNonAscii(root string) ([]string, error) { } if info.IsDir() { - if info.Name() == ".git" || info.Name() == ".gitea" { + if info.Name() == ".git" { return filepath.SkipDir } else { return nil } } - if info.Name() == "healthcheck" { - return nil - } - - if !noAttri { + if gitattrExist { relPath, err := filepath.Rel(root, path) if err != nil { return err } - str := strings.Split(relPath, "/") - _, ret := matcher.Match(str, nil) - if ret { + if _, ret := matcher.Match(strings.Split(relPath, "/"), nil); ret { return nil } } -- 2.30.2 From 3fdad5f70e988dca579624c48b9a6ae8c9f0a858 Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 16:31:14 +0800 Subject: [PATCH 07/11] fix(healthcheck/nonasciifile): revert skip healthcheck --- pkg/healthcheck/nonascii.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/healthcheck/nonascii.go b/pkg/healthcheck/nonascii.go index 838069c..c8e537b 100644 --- a/pkg/healthcheck/nonascii.go +++ b/pkg/healthcheck/nonascii.go @@ -50,6 +50,10 @@ func getNonAscii(root string) ([]string, error) { } } + if info.Name() == "healthcheck" { + return nil + } + if gitattrExist { relPath, err := filepath.Rel(root, path) if err != nil { -- 2.30.2 From 375c874734a9b3ef34dddb6c4953bbb1707653d6 Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 16:32:24 +0800 Subject: [PATCH 08/11] fix(healthcheck/nonasciifile): typo --- cmd/repo-health-checker/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/repo-health-checker/main.go b/cmd/repo-health-checker/main.go index aedcb16..da6c97e 100644 --- a/cmd/repo-health-checker/main.go +++ b/cmd/repo-health-checker/main.go @@ -43,12 +43,12 @@ func main() { showVersion := flag.Bool("version", false, "print current version") rootDir := flag.String("root", ".", "root dir for forbidden files check") repoSize := flag.Float64("repoSize", 2, "maximum size of the repo in MiB") - // TODO: remove git whitelist, it is only for backward compatibility now + // TODO: remove localList, it is only for backward compatibility now localList := flag.String("localList", "", "local file list for non-ascii file check") checkFileNameList := flag.String("checkFileNameList", "", "comma-separated list of files to check") checkFileSumList := flag.String("checkFileSumList", "", "comma-separated list of expected checksums") parseMultiValueFlag(&metaFile, "meta", "meta files to check") - // TODO: remove git whitelist, it is only for backward compatibility now + // TODO: remove gitWhitelist, it is only for backward compatibility now var gitWhitelist []string parseMultiValueFlag(&gitWhitelist, "whitelist", "[DEPRECATED] will be ignored") flag.Parse() -- 2.30.2 From 78a711ff3bfc4592f2785efaaeb967a62e464178 Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 16:45:24 +0800 Subject: [PATCH 09/11] feat(healthcheck/nonasciifile): distinguish no-text and text flag --- pkg/healthcheck/nonascii.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/healthcheck/nonascii.go b/pkg/healthcheck/nonascii.go index c8e537b..178e3b7 100644 --- a/pkg/healthcheck/nonascii.go +++ b/pkg/healthcheck/nonascii.go @@ -59,7 +59,7 @@ func getNonAscii(root string) ([]string, error) { if err != nil { return err } - if _, ret := matcher.Match(strings.Split(relPath, "/"), nil); ret { + if ret, matched := matcher.Match(strings.Split(relPath, "/"), nil); matched && ret["text"].IsUnset() { return nil } } -- 2.30.2 From c7b8cda34deb5fab4d184523e37ba92d685ccf40 Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 16:49:48 +0800 Subject: [PATCH 10/11] fix(healthcheck/nonasciifile): don't skip healthcheck bin --- pkg/healthcheck/nonascii.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/healthcheck/nonascii.go b/pkg/healthcheck/nonascii.go index 178e3b7..e67cbc6 100644 --- a/pkg/healthcheck/nonascii.go +++ b/pkg/healthcheck/nonascii.go @@ -50,10 +50,6 @@ func getNonAscii(root string) ([]string, error) { } } - if info.Name() == "healthcheck" { - return nil - } - if gitattrExist { relPath, err := filepath.Rel(root, path) if err != nil { -- 2.30.2 From 01798453b363e0d73dce976e1c89ada4308a2512 Mon Sep 17 00:00:00 2001 From: zzjc1234 <2359047351@qq.com> Date: Mon, 21 Oct 2024 16:54:38 +0800 Subject: [PATCH 11/11] chore(healthcheck/nonasciifile): code style --- pkg/healthcheck/nonascii.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/healthcheck/nonascii.go b/pkg/healthcheck/nonascii.go index e67cbc6..c61002d 100644 --- a/pkg/healthcheck/nonascii.go +++ b/pkg/healthcheck/nonascii.go @@ -55,7 +55,8 @@ func getNonAscii(root string) ([]string, error) { if err != nil { return err } - if ret, matched := matcher.Match(strings.Split(relPath, "/"), nil); matched && ret["text"].IsUnset() { + ret, matched := matcher.Match(strings.Split(relPath, "/"), nil) + if matched && ret["text"].IsUnset() { return nil } } -- 2.30.2