From cfc455e0fbbbd911d737f0f779bf8029db5eb595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E8=B5=B5=E5=98=89=E7=A8=8B521432910016?= Date: Mon, 21 Oct 2024 17:04:52 +0800 Subject: [PATCH] fix(healthcheck): determine non-ascii file from git attributes (#69) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: zzjc1234 <2359047351@qq.com> Reviewed-on: https://focs.ji.sjtu.edu.cn/git/JOJ/JOJ3/pulls/69 Reviewed-by: 张泊明518370910136 Co-authored-by: 周赵嘉程521432910016 Co-committed-by: 周赵嘉程521432910016 --- cmd/repo-health-checker/main.go | 3 +- pkg/healthcheck/nonascii.go | 59 +++++++++++++++++++-------------- pkg/healthcheck/utils.go | 9 ----- 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/cmd/repo-health-checker/main.go b/cmd/repo-health-checker/main.go index bba0991..da6c97e 100644 --- a/cmd/repo-health-checker/main.go +++ b/cmd/repo-health-checker/main.go @@ -43,6 +43,7 @@ func main() { showVersion := flag.Bool("version", false, "print current version") rootDir := flag.String("root", ".", "root dir for forbidden files check") repoSize := flag.Float64("repoSize", 2, "maximum size of the repo in MiB") + // TODO: remove localList, it is only for backward compatibility now localList := flag.String("localList", "", "local file list for non-ascii file check") checkFileNameList := flag.String("checkFileNameList", "", "comma-separated list of files to check") checkFileSumList := flag.String("checkFileSumList", "", "comma-separated list of expected checksums") @@ -77,7 +78,7 @@ func main() { if err != nil { fmt.Printf("### Forbidden File Check Failed:\n%s\n", err.Error()) } - err = healthcheck.NonAsciiFiles(*rootDir, *localList) + err = healthcheck.NonAsciiFiles(*rootDir) if err != nil { fmt.Printf("### Non-ASCII Characters File Check Failed:\n%s\n", err.Error()) } diff --git a/pkg/healthcheck/nonascii.go b/pkg/healthcheck/nonascii.go index 0204c3c..c61002d 100644 --- a/pkg/healthcheck/nonascii.go +++ b/pkg/healthcheck/nonascii.go @@ -8,46 +8,57 @@ import ( "path/filepath" "strings" "unicode" + + "github.com/go-git/go-git/v5/plumbing/format/gitattributes" ) // getNonAscii retrieves a list of files in the specified root directory that contain non-ASCII characters. // It searches for non-ASCII characters in each file's content and returns a list of paths to files containing non-ASCII characters. -func getNonAscii(root string, localList string) ([]string, error) { +func getNonAscii(root string) ([]string, error) { var nonAscii []string - - var dirs []string - - if localList != "" { - file, err := os.Open(localList) - if err != nil { - return nil, fmt.Errorf("Failed to open file %s: %v\n", localList, err) - } - defer file.Close() - - scanner := bufio.NewScanner(file) - for scanner.Scan() { - dirs = append(dirs, scanner.Text()) - } - if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("Error reading file %s: %v\n", localList, err) - } + gitattrExist := true + var matcher gitattributes.Matcher + _, err := os.Stat(".gitattributes") + if os.IsNotExist(err) { + gitattrExist = false } - err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if gitattrExist { + fs := os.DirFS(".") + f, err := fs.Open(".gitattributes") + if err != nil { + return nil, err + } + + attribute, err := gitattributes.ReadAttributes(f, nil, true) + if err != nil { + return nil, err + } + matcher = gitattributes.NewMatcher(attribute) + } + + err = filepath.Walk(root, func(path string, info os.FileInfo, err error) error { if err != nil { return err } if info.IsDir() { - if info.Name() == ".git" || info.Name() == ".gitea" || info.Name() == "ci" || (localList != "" && inString(info.Name(), dirs)) { + if info.Name() == ".git" { return filepath.SkipDir } else { return nil } } - if info.Name() == "healthcheck" { - return nil + if gitattrExist { + relPath, err := filepath.Rel(root, path) + if err != nil { + return err + } + ret, matched := matcher.Match(strings.Split(relPath, "/"), nil) + if matched && ret["text"].IsUnset() { + return nil + } } file, err := os.Open(path) @@ -79,8 +90,8 @@ func getNonAscii(root string, localList string) ([]string, error) { // nonAsciiFiles checks for non-ASCII characters in files within the specified root directory. // It prints a message with the paths to files containing non-ASCII characters, if any. -func NonAsciiFiles(root string, localList string) error { - nonAscii, err := getNonAscii(root, localList) +func NonAsciiFiles(root string) error { + nonAscii, err := getNonAscii(root) if err != nil { slog.Error("getting non-ascii", "err", err) return fmt.Errorf("error getting non-ascii: %w", err) diff --git a/pkg/healthcheck/utils.go b/pkg/healthcheck/utils.go index 005379a..8939f22 100644 --- a/pkg/healthcheck/utils.go +++ b/pkg/healthcheck/utils.go @@ -5,15 +5,6 @@ import ( "regexp" ) -func inString(str1 string, strList []string) bool { - for _, str := range strList { - if str1 == str { - return true - } - } - return false -} - // addExt appends the specified extension to each file name in the given fileList. // It modifies the original fileList in place. func addExt(fileList []string, ext string) {