fix(healthcheck): determine non-ascii file from git attributes (#69)
Co-authored-by: zzjc1234 <2359047351@qq.com>
Reviewed-on: #69
Reviewed-by: 张泊明518370910136 <bomingzh@sjtu.edu.cn>
Co-authored-by: 周赵嘉程521432910016 <zzjc123@sjtu.edu.cn>
Co-committed-by: 周赵嘉程521432910016 <zzjc123@sjtu.edu.cn>
This commit is contained in:
		
							parent
							
								
									03d072bfb6
								
							
						
					
					
						commit
						cfc455e0fb
					
				|  | @ -43,6 +43,7 @@ func main() { | ||||||
| 	showVersion := flag.Bool("version", false, "print current version") | 	showVersion := flag.Bool("version", false, "print current version") | ||||||
| 	rootDir := flag.String("root", ".", "root dir for forbidden files check") | 	rootDir := flag.String("root", ".", "root dir for forbidden files check") | ||||||
| 	repoSize := flag.Float64("repoSize", 2, "maximum size of the repo in MiB") | 	repoSize := flag.Float64("repoSize", 2, "maximum size of the repo in MiB") | ||||||
|  | 	// TODO: remove localList; it is kept only for backward compatibility.
 | ||||||
| 	localList := flag.String("localList", "", "local file list for non-ascii file check") | 	localList := flag.String("localList", "", "local file list for non-ascii file check") | ||||||
| 	checkFileNameList := flag.String("checkFileNameList", "", "comma-separated list of files to check") | 	checkFileNameList := flag.String("checkFileNameList", "", "comma-separated list of files to check") | ||||||
| 	checkFileSumList := flag.String("checkFileSumList", "", "comma-separated list of expected checksums") | 	checkFileSumList := flag.String("checkFileSumList", "", "comma-separated list of expected checksums") | ||||||
|  | @ -77,7 +78,7 @@ func main() { | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		fmt.Printf("### Forbidden File Check Failed:\n%s\n", err.Error()) | 		fmt.Printf("### Forbidden File Check Failed:\n%s\n", err.Error()) | ||||||
| 	} | 	} | ||||||
| 	err = healthcheck.NonAsciiFiles(*rootDir, *localList) | 	err = healthcheck.NonAsciiFiles(*rootDir) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		fmt.Printf("### Non-ASCII Characters File Check Failed:\n%s\n", err.Error()) | 		fmt.Printf("### Non-ASCII Characters File Check Failed:\n%s\n", err.Error()) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -8,47 +8,58 @@ import ( | ||||||
| 	"path/filepath" | 	"path/filepath" | ||||||
| 	"strings" | 	"strings" | ||||||
| 	"unicode" | 	"unicode" | ||||||
|  | 
 | ||||||
|  | 	"github.com/go-git/go-git/v5/plumbing/format/gitattributes" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| // getNonAscii retrieves a list of files in the specified root directory that contain non-ASCII characters.
 | // getNonAscii retrieves a list of files in the specified root directory that contain non-ASCII characters.
 | ||||||
| // It searches for non-ASCII characters in each file's content and returns a list of paths to files containing non-ASCII characters.
 | // It searches for non-ASCII characters in each file's content and returns a list of paths to files containing non-ASCII characters.
 | ||||||
| func getNonAscii(root string, localList string) ([]string, error) { | func getNonAscii(root string) ([]string, error) { | ||||||
| 	var nonAscii []string | 	var nonAscii []string | ||||||
|  | 	gitattrExist := true | ||||||
|  | 	var matcher gitattributes.Matcher | ||||||
|  | 	_, err := os.Stat(".gitattributes") | ||||||
|  | 	if os.IsNotExist(err) { | ||||||
|  | 		gitattrExist = false | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	var dirs []string | 	if gitattrExist { | ||||||
| 
 | 		fs := os.DirFS(".") | ||||||
| 	if localList != "" { | 		f, err := fs.Open(".gitattributes") | ||||||
| 		file, err := os.Open(localList) |  | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, fmt.Errorf("Failed to open file %s: %v\n", localList, err) | 			return nil, err | ||||||
| 		} |  | ||||||
| 		defer file.Close() |  | ||||||
| 
 |  | ||||||
| 		scanner := bufio.NewScanner(file) |  | ||||||
| 		for scanner.Scan() { |  | ||||||
| 			dirs = append(dirs, scanner.Text()) |  | ||||||
| 		} |  | ||||||
| 		if err := scanner.Err(); err != nil { |  | ||||||
| 			return nil, fmt.Errorf("Error reading file %s: %v\n", localList, err) |  | ||||||
| 		} |  | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 	err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { | 		attribute, err := gitattributes.ReadAttributes(f, nil, true) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 		matcher = gitattributes.NewMatcher(attribute) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	err = filepath.Walk(root, func(path string, info os.FileInfo, err error) error { | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		if info.IsDir() { | 		if info.IsDir() { | ||||||
| 			if info.Name() == ".git" || info.Name() == ".gitea" || info.Name() == "ci" || (localList != "" && inString(info.Name(), dirs)) { | 			if info.Name() == ".git" { | ||||||
| 				return filepath.SkipDir | 				return filepath.SkipDir | ||||||
| 			} else { | 			} else { | ||||||
| 				return nil | 				return nil | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		if info.Name() == "healthcheck" { | 		if gitattrExist { | ||||||
|  | 			relPath, err := filepath.Rel(root, path) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return err | ||||||
|  | 			} | ||||||
|  | 			ret, matched := matcher.Match(strings.Split(relPath, "/"), nil) | ||||||
|  | 			if matched && ret["text"].IsUnset() { | ||||||
| 				return nil | 				return nil | ||||||
| 			} | 			} | ||||||
|  | 		} | ||||||
| 
 | 
 | ||||||
| 		file, err := os.Open(path) | 		file, err := os.Open(path) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
|  | @ -79,8 +90,8 @@ func getNonAscii(root string, localList string) ([]string, error) { | ||||||
| 
 | 
 | ||||||
| // nonAsciiFiles checks for non-ASCII characters in files within the specified root directory.
 | // nonAsciiFiles checks for non-ASCII characters in files within the specified root directory.
 | ||||||
| // It prints a message with the paths to files containing non-ASCII characters, if any.
 | // It prints a message with the paths to files containing non-ASCII characters, if any.
 | ||||||
| func NonAsciiFiles(root string, localList string) error { | func NonAsciiFiles(root string) error { | ||||||
| 	nonAscii, err := getNonAscii(root, localList) | 	nonAscii, err := getNonAscii(root) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		slog.Error("getting non-ascii", "err", err) | 		slog.Error("getting non-ascii", "err", err) | ||||||
| 		return fmt.Errorf("error getting non-ascii: %w", err) | 		return fmt.Errorf("error getting non-ascii: %w", err) | ||||||
|  |  | ||||||
|  | @ -5,15 +5,6 @@ import ( | ||||||
| 	"regexp" | 	"regexp" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| func inString(str1 string, strList []string) bool { |  | ||||||
| 	for _, str := range strList { |  | ||||||
| 		if str1 == str { |  | ||||||
| 			return true |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return false |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // addExt appends the specified extension to each file name in the given fileList.
 | // addExt appends the specified extension to each file name in the given fileList.
 | ||||||
| // It modifies the original fileList in place.
 | // It modifies the original fileList in place.
 | ||||||
| func addExt(fileList []string, ext string) { | func addExt(fileList []string, ext string) { | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user