feat: repo health check (#16) #17

Merged
张泊明518370910136 merged 37 commits from file_check into master 2024-09-11 20:09:27 +08:00
22 changed files with 701 additions and 1 deletions

28
.gitmodules vendored
View File

@ -38,6 +38,34 @@
path = examples/keyword/clangtidy/sillycode
url = ssh://git@focs.ji.sjtu.edu.cn:2222/FOCS-dev/JOJ3-examples.git
branch = keyword/clangtidy/sillycode
[submodule "examples/healthcheck/asciifile"]
path = examples/healthcheck/asciifile
url = ssh://git@focs.ji.sjtu.edu.cn:2222/FOCS-dev/JOJ3-examples.git
branch = healthcheck/asciifile
[submodule "examples/healthcheck/asciimsg"]
path = examples/healthcheck/asciimsg
url = ssh://git@focs.ji.sjtu.edu.cn:2222/FOCS-dev/JOJ3-examples.git
branch = healthcheck/asciimsg
[submodule "examples/healthcheck/forbiddenfile"]
path = examples/healthcheck/forbiddenfile
url = ssh://git@focs.ji.sjtu.edu.cn:2222/FOCS-dev/JOJ3-examples.git
branch = healthcheck/forbiddenfile
[submodule "examples/healthcheck/meta"]
path = examples/healthcheck/meta
url = ssh://git@focs.ji.sjtu.edu.cn:2222/FOCS-dev/JOJ3-examples.git
branch = healthcheck/meta
[submodule "examples/healthcheck/release"]
path = examples/healthcheck/release
url = ssh://git@focs.ji.sjtu.edu.cn:2222/FOCS-dev/JOJ3-examples.git
branch = healthcheck/release
[submodule "examples/healthcheck/reposize"]
path = examples/healthcheck/reposize
url = ssh://git@focs.ji.sjtu.edu.cn:2222/FOCS-dev/JOJ3-examples.git
branch = healthcheck/reposize
[submodule "examples/healthcheck/repoverify"]
path = examples/healthcheck/repoverify
url = ssh://git@focs.ji.sjtu.edu.cn:2222/FOCS-dev/JOJ3-examples.git
branch = healthcheck/repoverify
[submodule "examples/cppcheck/sillycode"]
path = examples/cppcheck/sillycode
url = ssh://git@focs.ji.sjtu.edu.cn:2222/FOCS-dev/JOJ3-examples.git

View File

@ -9,12 +9,14 @@
3. Enable cgroup v2 for your OS. Check [here](https://stackoverflow.com/a/73376219/13724598). So that you do not need root permission to run `go-judge`.
4. Clone [go-judge](https://github.com/criyle/go-judge).
```bash
$ git clone https://github.com/criyle/go-judge && cd go-judge
$ go build -o ./tmp/go-judge ./cmd/go-judge
```
5. Run `go-judge`.
```bash
$ # make sure you are in go-judge directory
$ ./tmp/go-judge -http-addr 0.0.0.0:5050 -grpc-addr 0.0.0.0:5051 -monitor-addr 0.0.0.0:5052 -enable-grpc -enable-debug -enable-metrics
@ -51,6 +53,7 @@ ok focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/cmd/joj3 2.290s coverage: 74.0%
1. Install [`pre-commit`](https://pre-commit.com/), [`golangci-lint`](https://golangci-lint.run), [`goimports`](https://golang.org/x/tools/cmd/goimports), [`gofumpt`](https://github.com/mvdan/gofumpt).
2. Install the pre-commit hooks. It will run some checks before you commit.
```bash
$ pre-commit install
pre-commit installed at .git/hooks/pre-commit
@ -84,3 +87,11 @@ Check the `Result` at <https://github.com/criyle/go-judge#rest-api-interface>.
- `Score int`: score of the stage.
- `Comment string`: comment on the stage.
### HealthCheck
The repo health check returns a JSON list of check results. The structure follows the score-comment pattern.
HealthCheck currently includes the following checks: `reposize`, `forbidden file`, `Metafile existence`, `non-ascii character` in files and commit messages, `release tag`, and `ci files invariance`.
In the workflow, `joj3` passes CLI args to the healthcheck binary. See `./cmd/healthcheck/main.go` to view all flags.

84
cmd/healthcheck/main.go Normal file
View File

@ -0,0 +1,84 @@
package main
import (
"flag"
"fmt"
"log/slog"
"os"
"focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/pkg/healthcheck"
)
// parseMultiValueFlag registers a repeatable command-line flag named flagName,
// with the given description, on the default flag set. The slice behind values
// is viewed as a multiStringValue, so every occurrence of the flag appends its
// argument to *values.
func parseMultiValueFlag(values *[]string, flagName, description string) {
	target := (*multiStringValue)(values)
	flag.Var(target, flagName, description)
}
// multiStringValue adapts a string slice to the flag.Value interface so that a
// flag can be supplied several times on the command line.
type multiStringValue []string

// Set records one more occurrence of the flag by appending value to the
// slice. It always returns nil.
func (m *multiStringValue) Set(value string) error {
	extended := append(*m, value)
	*m = extended
	return nil
}

// String renders the collected values with fmt's default slice formatting.
func (m *multiStringValue) String() string {
	return fmt.Sprint(*m)
}
// setupSlog installs a text-format slog logger that writes to standard error
// as the process-wide default logger, using default handler options.
func setupSlog() {
	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{})))
}
// main is the entry point of the healthcheck binary. It parses the
// command-line flags, routes slog output to stderr, and then runs every
// repository health check in a fixed order.
//
// A failing check does not stop the remaining ones: its message is printed to
// stdout under a "## ... Failed" heading and execution continues with the next
// check. Nothing is printed for a check that passes.
func main() {
	// Repeatable flags. releaseTags is registered so the flag is accepted on
	// the command line, but no check below consumes it.
	var gitWhitelist, metaFile, releaseTags []string
	rootDir := flag.String("root", "", "")
	repo := flag.String("repo", "", "")
	localList := flag.String("localList", "", "")
	droneBranch := flag.String("droneBranch", "", "")
	releaseCategories := flag.String("releaseCategories", "", "")
	releaseNumber := flag.Int("releaseNumber", 0, "")
	checkFileNameList := flag.String("checkFileNameList", "", "Comma-separated list of files to check.")
	checkFileSumList := flag.String("checkFileSumList", "", "Comma-separated list of expected checksums.")
	parseMultiValueFlag(&gitWhitelist, "whitelist", "")
	parseMultiValueFlag(&metaFile, "meta", "")
	parseMultiValueFlag(&releaseTags, "releaseTags", "")
	flag.Parse()
	setupSlog()

	if err := healthcheck.RepoSize(); err != nil {
		fmt.Printf("## Repo Size Check Failed:\n%s\n", err.Error())
	}
	if err := healthcheck.ForbiddenCheck(*rootDir, gitWhitelist, *localList, *repo, *droneBranch); err != nil {
		fmt.Printf("## Forbidden File Check Failed:\n%s\n", err.Error())
	}
	// This heading used to repeat the forbidden-file one, which made a missing
	// meta file look like a forbidden-file failure in the report.
	if err := healthcheck.MetaCheck(*rootDir, metaFile); err != nil {
		fmt.Printf("## Meta File Check Failed:\n%s\n", err.Error())
	}
	if err := healthcheck.NonAsciiFiles(*rootDir, *localList); err != nil {
		fmt.Printf("## Non-ASCII Characters File Check Failed:\n%s\n", err.Error())
	}
	if err := healthcheck.NonAsciiMsg(*rootDir); err != nil {
		fmt.Printf("## Non-ASCII Characters Commit Message Check Failed:\n%s\n", err.Error())
	}
	if err := healthcheck.CheckTags(*rootDir, *releaseCategories, *releaseNumber); err != nil {
		fmt.Printf("## Release Tag Check Failed:\n%s\n", err.Error())
	}
	// FIXME: for drone usage
	if err := healthcheck.VerifyFiles(*rootDir, *checkFileNameList, *checkFileSumList); err != nil {
		fmt.Printf("## Repo File Check Failed:\n%s\n", err.Error())
	}
}

@ -0,0 +1 @@
Subproject commit a236c7ea934de5e59525fa27e4211f4a48dbbf93

@ -0,0 +1 @@
Subproject commit 36bb5fb15f100078bd3af1027017825932f8c24b

@ -0,0 +1 @@
Subproject commit 62c43fe51666417c7cbb227d6daaeee7189b6944

@ -0,0 +1 @@
Subproject commit 5c2cd9e6b31c6f223ac5d3ee5b07f11fbd378427

@ -0,0 +1 @@
Subproject commit fc9828bde135e53a7ef3e6367c708d9a000afc74

@ -0,0 +1 @@
Subproject commit a49a6aa29d3dcb0509e8de540db0781aca596f26

@ -0,0 +1 @@
Subproject commit 2f455dca9d28e39926e68b9b13eef39b0a9f67fc

View File

@ -5,6 +5,7 @@ import (
_ "focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/parsers/cppcheck"
_ "focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/parsers/cpplint"
_ "focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/parsers/diff"
_ "focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/parsers/healthcheck"
_ "focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/parsers/keyword"
_ "focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/parsers/resultstatus"
_ "focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/parsers/sample"

View File

@ -0,0 +1,9 @@
package healthcheck
import "focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/stage"
// name is the identifier under which this parser is registered.
var name = "healthcheck"

// init registers a Healthcheck parser under name when the package is
// initialised.
func init() {
	stage.RegisterParser(name, new(Healthcheck))
}

View File

@ -0,0 +1,41 @@
package healthcheck
import (
"fmt"
"focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/stage"
"github.com/criyle/go-judge/envexec"
)
// Healthcheck is the stage parser for health check output. It carries no
// state; its Run method converts executor results into parser results.
type Healthcheck struct{}
// Parse converts a single executor result into a parser result.
//
// When the executor status is not "accepted", the comment reports the status
// together with the captured stdout and stderr, and the second return value
// (force quit) is true. Otherwise the comment is the captured stdout verbatim
// and force quit is true exactly when stdout is non-empty. The score is always
// 0.
func Parse(executorResult stage.ExecutorResult) (stage.ParserResult, bool) {
	stdout, stderr := executorResult.Files["stdout"], executorResult.Files["stderr"]
	if executorResult.Status == stage.Status(envexec.StatusAccepted) {
		return stage.ParserResult{Score: 0, Comment: stdout}, stdout != ""
	}
	comment := fmt.Sprintf(
		"Unexpected executor status: %s.\nStdout: %s\nStderr: %s",
		executorResult.Status, stdout, stderr,
	)
	return stage.ParserResult{Score: 0, Comment: comment}, true
}
// Run parses every executor result with Parse and returns the parser results
// in the same order. The boolean is true when at least one result asked for a
// force quit. confAny is not used, and the returned error is always nil.
func (*Healthcheck) Run(results []stage.ExecutorResult, confAny any) (
	[]stage.ParserResult, bool, error,
) {
	var parsed []stage.ParserResult
	quit := false
	for i := range results {
		one, stop := Parse(results[i])
		parsed = append(parsed, one)
		if stop {
			quit = true
		}
	}
	return parsed, quit, nil
}

65
pkg/healthcheck/commit.go Normal file
View File

@ -0,0 +1,65 @@
package healthcheck
import (
"fmt"
"log/slog"
"strings"
"unicode"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing/object"
)
// NonAsciiMsg checks the commit messages reachable from HEAD of the git
// repository at root for non-ASCII characters.
//
// Empty messages and messages starting with "Merge pull request" are skipped.
// Every other message containing at least one rune above unicode.MaxASCII is
// collected, once per commit, in the order the log iterator yields them.
//
// It returns nil when no offending message is found, an error listing the
// offending messages otherwise, and a descriptive error when the repository,
// its HEAD or its history cannot be read.
func NonAsciiMsg(root string) error {
	repo, err := git.PlainOpen(root)
	if err != nil {
		slog.Error("openning git repo", "err", err)
		return fmt.Errorf("error openning git repo: %v", err)
	}

	ref, err := repo.Head()
	if err != nil {
		slog.Error("getting reference", "err", err)
		return fmt.Errorf("error getting reference: %v", err)
	}
	commits, err := repo.Log(&git.LogOptions{From: ref.Hash()})
	if err != nil {
		slog.Error("getting commits", "err", err)
		return fmt.Errorf("error getting commits from reference %s: %v", ref.Hash(), err)
	}

	isNonASCII := func(r rune) bool { return r > unicode.MaxASCII }
	var nonAsciiMsgs []string
	err = commits.ForEach(func(c *object.Commit) error {
		msg := c.Message
		if msg == "" || strings.HasPrefix(msg, "Merge pull request") {
			return nil
		}
		// Record the message once, however many non-ASCII runes it contains;
		// appending per rune used to duplicate it in the report.
		if strings.IndexFunc(msg, isNonASCII) >= 0 {
			nonAsciiMsgs = append(nonAsciiMsgs, msg)
		}
		return nil
	})
	if err != nil {
		slog.Error("iterating commits", "err", err)
		return fmt.Errorf("error iterating commits: %v", err)
	}

	if len(nonAsciiMsgs) > 0 {
		return fmt.Errorf("Non-ASCII characters in commit messages:\n%s", strings.Join(nonAsciiMsgs, "\n"))
	}
	return nil
}

View File

@ -0,0 +1,90 @@
package healthcheck
import (
"bufio"
"fmt"
"log/slog"
"os"
"path/filepath"
"regexp"
"strings"
)
// getForbiddens walks the tree under root and returns the path of every file
// whose base name matches none of the patterns in fileList (the patterns are
// compiled by getRegex).
//
// Directories named ".git", ".gitea" or "ci" are not descended into. When
// localList is non-empty it names a file with one directory name per line;
// directories with those names are skipped as well.
//
// An error is returned when a pattern does not compile, when localList cannot
// be opened or read, or when the walk itself fails.
func getForbiddens(root string, fileList []string, localList string) ([]string, error) {
	var (
		matches   []string
		dirs      []string
		regexList []*regexp.Regexp
		err       error
	)

	regexList, err = getRegex(fileList)
	if err != nil {
		return nil, err
	}

	if localList != "" {
		file, err := os.Open(localList)
		if err != nil {
			return nil, fmt.Errorf("Failed to open file %s: %v\n", localList, err)
		}
		defer file.Close()

		lines := bufio.NewScanner(file)
		for lines.Scan() {
			dirs = append(dirs, lines.Text())
		}
		if err := lines.Err(); err != nil {
			return nil, fmt.Errorf("Error reading file %s: %v\n", localList, err)
		}
	}

	visit := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		base := info.Name()
		if info.IsDir() {
			switch {
			case base == ".git", base == ".gitea", base == "ci":
				return filepath.SkipDir
			case localList != "" && inString(base, dirs):
				return filepath.SkipDir
			}
			return nil
		}
		// A file is allowed as soon as one pattern matches its base name.
		for _, allowed := range regexList {
			if allowed.MatchString(base) {
				return nil
			}
		}
		matches = append(matches, path)
		return nil
	}

	err = filepath.Walk(root, visit)
	return matches, err
}
// ForbiddenCheck looks for forbidden files under rootDir, i.e. files whose
// names match none of the patterns in regexList, honouring the directory skip
// list in localList.
//
// It returns nil when none are found. Otherwise the returned error lists the
// offending paths and spells out the shell commands for removing them from
// history and force-pushing to droneBranch of repo. A failure while collecting
// the files is logged and returned wrapped.
func ForbiddenCheck(rootDir string, regexList []string, localList string, repo string, droneBranch string) error {
	forbids, err := getForbiddens(rootDir, regexList, localList)
	if err != nil {
		slog.Error("getting forbiddens", "error", err)
		return fmt.Errorf("error getting forbiddens: %w", err)
	}
	if len(forbids) == 0 {
		return nil
	}

	// Tail of the suggested fix: close the shell loop, then re-add the remote
	// and force-push.
	fixTail := "; do git filter-repo --force --invert-paths --path \"$i\"; done\ngit remote add origin " +
		repo + "\ngit push --set-upstream origin " +
		droneBranch + " --force"
	commaList := strings.Join(forbids, ", ")
	spaceList := strings.Join(forbids, " ")
	return fmt.Errorf("The following forbidden files were found: %s\n\nTo fix it, first make a backup of your repository and then run the following commands:\nfor i in %s%s",
		commaList, spaceList, fixTail)
}

View File

@ -1 +0,0 @@
package healthcheck

73
pkg/healthcheck/meta.go Normal file
View File

@ -0,0 +1,73 @@
package healthcheck
import (
"fmt"
"log/slog"
"os"
)
// getMetas reports which of the expected metadata files are missing from the
// top level of rootDir.
//
// Each name in fileList is turned into a pattern by appending `\.*`, and the
// patterns are compiled by getRegex. A pattern counts as satisfied when it
// matches the name of at least one non-directory entry of rootDir;
// sub-directories are not searched.
//
// It returns the unsatisfied patterns, a human-readable report with one
// tab-indented line per missing file (readme and changelog entries carry a
// pointer to further reading), and an error when a pattern does not compile or
// rootDir cannot be read. fileList itself is left untouched.
func getMetas(rootDir string, fileList []string) ([]string, string, error) {
	// Work on a copy so the caller's slice is not rewritten with the suffix.
	patterns := make([]string, len(fileList))
	copy(patterns, fileList)
	addExt(patterns, "\\.*")
	regexList, err := getRegex(patterns)
	if err != nil {
		return nil, "", err
	}

	files, err := os.ReadDir(rootDir)
	if err != nil {
		return nil, "", fmt.Errorf("error reading directory: %w", err)
	}

	var unmatchedList []string
	umatchedRes := ""
	// TODO: there seems to be no good substitute for `find` at the moment;
	// revisit this loop if a better solution appears.
	for i, regex := range regexList {
		// matched must start out false for every pattern; sharing one flag
		// across patterns hid every file missing after the first match.
		matched := false
		for _, file := range files {
			if !file.IsDir() && regex.MatchString(file.Name()) {
				matched = true
				break
			}
		}
		if matched {
			continue
		}

		unmatchedList = append(unmatchedList, patterns[i])
		str := fmt.Sprint("\tno ", patterns[i], " file found")
		switch patterns[i] {
		case "readme\\.*":
			str += ", please refer to https://www.makeareadme.com/ for more information"
		case "changelog\\.*":
			str += ", please refer to https://keepachangelog.com/en/1.1.0/ for more information"
		}
		umatchedRes += str + "\n"
	}

	return unmatchedList, umatchedRes, nil
}
// MetaCheck verifies that every metadata file named in fileList exists at the
// top level of rootDir.
//
// It returns nil when all of them are present. Otherwise the returned error
// states how many are missing, followed by the per-file report produced by
// getMetas. A failure while collecting the information is logged and returned
// wrapped.
func MetaCheck(rootDir string, fileList []string) error {
	unmatchedList, umatchedRes, err := getMetas(rootDir, fileList)
	if err != nil {
		slog.Error("getting metas", "err", err)
		return fmt.Errorf("error getting metas: %w", err)
	}
	if len(unmatchedList) != 0 {
		// The report is built from user-supplied file names, so it is passed
		// as an argument rather than spliced into the format string, where a
		// '%' would be read as a verb.
		return fmt.Errorf("%d important project files missing\n%s", len(unmatchedList), umatchedRes)
	}
	return nil
}

View File

@ -0,0 +1,93 @@
package healthcheck
import (
"bufio"
"fmt"
"log/slog"
"os"
"path/filepath"
"strings"
"unicode"
)
// getNonAscii walks the tree under root and returns, in walk order, the path
// (prefixed with a tab) of every file whose content contains a non-ASCII
// character.
//
// Directories named ".git", ".gitea" or "ci" are not descended into. When
// localList is non-empty it names a file with one directory name per line;
// directories with those names are skipped as well. Files named "healthcheck"
// are never inspected.
//
// An error is returned when localList cannot be opened or read, when a file
// cannot be opened, or when the walk itself fails.
func getNonAscii(root string, localList string) ([]string, error) {
	var nonAscii []string
	var dirs []string

	if localList != "" {
		file, err := os.Open(localList)
		if err != nil {
			return nil, fmt.Errorf("Failed to open file %s: %v\n", localList, err)
		}
		defer file.Close()

		scanner := bufio.NewScanner(file)
		for scanner.Scan() {
			dirs = append(dirs, scanner.Text())
		}

		if err := scanner.Err(); err != nil {
			return nil, fmt.Errorf("Error reading file %s: %v\n", localList, err)
		}
	}

	err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		if info.IsDir() {
			if info.Name() == ".git" || info.Name() == ".gitea" || info.Name() == "ci" || (localList != "" && inString(info.Name(), dirs)) {
				return filepath.SkipDir
			}
			return nil
		}

		if info.Name() == "healthcheck" {
			return nil
		}

		file, err := os.Open(path)
		if err != nil {
			return err
		}
		defer file.Close()

		// Scan rune by rune rather than line by line: a line-based scanner
		// gives up silently on a line longer than its 64 KiB token limit,
		// which let non-ASCII content in or after such a line go unnoticed.
		scanner := bufio.NewScanner(file)
		scanner.Split(bufio.ScanRunes)
		for scanner.Scan() {
			// Any non-ASCII rune, including the replacement rune produced
			// for invalid UTF-8, starts with a byte above MaxASCII.
			if scanner.Bytes()[0] > unicode.MaxASCII {
				nonAscii = append(nonAscii, "\t"+path)
				break
			}
		}
		// As before, a read error in the middle of a file is not reported.

		return nil
	})

	return nonAscii, err
}
// NonAsciiFiles checks the files under root for non-ASCII characters,
// honouring the directory skip list in localList.
//
// It returns nil when every inspected file is pure ASCII. Otherwise the
// returned error lists the offending paths, one per line. A failure while
// scanning is logged and returned wrapped.
func NonAsciiFiles(root string, localList string) error {
	offenders, err := getNonAscii(root, localList)
	switch {
	case err != nil:
		slog.Error("getting non-ascii", "err", err)
		return fmt.Errorf("error getting non-ascii: %w", err)
	case len(offenders) == 0:
		return nil
	}
	listing := strings.Join(offenders, "\n")
	return fmt.Errorf("Non-ASCII characters found in the following files:\n%s", listing)
}

View File

@ -0,0 +1,40 @@
package healthcheck
import (
"fmt"
"log/slog"
"os/exec"
"strconv"
"strings"
)
// RepoSize checks whether the git repository in the current working directory
// is oversized.
//
// It runs `git count-objects -v`, adds up the values of every output entry
// whose key starts with "size" (size, size-pack, size-garbage), and returns an
// error when the total exceeds 2048. The error message equates this with 2MB,
// i.e. the values are taken to be KiB.
//
// An error is also returned when the git command fails or when one of the
// size values is not an integer.
func RepoSize() error {
	// TODO: reimplement here when go-git is available
	// https://github.com/go-git/go-git/blob/master/COMPATIBILITY.md
	cmd := exec.Command("git", "count-objects", "-v")
	output, err := cmd.CombinedOutput()
	if err != nil {
		slog.Error("running git command:", "err", err)
		return fmt.Errorf("error running git command: %w", err)
	}

	var sum int
	for _, line := range strings.Split(string(output), "\n") {
		// Only "key: value" entries are of interest. Because stderr is merged
		// into the output, other lines can show up here, and indexing their
		// fields blindly could panic.
		key, value, found := strings.Cut(line, ":")
		if !found || !strings.HasPrefix(key, "size") {
			continue
		}
		size, err := strconv.Atoi(strings.TrimSpace(value))
		if err != nil {
			slog.Error("parsing git count-objects output", "line", line, "err", err)
			return fmt.Errorf("error parsing git count-objects line %q: %w", line, err)
		}
		sum += size
	}

	if sum > 2048 {
		return fmt.Errorf("Repository larger than 2MB. Please clean up or contact the teaching team.")
	}
	return nil
}

62
pkg/healthcheck/tag.go Normal file
View File

@ -0,0 +1,62 @@
package healthcheck
import (
"fmt"
"strings"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
)
// getTagsFromRepo opens the git repository at repoPath and returns the short
// names of all of its tags, in the order the tag iterator yields them. It
// returns an error when the repository cannot be opened or its tags cannot be
// listed or iterated.
func getTagsFromRepo(repoPath string) ([]string, error) {
	repo, err := git.PlainOpen(repoPath)
	if err != nil {
		return nil, fmt.Errorf("error opening repo: %v", err)
	}

	tagRefs, err := repo.Tags()
	if err != nil {
		return nil, fmt.Errorf("error getting tags: %v", err)
	}

	var names []string
	collect := func(ref *plumbing.Reference) error {
		names = append(names, ref.Name().Short())
		return nil
	}
	if err := tagRefs.ForEach(collect); err != nil {
		return nil, fmt.Errorf("error iterating tags: %v", err)
	}
	return names, nil
}
// CheckTags verifies that the repository at repoPath carries the release tag
// expected for the given category and number n.
//
// The expected tag is a one-letter prefix followed by n: "e" for "exam", "p"
// for "project", "h" for "homework" and "a" for any other category. It returns
// nil when a tag with exactly that name exists, an error naming the expected
// tag and listing the existing tags otherwise, and an error when the tags
// cannot be read.
func CheckTags(repoPath string, category string, n int) error {
	tags, err := getTagsFromRepo(repoPath)
	if err != nil {
		return fmt.Errorf("error getting tags: %v", err)
	}

	prefixes := map[string]string{
		"exam":     "e",
		"project":  "p",
		"homework": "h",
	}
	prefix, known := prefixes[category]
	if !known {
		prefix = "a"
	}
	target := fmt.Sprintf("%s%d", prefix, n)

	for i := range tags {
		if tags[i] == target {
			return nil
		}
	}
	return fmt.Errorf("Wrong release tag '%s' or missing release tags. Please use one of '%s'.", target, strings.Join(tags, "', '"))
}

38
pkg/healthcheck/utils.go Normal file
View File

@ -0,0 +1,38 @@
package healthcheck
import (
"fmt"
"regexp"
)
// inString reports whether str1 is equal to one of the elements of strList.
func inString(str1 string, strList []string) bool {
	for i := 0; i < len(strList); i++ {
		if strList[i] == str1 {
			return true
		}
	}
	return false
}
// addExt appends ext to every element of fileList. The slice is modified in
// place, so the caller sees the suffixed names.
func addExt(fileList []string, ext string) {
	for i := range fileList {
		fileList[i] += ext
	}
}
// getRegex compiles every pattern in fileList as a case-insensitive regular
// expression (each pattern is prefixed with "(?i)") and returns the compiled
// expressions in the same order. It stops at the first pattern that does not
// compile and returns a nil slice with the wrapped error.
func getRegex(fileList []string) ([]*regexp.Regexp, error) {
	var compiled []*regexp.Regexp
	for i := range fileList {
		re, err := regexp.Compile("(?i)" + fileList[i])
		if err != nil {
			return nil, fmt.Errorf("Error compiling regex:%w", err)
		}
		compiled = append(compiled, re)
	}
	return compiled, nil
}

59
pkg/healthcheck/verify.go Normal file
View File

@ -0,0 +1,59 @@
package healthcheck
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
// getChecksum returns the SHA-256 digest of the file at filePath as a
// lowercase hexadecimal string. The file is streamed through the hash rather
// than loaded into memory. Open and read errors are returned unchanged, with
// an empty digest.
func getChecksum(filePath string) (string, error) {
	f, err := os.Open(filePath)
	if err != nil {
		return "", err
	}
	defer f.Close()

	hasher := sha256.New()
	_, err = io.Copy(hasher, f)
	if err != nil {
		return "", err
	}

	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest), nil
}
// checkFileChecksum compares the SHA-256 checksum of one file against
// expectedChecksum. The file is located by joining rootDir with fileName after
// trimming surrounding whitespace from the name. It returns nil on a match, an
// error quoting both checksums on a mismatch, and an error when the file
// cannot be read.
func checkFileChecksum(rootDir, fileName, expectedChecksum string) error {
	filePath := filepath.Join(rootDir, strings.TrimSpace(fileName))
	actualChecksum, err := getChecksum(filePath)
	switch {
	case err != nil:
		return fmt.Errorf("Error reading file %s: %v", filePath, err)
	case actualChecksum != expectedChecksum:
		return fmt.Errorf("Checksum for %s failed. Expected %s, but got %s. Please revert your changes or contact the teaching team if you have a valid reason for adjusting them.", filePath, expectedChecksum, actualChecksum)
	default:
		return nil
	}
}
// VerifyFiles checks that a set of files under rootDir still have their
// expected SHA-256 checksums.
//
// checkFileNameList and checkFileSumList are comma-separated lists that are
// paired by position: the i-th checksum belongs to the i-th file name.
// Surrounding whitespace is trimmed from each checksum here and from each
// file name by checkFileChecksum. An empty checkFileNameList disables the
// check and yields nil.
//
// It returns the error of the first file that fails verification, or an
// error, before any file is checked, when there are fewer checksums than file
// names. Surplus checksums are ignored.
func VerifyFiles(rootDir string, checkFileNameList string, checkFileSumList string) error {
	if len(checkFileNameList) == 0 {
		return nil
	}

	fileNames := strings.Split(checkFileNameList, ",")
	checkSums := strings.Split(checkFileSumList, ",")
	// Without this guard a short checksum list makes checkSums[i] below panic
	// with an index out of range.
	if len(checkSums) < len(fileNames) {
		return fmt.Errorf("mismatched check lists: %d file names but only %d checksums",
			len(fileNames), len(checkSums))
	}

	// Check each file's checksum.
	for i, fileName := range fileNames {
		expectedChecksum := strings.TrimSpace(checkSums[i])
		if err := checkFileChecksum(rootDir, fileName, expectedChecksum); err != nil {
			return err
		}
	}
	return nil
}