feat: repo health check (#16) #17

Merged
张泊明518370910136 merged 37 commits from file_check into master 2024-09-11 20:09:27 +08:00
10 changed files with 88 additions and 248 deletions
Showing only changes of commit 2d5fe25d1a - Show all commits

View File

@ -1,7 +1,6 @@
package main
import (
"encoding/json"
"flag"
"fmt"
"log/slog"
@ -38,10 +37,7 @@ func setupSlog() {
// Generally, err is used for runtime errors, and checkRes is used for the result of the checks.
func main() {
var info []healthcheck.CheckStage
var gitWhitelist, metaFile, releaseTags []string
var tmp healthcheck.CheckStage
rootDir := flag.String("root", "", "")
repo := flag.String("repo", "", "")
droneBranch := flag.String("droneBranch", "", "")
@ -54,33 +50,35 @@ func main() {
parseMultiValueFlag(&releaseTags, "releaseTags", "")
flag.Parse()
setupSlog()
tmp = healthcheck.RepoSize()
info = append(info, tmp)
tmp = healthcheck.ForbiddenCheck(*rootDir, gitWhitelist, *repo, *droneBranch)
info = append(info, tmp)
tmp = healthcheck.MetaCheck(*rootDir, metaFile)
info = append(info, tmp)
tmp = healthcheck.NonAsciiFiles(*rootDir)
info = append(info, tmp)
tmp = healthcheck.NonAsciiMsg(*rootDir)
info = append(info, tmp)
// TODO: find a way to test the release tag
tmp = healthcheck.CheckReleases(*rootDir, *releaseCategories, *releaseNumber)
info = append(info, tmp)
// FIXME: for drone usage
tmp = healthcheck.Verify(*rootDir, *adminDir)
info = append(info, tmp)
jsonData, err := json.Marshal(info)
var err error
err = healthcheck.RepoSize()
if err != nil {
fmt.Fprint(os.Stderr, err)
os.Exit(1)
fmt.Printf("## Repo Size Check Failed:\n%s\n", err.Error())
}
err = healthcheck.ForbiddenCheck(*rootDir, gitWhitelist, *repo, *droneBranch)
if err != nil {
fmt.Printf("## Forbidden File Check Failed:\n%s\n", err.Error())
}
err = healthcheck.MetaCheck(*rootDir, metaFile)
if err != nil {
fmt.Printf("## Forbidden File Check Failed:\n%s\n", err.Error())
bomingzh marked this conversation as resolved Outdated

is it still TODO?

is it still TODO?

Done yet

Done yet
}
err = healthcheck.NonAsciiFiles(*rootDir)
if err != nil {
fmt.Printf("## Non-ASCII Characters File Check Failed:\n%s\n", err.Error())
}
err = healthcheck.NonAsciiMsg(*rootDir)
if err != nil {
fmt.Printf("## Non-ASCII Characters Commit Message Check Failed:\n%s\n", err.Error())
}
// TODO: find a way to test the release tag
err = healthcheck.CheckReleases(*rootDir, *releaseCategories, *releaseNumber)
bomingzh marked this conversation as resolved Outdated

check releases need to use Gitea API, just check tags is enough

check releases need to use Gitea API, just check tags is enough

Yep, only check tags

Yep, only check tags

Where is it tested?

Where is it tested?
if err != nil {
fmt.Printf("## Release Tag Check Failed:\n%s\n", err.Error())
}
// FIXME: for drone usage
err = healthcheck.VerifyDirectory(*rootDir, *adminDir)
zzjc123 marked this conversation as resolved Outdated

what should be fixed?

what should be fixed?

I think it is due to we can only test it when it is deployed on drone so I just skip it when running the code. If not I couldn't push the code.

I think it is due to we can only test it when it is deployed on drone so I just skip it when running the code. If not I couldn't push the code.

I think checking an empty value for skipping it is enough. No dir specified = no verify.

I think checking an empty value for skipping it is enough. No dir specified = no verify.
if err != nil {
fmt.Printf("## Directory File Check Failed:\n%s\n", err.Error())
}
fmt.Printf("%s", jsonData)
}

View File

@ -7,41 +7,35 @@ import (
"github.com/criyle/go-judge/envexec"
)
type Conf struct {
Score int
Comment string
}
type Healthcheck struct{}
func Parse(executorResult stage.ExecutorResult, conf Conf) stage.ParserResult {
func Parse(executorResult stage.ExecutorResult) (stage.ParserResult, bool) {
stdout := executorResult.Files["stdout"]
stderr := executorResult.Files["stderr"]
if executorResult.Status != stage.Status(envexec.StatusAccepted) {
return stage.ParserResult{
Score: 0,
Comment: fmt.Sprintf(
"Unexpected executor status: %s.\nStderr: %s",
executorResult.Status, stderr,
"Unexpected executor status: %s.\nStdout: %s\nStderr: %s",
executorResult.Status, stdout, stderr,
),
}
}, true
}
return stage.ParserResult{
Score: 0,
Comment: stdout,
}
}, stdout != ""
}
func (*Healthcheck) Run(results []stage.ExecutorResult, confAny any) (
[]stage.ParserResult, bool, error,
) {
conf, err := stage.DecodeConf[Conf](confAny)
if err != nil {
return nil, true, err
}
var res []stage.ParserResult
zzjc123 marked this conversation as resolved Outdated

Comments need to be human-readable. You can use markdown format here. I suggest only keep the stderr field. You can get exitcode directly from executor if needed. Use logger to log any details for student to debug as they can check the log info from drone.

A comment of health check should look like this:

Repo Size

Pass

Forbidden File

The following forbidden files were found: README.md, conf.toml, expected.json, healthcheck, stderr, stdin.sh, stdout
To fix it, first make a backup of your repository and then run the following commands:

...

Check name...

check detail...

Comments need to be human-readable. You can use markdown format here. I suggest only keep the stderr field. You can get exitcode directly from executor if needed. Use logger to log any details for student to debug as they can check the log info from drone. A comment of health check should look like this: ### Repo Size Pass ### Forbidden File The following forbidden files were found: README.md, conf.toml, expected.json, healthcheck, stderr, stdin.sh, stdout To fix it, first make a backup of your repository and then run the following commands: ``` ... ``` ### Check name... check detail...
forceQuit := false
for _, result := range results {
res = append(res, Parse(result, *conf))
parserResult, forceQuitResult := Parse(result)
res = append(res, parserResult)
forceQuit = forceQuit || forceQuitResult
}
return res, false, nil
return res, forceQuit, nil
}

View File

@ -1,6 +1,7 @@
package healthcheck
import (
"fmt"
"log/slog"
"strings"
"unicode"
@ -14,36 +15,23 @@ import (
// Otherwise, it iterates over each character in the message and checks if it is a non-ASCII character.
// If a non-ASCII character is found, it returns an error indicating not to use non-ASCII characters in commit messages.
// Otherwise, it returns nil indicating that the commit message is valid.
func NonAsciiMsg(root string) (jsonOut CheckStage) {
jsonOut = CheckStage{
Name: "NonAsciiMsg",
StdOut: "Checking for non-ASCII characters in commit message: ",
ExitCode: 0,
StdErr: "",
}
func NonAsciiMsg(root string) error {
// cmd := exec.Command("git", "log", "--encoding=UTF-8", "--format=%B")
repo, err := git.PlainOpen(root)
if err != nil {
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 1
slog.Error("openning git repo", "err", err)
return jsonOut
return fmt.Errorf("error openning git repo: %v", err)
}
zzjc123 marked this conversation as resolved Outdated
https://github.com/go-git/go-git#in-memory-example Try to avoid `exec.Command`.
ref, err := repo.Head()
if err != nil {
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 1
slog.Error("getting reference", "err", err)
return jsonOut
return fmt.Errorf("error getting reference: %v", err)
}
commits, err := repo.Log(&git.LogOptions{From: ref.Hash()})
if err != nil {
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 1
slog.Error("getting commits", "err", err)
return jsonOut
return fmt.Errorf("error getting commits: %v", err)
}
var msgs []string
@ -52,10 +40,8 @@ func NonAsciiMsg(root string) (jsonOut CheckStage) {
return nil
})
if err != nil {
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 1
slog.Error("converting log to string", "err", err)
return jsonOut
slog.Error("iterating commits", "err", err)
return fmt.Errorf("error iterating commits: %v", err)
}
var nonAsciiMsgs []string
@ -73,11 +59,7 @@ func NonAsciiMsg(root string) (jsonOut CheckStage) {
}
}
if len(nonAsciiMsgs) > 0 {
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 105
jsonOut.StdErr = "Non-ASCII characters in commit messages:\n" + strings.Join(nonAsciiMsgs, "\n")
return jsonOut
return fmt.Errorf("Non-ASCII characters in commit messages:\n" + strings.Join(nonAsciiMsgs, "\n"))
}
jsonOut.StdOut += "OK"
return jsonOut
return nil
}

View File

@ -16,7 +16,6 @@ func getForbiddens(root string, fileList []string) ([]string, error) {
var regexList []*regexp.Regexp
regexList, err := getRegex(fileList)
if err != nil {
fmt.Println("Error compiling regex:", err)
return nil, err
}
@ -52,24 +51,11 @@ func getForbiddens(root string, fileList []string) ([]string, error) {
// forbiddenCheck checks for forbidden files in the specified root directory.
// It prints the list of forbidden files found, along with instructions on how to fix them.
func ForbiddenCheck(rootDir string, regexList []string, repo string, droneBranch string) (jsonOut CheckStage) {
jsonOut = CheckStage{
Name: "forbiddenFile",
StdOut: "Checking forbidden files: ",
ExitCode: 0,
StdErr: "",
}
// INFO: test case
// regexList := []string{`\.$`, `\.git`, `\.drone.yml`, `Makefile`, `CMakeLists.txt`,`.*\.go`,`.*\.toml`, `.*\.c`, `.*\.cc`, `.*\.cpp`, `.*\.h`, `.*\.md`}
// rootDir = "/Users/zhouzhaojiacheng/Desktop/STUDENT_ORG/TechJI/Dev/joj/JOJ3"
func ForbiddenCheck(rootDir string, regexList []string, repo string, droneBranch string) error {
forbids, err := getForbiddens(rootDir, regexList)
if err != nil {
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 1
slog.Error("get forbiddens", "error", err)
return jsonOut
slog.Error("getting forbiddens", "error", err)
return fmt.Errorf("error getting forbiddens: %w", err)
}
var message string
@ -84,12 +70,7 @@ func ForbiddenCheck(rootDir string, regexList []string, repo string, droneBranch
message += fmt.Sprint(file, " ")
}
message += fmt.Sprint("; do git filter-repo --force --invert-paths --path \\\"\\$i\\\"; done\ngit remote add origin ", repo, "\ngit push --set-upstream origin ", droneBranch, " --force")
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 103
jsonOut.StdErr = message
return jsonOut
} else {
jsonOut.StdOut += "OK"
return jsonOut
return fmt.Errorf(message)
}
return nil
}

View File

@ -19,7 +19,7 @@ func getMetas(rootDir string, fileList []string) ([]string, string, error) {
files, err := os.ReadDir(rootDir)
if err != nil {
return nil, "", fmt.Errorf("Error reading directory:%w", err)
return nil, "", fmt.Errorf("error reading directory: %w", err)
}
matched := false
@ -60,28 +60,14 @@ func getMetas(rootDir string, fileList []string) ([]string, string, error) {
// metaCheck performs a check for metadata files in the specified root directory.
// It prints a message if any required metadata files are missing.
func MetaCheck(rootDir string, fileList []string) (jsonOut CheckStage) {
func MetaCheck(rootDir string, fileList []string) error {
unmatchedList, umatchedRes, err := getMetas(rootDir, fileList)
jsonOut = CheckStage{
Name: "metaFile",
StdOut: "Checking the existence of meta file: ",
ExitCode: 0,
StdErr: "",
}
if err != nil {
jsonOut.ExitCode = 1
slog.Error("get metas", "err", err)
return jsonOut
slog.Error("getting metas", "err", err)
return fmt.Errorf("error getting metas: %w", err)
}
if len(unmatchedList) == 0 {
jsonOut.StdOut += "OK"
return jsonOut
} else {
jsonOut.ExitCode = 104
jsonOut.StdOut += "Failed"
jsonOut.StdErr = fmt.Sprintf("%d important project files missing\n"+umatchedRes, len(unmatchedList))
return jsonOut
if len(unmatchedList) != 0 {
return fmt.Errorf("%d important project files missing\n"+umatchedRes, len(unmatchedList))
}
return nil
}

View File

@ -61,31 +61,14 @@ func getNonAscii(root string) ([]string, error) {
// nonAsciiFiles checks for non-ASCII characters in files within the specified root directory.
zzjc123 marked this conversation as resolved Outdated

And do not just panic on these errors. These errors are not unrecoverable. You can just record these errors and let the program continue working on the other parts. Or just return the error to the upper level and let that function decides what is the next step.

And do not just `panic` on these errors. These errors are not unrecoverable. You can just record these errors and let the program continue working on the other parts. Or just return the error to the upper level and let that function decides what is the next step.
// It prints a message with the paths to files containing non-ASCII characters, if any.
func NonAsciiFiles(root string) (jsonOut CheckStage) {
jsonOut = CheckStage{
Name: "NonAsciiFiles",
StdOut: "Checking for non-ascii files: ",
ExitCode: 0,
StdErr: "",
}
func NonAsciiFiles(root string) error {
nonAscii, err := getNonAscii(root)
if err != nil {
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 1
slog.Error("get non-ascii", "err", err)
return jsonOut
slog.Error("getting non-ascii", "err", err)
return fmt.Errorf("error getting non-ascii: %w", err)
}
var nonAsciiRes string
if len(nonAscii) > 0 {
nonAsciiRes = fmt.Sprintf("Non-ASCII characters found in the following files:\n" + strings.Join(nonAscii, "\n"))
jsonOut.ExitCode = 105
jsonOut.StdOut += "Failed"
} else {
jsonOut.StdOut += "OK"
return fmt.Errorf("Non-ASCII characters found in the following files:\n" + strings.Join(nonAscii, "\n"))
}
jsonOut.StdErr = nonAsciiRes
return jsonOut
return nil
}

View File

@ -2,7 +2,6 @@ package healthcheck
import (
"fmt"
"log"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
@ -19,12 +18,12 @@ func catTags(all []string) (out string) {
func getTagsFromRepo(repoPath string) ([]string, error) {
repo, err := git.PlainOpen(repoPath)
if err != nil {
return nil, fmt.Errorf("Cannot open repo: %v", err)
return nil, fmt.Errorf("error opening repo: %v", err)
}
refs, err := repo.Tags()
if err != nil {
return nil, fmt.Errorf("Cannot get tags: %v", err)
return nil, fmt.Errorf("error getting tags: %v", err)
}
var tags []string
@ -33,27 +32,18 @@ func getTagsFromRepo(repoPath string) ([]string, error) {
return nil
})
if err != nil {
zzjc123 marked this conversation as resolved Outdated

why not just let the parameter fully determine the target tag? e.g. func CheckReleases(repoPath, target string)

why not just let the parameter fully determine the target tag? e.g. `func CheckReleases(repoPath, target string)`

Because we cannot decide how many hw or project release need to be checked yes. The int n is used to specif thr number

Because we cannot decide how many hw or project release need to be checked yes. The int n is used to specif thr number

So since we need to pass category and n to this function, we still need to decide the count of hw&proj when generating conf.toml. I do not see much difference.

So since we need to pass `category` and `n` to this function, we still need to decide the count of hw&proj when generating `conf.toml`. I do not see much difference.
return nil, fmt.Errorf("Error while iterating tags: %v", err)
return nil, fmt.Errorf("error iterating tags: %v", err)
}
return tags, nil
}
func CheckReleases(repoPath string, category string, n int) (jsonOut CheckStage) {
jsonOut = CheckStage{
Name: "ReleaseCheck",
StdOut: "Checking release tag: ",
ExitCode: 0,
StdErr: "",
}
func CheckReleases(repoPath string, category string, n int) error {
tags, err := getTagsFromRepo(repoPath)
if err != nil {
log.Fatalf("Error in getting tags: %v", err)
return fmt.Errorf("error getting tags: %v", err)
}
var prefix string
switch category {
case "exam":
prefix = "e"
@ -64,7 +54,6 @@ func CheckReleases(repoPath string, category string, n int) (jsonOut CheckStage)
default:
prefix = "a"
}
target := prefix + fmt.Sprintf("%d", n)
found := false
for _, tag := range tags {

Why is the target release tag wrong?

Why is the `target` release tag wrong?

I intended to mean release tag is missing or wrong.

I intended to mean release tag is missing or wrong.

Do we need to specify missing or wrong? I think only we need to inform students that their release is wrong.

Do we need to specify missing or wrong? I think only we need to inform students that their release is wrong.

Do we need to test if they submit extra tags?

Do we need to test if they submit extra tags?

I think extra is fine, as long as they don't miss any tags.

I think extra is fine, as long as they don't miss any tags.

Then just print the error on missing tags.

Then just print the error on missing tags.
@ -75,14 +64,7 @@ func CheckReleases(repoPath string, category string, n int) (jsonOut CheckStage)
}
if !found {
tagList := catTags(tags)
jsonOut.ExitCode = 107
jsonOut.StdOut = "Failed"
jsonOut.StdErr = fmt.Sprintf("wrong release tag '%s', please use one of %s aborting", target, tagList)
return jsonOut
return fmt.Errorf("Wrong release tag '%s'. Please use one of %s.", target, tagList)
}
jsonOut.StdOut += "OK"
jsonOut.ExitCode = 0
jsonOut.StdErr = "Fine"
return jsonOut
return nil
}

View File

@ -1,6 +1,7 @@
package healthcheck
import (
"fmt"
"log/slog"
"os/exec"
"strconv"
@ -9,25 +10,15 @@ import (
// RepoSize checks the size of the repository to determine if it is oversized.
// It executes the 'git count-objects -v' command to obtain the size information,
func RepoSize() (jsonOut CheckStage) {
jsonOut = CheckStage{
Name: "RepoSize",
StdOut: "Checking repository size: ",
ExitCode: 0,
StdErr: "",
}
func RepoSize() error {
// TODO: reimplement here when go-git is available
// https://github.com/go-git/go-git/blob/master/COMPATIBILITY.md
cmd := exec.Command("git", "count-objects", "-v")
output, err := cmd.CombinedOutput()
if err != nil {
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 1
slog.Error("running git command:", "err", err)
return jsonOut
return fmt.Errorf("error running git command: %w", err)
bomingzh marked this conversation as resolved Outdated

add a comment about link to https://github.com/go-git/go-git/blob/master/COMPATIBILITY.md#plumbing-commands , we just can not use go-git to implement it.

add a comment about link to https://github.com/go-git/go-git/blob/master/COMPATIBILITY.md#plumbing-commands , we just can not use go-git to implement it.

@manuel :
add a "TODO comment" in the source code with

  • a note to reimplement with go-git when available
  • link to feature/compatibility page of go-git
@manuel : add a "TODO comment" in the source code with - a note to reimplement with go-git when available - link to feature/compatibility page of go-git
}
lines := strings.Split(string(output), "\n")
var sum int
for _, line := range lines {
@ -36,21 +27,14 @@ func RepoSize() (jsonOut CheckStage) {
sizeStr := fields[1]
size, err := strconv.Atoi(sizeStr)
if err != nil {
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 1
slog.Error("running git command:", "err", err)
return jsonOut
return fmt.Errorf("error running git command: %w", err)
}
sum += size
}
}
if sum <= 2048 {
jsonOut.StdOut += "OK"
return jsonOut
if sum > 2048 {
return fmt.Errorf("Repository larger than 2MB. Please clean up or contact the teaching team.")
}
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 100
jsonOut.StdErr = "repository larger than 2MB, please clean up or contact the teaching team."
return jsonOut
return nil
}

View File

@ -5,15 +5,6 @@ import (
"regexp"
)
// For ExitCode, see https://focs.ji.sjtu.edu.cn/git/TAs/resources/src/branch/drone/dronelib.checks
// 1 for unrecoverable error and 0 for succeses
type CheckStage struct {
Name string `json:"name"`
StdOut string `json:"stdout"`
ExitCode int `json:"exit code"`
StdErr string `json:"stderr"`
}
// addExt appends the specified extension to each file name in the given fileList.
// It modifies the original fileList in place.
func addExt(fileList []string, ext string) {

View File

@ -47,68 +47,27 @@ func filesMatch(file1, file2 string) (bool, error) {
return true, nil
bomingzh marked this conversation as resolved Outdated

ditto

ditto
}
// compareDirectories compares the contents of two directories.
func compareDirectories(dir1, dir2 string, jsonOut *CheckStage) error {
allMatch := true
var message string
err := filepath.Walk(dir1, func(path string, info os.FileInfo, err error) error {
// VerifyDirectory checks if the contents of two directories are identical.
func VerifyDirectory(rootDir, compareDir string) error {
err := filepath.Walk(rootDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
return fmt.Errorf("error walking directory: %w", err)
}
bomingzh marked this conversation as resolved Outdated

remove it

remove it
if !info.IsDir() {
relPath, _ := filepath.Rel(dir1, path)
file2 := filepath.Join(dir2, relPath)
relPath, _ := filepath.Rel(rootDir, path)
file2 := filepath.Join(compareDir, relPath)
if !fileExists(file2) {
// fmt.Printf("File %s in %s is missing in %s\n, please immediately revert your changes!\n", relPath, dir1, dir2)
message += "File missing"
allMatch = false
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 101
jsonOut.StdErr = message
return nil
return fmt.Errorf("File %s in %s is missing in %s. Please immediately revert your changes!\n", relPath, rootDir, compareDir)
}
// fmt.Printf("Checking integrity of %s:\n", path)
match, err := filesMatch(path, file2)
if err != nil {
return err
return fmt.Errorf("error matching files: %w", err)

remove it

remove it

I mean remove the os.Exit(1), and also the one on line 55.

I mean remove the `os.Exit(1)`, and also the one on line 55.
}
if !match {
// fmt.Printf("File %s in %s is not identical to %s\nPlease revert your changes or contact the teaching team if you have a valid reason for adjusting them.\n", relPath, dir1, dir2)
message += "File is not identical"
allMatch = false
jsonOut.StdOut += "Failed"
jsonOut.ExitCode = 101
jsonOut.StdErr = message
return fmt.Errorf("File %s in %s is not identical to %s. Please revert your changes or contact the teaching team if you have a valid reason for adjusting them.\n", relPath, rootDir, compareDir)
}
}

why not just return error on not all passed?

why not just return error on not all passed?
return nil
})
if allMatch {
jsonOut.StdOut += "OK!"
} else {
jsonOut.StdOut += "Failed!"
}
return err
}
// Verify checks if the contents of two directories are identical.
func Verify(rootDir, compareDir string) CheckStage {
jsonOut := CheckStage{
Name: "verifyFile",
StdOut: "Checking files to be verified: ",
ExitCode: 0,
StdErr: "",
}
err := compareDirectories(rootDir, compareDir, &jsonOut)
// fmt.Println("Comparison finished ")
if err != nil {
fmt.Println("Error:", err)
}
return jsonOut
}