clang-tidy parser and executor #26

Merged
张泊明518370910136 merged 26 commits from clang-tidy into master 2024-05-18 02:50:13 +08:00
6 changed files with 437 additions and 17 deletions
Showing only changes of commit e133b13a84 - Show all commits
cmd/joj3
internal
executors/clang_tidy
parsers/clang_tidy

View File

@ -53,6 +53,11 @@ func TestMain(t *testing.T) {
{Score: 100, Comment: "executor status: run time: \\d+ ns, memory: \\d+ bytes"},
}},
}},
{"clang_tidy", []stage.StageResult{
zjc_he marked this conversation as resolved Outdated

ditto

ditto
{Name: "clang-tidy", Results: []stage.ParserResult{
{Score: -200, Comment: ""},
}},
}},
{"compile_error", []stage.StageResult{
{Name: "compile", Results: []stage.ParserResult{
{Score: 0, Comment: "Unexpected executor status: Nonzero Exit Status\\."},

View File

@ -1,6 +1,10 @@
package clang_tidy
import (
"fmt"
"io"
"os/exec"
"focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/stage"
"github.com/criyle/go-judge/envexec"
)
@ -9,17 +13,49 @@ type ClangTidy struct{}
func (e *ClangTidy) Run(cmds []stage.Cmd) ([]stage.ExecutorResult, error) {
var res []stage.ExecutorResult
for range cmds {
res = append(res, stage.ExecutorResult{
Status: stage.Status(envexec.StatusInvalid),
for _, cmd := range cmds {
args := ""
for _, arg := range cmd.Args {
args += fmt.Sprint(arg)
args += " "
}
clang_tidy_Cmd := exec.Command("bash", "-c", args)
zjc_he marked this conversation as resolved Outdated

I do not think we need this executor. We can just run everything inside the sandbox.

I do not think we need this executor. We can just run everything inside the sandbox.
clang_tidy_stdout, err1 := clang_tidy_Cmd.StdoutPipe()
clang_tidy_stderr, err2 := clang_tidy_Cmd.StderrPipe()
if err1 != nil {
return nil, err1
}
if err2 != nil {
return nil, err2
}
_ = clang_tidy_Cmd.Start()
clang_tidy_Out, err1 := io.ReadAll(clang_tidy_stdout)
clang_tidy_Err, err2 := io.ReadAll(clang_tidy_stderr)
if err1 != nil {
return nil, err1
}
if err2 != nil {
return nil, err2
}
_ = clang_tidy_Cmd.Wait()
r := stage.ExecutorResult{
Status: stage.Status(envexec.StatusAccepted),
ExitStatus: 0,
Error: "I'm a dummy",
Error: "",
Time: 0,
Memory: 0,
RunTime: 0,
Files: map[string]string{},
FileIDs: map[string]string{},
})
}
r.Files["stdout"] = string(clang_tidy_Out)
// TODO: We may don't want stderr
r.Files["stderr"] = string(clang_tidy_Err)
res = append(res, r)
}
return res, nil
}

View File

@ -0,0 +1,135 @@
// Referenced from https://github.com/yuriisk/clang-tidy-converter/blob/master/clang_tidy_converter/parser/clang_tidy_parser.py
package clang_tidy
import (
"os"
"path/filepath"
"regexp"
"strconv"
)
type Level int
const (
UNKNOWN Level = iota
NOTE
REMARK
WARNING
ERROR
FATAL
)
type ClangMessage struct {
filepath string
line int
column int
level Level
message string
diagnosticName string
detailsLines []string
children []ClangMessage
}
func newClangMessage(filepath string, line int, column int, level Level, message string, diagnosticName string, detailsLines []string, children []ClangMessage) *ClangMessage {
if detailsLines == nil {
detailsLines = make([]string, 0)
}
if children == nil {
children = make([]ClangMessage, 0)
}
return &ClangMessage{
filepath: filepath,
line: line,
column: column,
level: level,
message: message,
diagnosticName: diagnosticName,
detailsLines: detailsLines,
children: children,
}
}
func LevelFromString(levelString string) Level {
switch levelString {
case "note":
return NOTE
case "remark":
return REMARK
case "warning":
return WARNING
case "error":
return ERROR
case "fatal":
return FATAL
default:
return UNKNOWN
}
}
func is_ignored(line string) bool {
IGNORE_REGEX := regexp.MustCompile("^error:.*$")
return IGNORE_REGEX.MatchString(line)
}
func parse_message(line string) ClangMessage {
MESSAGE_REGEX := regexp.MustCompile(`^(?P<filepath>.+):(?P<line>\d+):(?P<column>\d+): (?P<level>\S+): (?P<message>.*?)(?: \[(?P<diagnostic_name>.*)\])?\n$`)
regex_res := MESSAGE_REGEX.FindStringSubmatch(line)
if len(regex_res) == 0 {
return *newClangMessage("", 0, 0, UNKNOWN, "", "", nil, nil)
} else {
filepath := regex_res[1]
line, _ := strconv.Atoi(regex_res[2])
column, _ := strconv.Atoi(regex_res[3])
level := LevelFromString(regex_res[4])
message := regex_res[5]
diagnostic_name := regex_res[6]
return ClangMessage{
filepath: filepath,
line: line,
column: column,
level: level,
message: message,
diagnosticName: diagnostic_name,
detailsLines: make([]string, 0),
children: make([]ClangMessage, 0),
}
}
}
func group_messages(messages []ClangMessage) []ClangMessage {
grouped_messages := make([]ClangMessage, 0)
for _, message := range messages {
if message.level == NOTE {
grouped_messages[len(grouped_messages)-1].children = append(grouped_messages[len(grouped_messages)-1].children, message)
} else {
grouped_messages = append(grouped_messages, message)
}
}
return grouped_messages
}
func convert_paths_to_relative(messages *[]ClangMessage) {
zjc_he marked this conversation as resolved Outdated

I think it should be part of the config file with default value "/w". Since the work dir in the sandbox can also be changed, it should not be hardcoded.

I think it should be part of the config file with default value `"/w"`. Since the work dir in the sandbox can also be changed, it should not be hardcoded.
currentDir, _ := os.Getwd()
for i := range *messages {
(*messages)[i].filepath, _ = filepath.Rel(currentDir, (*messages)[i].filepath)
}
}
func parse_lines(lines []string) []ClangMessage {
messages := make([]ClangMessage, 0)
for _, line := range lines {
if is_ignored(string(line)) {
continue
}
message := parse_message(string(line))
if message.level == UNKNOWN && len(messages) > 0 {
messages[len(messages)-1].detailsLines = append(messages[len(messages)-1].detailsLines, string(line))
} else {
messages = append(messages, message)
}
}
convert_paths_to_relative(&messages)
return group_messages(messages)
}

View File

@ -0,0 +1,201 @@
// Referenced from https://github.com/yuriisk/clang-tidy-converter/blob/master/clang_tidy_converter/parser/clang_tidy_parser.py
package clang_tidy
import (
"fmt"
"strings"
)
type json_message struct {
Type string `json:"type"`
Check_name string `json:"check_name"`
Description string `json:"description"`
Content map[string]interface{} `json:"content"`
Categories []string `json:"categories"`
Location map[string]interface{} `json:"location"`
Trace map[string]interface{} `json:"trace"`
Severity string `json:"severity"`
Fingerprint string `json:"fingerprint"`
}
func format(messages []ClangMessage) []json_message {
formatted_messages := make([]json_message, len(messages))
for i, message := range messages {
formatted_messages[i] = format_message(message)
}
return formatted_messages
}
func format_message(message ClangMessage) json_message {
result := json_message{
Type: "issue",
Check_name: message.diagnosticName,
Description: message.message,
Content: extract_content(message),
Categories: extract_categories(message),
Location: extract_location(message),
Trace: extract_trace(message),
Severity: extract_severity(message),
Fingerprint: "",
// Fingerprint: generate_fingerprint(message),
zjc_he marked this conversation as resolved Outdated

ditto

ditto
}
return result
}
func messages_to_text(messages []ClangMessage) []string {
text_lines := []string{}
for _, message := range messages {
text_lines = append(text_lines, fmt.Sprintf("%s:%d:%d: %s", message.filepath, message.line, message.column, message.message))
text_lines = append(text_lines, message.detailsLines...)
text_lines = append(text_lines, messages_to_text(message.children)...)
}
return text_lines
}
func extract_content(message ClangMessage) map[string]interface{} {
detailLines := ""
for _, line := range message.detailsLines {
if line == "" {
continue
}
detailLines += (line + "\n")
}
for _, line := range messages_to_text(message.children) {
if line == "" {
continue
}
detailLines += (line + "\n")
}
result := map[string]interface{}{
"body": "```\n" + detailLines + "```",
}
return result
}
func remove_duplicates(list []string) []string {
uniqueMap := make(map[string]bool)
for _, v := range list {
uniqueMap[v] = true
}
result := []string{}
for k := range uniqueMap {
result = append(result, k)
}
return result
}
func extract_categories(message ClangMessage) []string {
BUGRISC_CATEGORY := "Bug Risk"
CLARITY_CATEGORY := "Clarity"
COMPATIBILITY_CATEGORY := "Compatibility"
COMPLEXITY_CATEGORY := "Complexity"
DUPLICATION_CATEGORY := "Duplication"
PERFORMANCE_CATEGORY := "Performance"
SECURITY_CATEGORY := "Security"
STYLE_CATEGORY := "Style"
categories := []string{}
if strings.Contains(message.diagnosticName, "bugprone") {
categories = append(categories, BUGRISC_CATEGORY)
}
if strings.Contains(message.diagnosticName, "modernize") {
categories = append(categories, COMPATIBILITY_CATEGORY)
}
if strings.Contains(message.diagnosticName, "portability") {
categories = append(categories, COMPATIBILITY_CATEGORY)
}
if strings.Contains(message.diagnosticName, "performance") {
categories = append(categories, PERFORMANCE_CATEGORY)
}
if strings.Contains(message.diagnosticName, "readability") {
categories = append(categories, CLARITY_CATEGORY)
}
if strings.Contains(message.diagnosticName, "cloexec") {
categories = append(categories, SECURITY_CATEGORY)
}
if strings.Contains(message.diagnosticName, "security") {
categories = append(categories, SECURITY_CATEGORY)
}
if strings.Contains(message.diagnosticName, "naming") {
categories = append(categories, STYLE_CATEGORY)
}
if strings.Contains(message.diagnosticName, "misc") {
categories = append(categories, STYLE_CATEGORY)
}
if strings.Contains(message.diagnosticName, "cppcoreguidelines") {
categories = append(categories, STYLE_CATEGORY)
}
if strings.Contains(message.diagnosticName, "hicpp") {
categories = append(categories, STYLE_CATEGORY)
}
if strings.Contains(message.diagnosticName, "simplify") {
categories = append(categories, COMPLEXITY_CATEGORY)
}
if strings.Contains(message.diagnosticName, "redundant") {
categories = append(categories, DUPLICATION_CATEGORY)
}
if strings.HasPrefix(message.diagnosticName, "boost-use-to-string") {
categories = append(categories, COMPATIBILITY_CATEGORY)
}
if len(categories) == 0 {
categories = append(categories, BUGRISC_CATEGORY)
}
return remove_duplicates(categories)
}
func extract_location(message ClangMessage) map[string]interface{} {
location := map[string]interface{}{
"path": message.filepath,
"lines": map[string]interface{}{
"begin": message.line,
},
}
return location
}
func extract_other_locations(message ClangMessage) []map[string]interface{} {
location_list := []map[string]interface{}{}
for _, child := range message.children {
location_list = append(location_list, extract_location(child))
location_list = append(location_list, extract_other_locations(child)...)
}
return location_list
}
func extract_trace(message ClangMessage) map[string]interface{} {
result := map[string]interface{}{
"locations": extract_other_locations(message),
}
return result
}
func extract_severity(message ClangMessage) string {
switch message.level {
case NOTE:
return "info"
case REMARK:
return "minor"
case WARNING:
return "major"
case ERROR:
return "critical"
case FATAL:
return "blocker"
default:
return "unknown"
}
}
// func generate_fingerprint(message ClangMessage) string {
zjc_he marked this conversation as resolved Outdated

I think we can completely remove this part.

I think we can completely remove this part.
// h := md5.New()
// h.Write([]byte(message.filepath))
// h.Write([]byte(fmt.Sprintf("%d", message.line)))
// h.Write([]byte(fmt.Sprintf("%d", message.column)))
// h.Write([]byte(message.message))
// h.Write([]byte(message.diagnosticName))
// for _, child := range message.children {
// childFingerprint := generate_fingerprint(child)
// h.Write([]byte(childFingerprint))
// }
// return hex.EncodeToString(h.Sum(nil))
// }

View File

@ -3,15 +3,21 @@ package clang_tidy
import (
"encoding/json"
"fmt"
"os"
"strings"
"focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/internal/stage"
"focs.ji.sjtu.edu.cn/git/FOCS-dev/JOJ3/pkg/dummy"
"github.com/criyle/go-judge/envexec"
)
type Match struct {
Keyword []string
Score int
}
zjc_he marked this conversation as resolved Outdated

Only a little confused about it. Does it mean /w by default as we run in linux? Or whether clang-tidy has some output format.

Only a little confused about it. Does it mean `/w` by default as we run in linux? Or whether clang-tidy has some output format.

Sorry that was a mistake

Sorry that was a mistake
type Conf struct {
Score int
Comment string
Matches []Match
}
type ClangTidy struct{}
@ -19,6 +25,20 @@ type ClangTidy struct{}
func Parse(executorResult stage.ExecutorResult, conf Conf) stage.ParserResult {
stdout := executorResult.Files["stdout"]
stderr := executorResult.Files["stderr"]
lines := strings.SplitAfter(stdout, "\n")
messages := parse_lines(lines)
zjc_he marked this conversation as resolved Outdated

I think we can uncomment it.

I think we can uncomment it.

No, as long as the clang-tidy report is not empty, its return value is 1 and the executor result status is Nonzero Exit Status:

main_test.go:117: actual[1].Results[0].Comment = Unexpected executor status: Nonzero Exit Status.
        Stderr: 17855 warnings and 1 error generated.
        Error while processing /w/src/sillycode.cpp.
No, as long as the clang-tidy report is not empty, its return value is 1 and the executor result status is Nonzero Exit Status: ``` main_test.go:117: actual[1].Results[0].Comment = Unexpected executor status: Nonzero Exit Status. Stderr: 17855 warnings and 1 error generated. Error while processing /w/src/sillycode.cpp. ```

Ok, then we can assume clang-tidy do not have unexpected errors for now. clang-tidy built in rules based on Libtooling are usually very strong. An alternative approach can be checking executorResult.exitStatus if we know it will only return 0 and 1.

Ok, then we can assume clang-tidy do not have unexpected errors for now. `clang-tidy` built in rules based on Libtooling are usually very strong. An alternative approach can be checking `executorResult.exitStatus` if we know it will only return 0 and 1.
formatted_messages := format(messages)
zjc_he marked this conversation as resolved Outdated

Do we still need this file?

Do we still need this file?

I don't know. We will need this data for drone, but I don't know where to put it for now.

I don't know. We will need this data for drone, but I don't know where to put it for now.

I think running JOJ3 will be the only step in the future. It just provides coast-to-coast experience.

I think running JOJ3 will be the only step in the future. It just provides coast-to-coast experience.

What do you mean? JOJ3 will cover everything and we don't need drone anymore?

This data shows the details for all cases, we may need to parse them into markdown in the future, so if we don't need to save it into a file I'll just put it there and wait for future needs

What do you mean? JOJ3 will cover everything and we don't need drone anymore? This data shows the details for all cases, we may need to parse them into markdown in the future, so if we don't need to save it into a file I'll just put it there and wait for future needs

Yes. Drone/Gitea actions will only be used to trigger the running of JOJ3, and JOJ3 should be the only step. We can just leave the details in logs, so if any debug purpose, student can check the output in drone/gitea actions to see. So that we can only give simple and brief feedback and gitea issue comment, which should be enough for most cases.

Yes. Drone/Gitea actions will only be used to trigger the running of JOJ3, and JOJ3 should be the only step. We can just leave the details in logs, so if any debug purpose, student can check the output in drone/gitea actions to see. So that we can only give simple and brief feedback and gitea issue comment, which should be enough for most cases.
// TODO: Handle the json file (parse into markdown and delete it?)
json_file, _ := os.Create("./clangtidy_result.json")
defer json_file.Close()
encoder := json.NewEncoder(json_file)
encoder.SetEscapeHTML(false)
encoder.SetIndent("", " ")
_ = encoder.Encode(formatted_messages)
if executorResult.Status != stage.Status(envexec.StatusAccepted) {
return stage.ParserResult{
Score: 0,
@ -28,17 +48,9 @@ func Parse(executorResult stage.ExecutorResult, conf Conf) stage.ParserResult {
),
}
}
var dummyResult dummy.Result
err := json.Unmarshal([]byte(stdout), &dummyResult)
if err != nil {
return stage.ParserResult{
Score: 0,
Comment: fmt.Sprintf("Failed to parse result: %s", err),
}
}
return stage.ParserResult{
Score: dummyResult.Score + conf.Score,
Comment: dummyResult.Comment + conf.Comment,
Score: get_score(formatted_messages, conf),
Comment: "",
zjc_he marked this conversation as resolved Outdated

We just put all the output of the parser in Comment.

We just put all the output of the parser in Comment.

Do you mean the json file which shows the whole detail of outputs? That might be really really long

Do you mean the json file which shows the whole detail of outputs? That might be really really long

The comment just gives human readable results. We may log the actual output for further details, but the comment will be replied in gitea issues. So we can just keep it short.

The comment just gives human readable results. We may log the actual output for further details, but the comment will be replied in gitea issues. So we can just keep it short.

A current version of comment:

Test results summary

  1. codequality-unchecked-malloc-result: 0
  2. codequality-no-global-variables: 0
  3. codequality-no-header-guard: 0
  4. codequality-no-fflush-stdin: 0
  5. readability-function-size: 0
  6. readability-identifier-naming: 0
  7. readability-redundant: 0
  8. readability-misleading-indentation: 0
  9. readability-misplaced-array-index: 0
  10. cppcoreguidelines-init-variables: 1
  11. bugprone-suspicious-string-compare: 0
  12. google-global-names-in-headers: 0
  13. clang-diagnostic: 1
  14. clang-analyzer: 0
  15. misc: 8
  16. performance: 4
  17. others: 187

I followed the version in ve482 issue

A current version of comment: ### Test results summary 1. codequality-unchecked-malloc-result: 0 2. codequality-no-global-variables: 0 3. codequality-no-header-guard: 0 4. codequality-no-fflush-stdin: 0 5. readability-function-size: 0 6. readability-identifier-naming: 0 7. readability-redundant: 0 8. readability-misleading-indentation: 0 9. readability-misplaced-array-index: 0 10. cppcoreguidelines-init-variables: 1 11. bugprone-suspicious-string-compare: 0 12. google-global-names-in-headers: 0 13. clang-diagnostic: 1 14. clang-analyzer: 0 15. misc: 8 16. performance: 4 17. others: 187 I followed the version in ve482 issue
}
}

View File

@ -0,0 +1,31 @@
package clang_tidy
func Contains[T comparable](arr []T, element T) bool {
for i := range arr {
// TODO: The keyword in json report might also be an array, need to split it
// TODO: Might use string.Contains() rather than ==
if element == arr[i] {
return true
}
}
return false
}
func get_score(json_messages []json_message, conf Conf) int {
fullmark := conf.Score
for _, json_message := range json_messages {
keyword := json_message.Check_name
flag := false
for _, match := range conf.Matches {
if Contains(match.Keyword, keyword) {
fullmark -= match.Score
flag = true
break
}
}
if !flag {
fullmark -= 1
}
}
return fullmark
}