def check_unnecessary_fields(
    pydantic_model_type: Type[BaseModel],
    input_dict: Dict[str, Any],
    file_path: Path,
    current_path: str = "",
) -> None:
    """Warn about fields in a parsed TOML mapping that merely restate defaults.

    Walks the fields declared on ``pydantic_model_type``, looks each one up in
    ``input_dict`` (by alias first, then by field name) and logs a warning via
    ``logger.warning`` when the supplied value equals the model's default.
    Nested models and lists of nested models are checked recursively.

    Args:
        pydantic_model_type: Model class describing the expected structure.
        input_dict: Raw mapping as loaded from the TOML file.
        file_path: Path of the TOML file; only used in the warning text.
        current_path: Dotted path of the enclosing field, used to build the
            full field path reported in warnings. Empty at the top level.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    from typing import get_args, get_origin

    def format_value_for_toml_warning(value: Any) -> str:
        """Render ``value`` roughly as it would be written in a TOML file."""
        if isinstance(value, (str, Path)):
            escaped_value = str(value).replace("\\", "\\\\").replace('"', '\\"')
            return f'"{escaped_value}"'
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(value, bool):
            return str(value).lower()
        if isinstance(value, (int, float)):
            return str(value)
        if isinstance(value, list):
            formatted_elements = [
                format_value_for_toml_warning(item) for item in value
            ]
            return f"[{', '.join(formatted_elements)}]"
        if isinstance(value, dict):
            return json.dumps(value, separators=(",", ":"))
        if value is None:
            return "None"
        return repr(value)

    def is_model_type(candidate: Any) -> bool:
        """Return True when ``candidate`` is a plain BaseModel subclass."""
        # On Python 3.9/3.10 ``isinstance(list[int], type)`` is True while
        # ``issubclass(list[int], ...)`` raises TypeError, so parametrized
        # generics are filtered out with get_origin() first.
        return (
            isinstance(candidate, type)
            and get_origin(candidate) is None
            and issubclass(candidate, BaseModel)
        )

    no_default = object()
    default_instance = pydantic_model_type.model_construct()
    for field_name, field_info in pydantic_model_type.model_fields.items():
        full_field_path = (
            f"{current_path}.{field_name}" if current_path else field_name
        )
        toml_field_name = field_name
        if field_info.alias is not None and field_info.alias in input_dict:
            toml_field_name = field_info.alias
        if toml_field_name not in input_dict:
            continue
        toml_value = input_dict[toml_field_name]
        annotation = field_info.annotation

        # List[SomeModel]: recurse into every table of the array.
        type_args = get_args(annotation)
        if (
            get_origin(annotation) is list
            and len(type_args) == 1
            and is_model_type(type_args[0])
        ):
            if isinstance(toml_value, list):
                for i, toml_item in enumerate(toml_value):
                    if isinstance(toml_item, dict):
                        check_unnecessary_fields(
                            type_args[0],
                            toml_item,
                            file_path,
                            f"{full_field_path}[{i}]",
                        )
            continue

        # Directly nested model: recurse into the table.
        if is_model_type(annotation):
            if isinstance(toml_value, dict):
                check_unnecessary_fields(
                    annotation,
                    toml_value,
                    file_path,
                    full_field_path,
                )
            continue

        # model_construct() without values leaves required fields unset, so
        # a plain getattr() would raise AttributeError. A field without a
        # default can never be "unnecessary", so it is skipped.
        default_value = getattr(default_instance, field_name, no_default)
        if default_value is no_default:
            continue

        # Path/Time/Memory defaults are compared after converting the raw
        # TOML value to the same type. The final clause covers the remaining
        # types (str, int, float, bool, list, non-empty dict); an empty dict
        # is never reported.
        matches_default = (
            (
                isinstance(toml_value, str)
                and isinstance(default_value, Path)
                and Path(toml_value) == default_value
            )
            or (
                isinstance(default_value, Time)
                and Time(toml_value) == default_value
            )
            or (
                isinstance(default_value, Memory)
                and Memory(toml_value) == default_value
            )
            or (toml_value == default_value and toml_value != {})
        )
        if matches_default:
            logger.warning(
                f"In file {file_path}, unnecessary field "
                f"`{full_field_path} = {format_value_for_toml_warning(toml_value)}`"
                " can be removed as it matches the default value"
            )
"/home/tt/.config/joj/students.csv", + "maxTotalScore": 100, + "stage": { + "sandboxExecServer": "172.17.0.1:5051", + "sandboxToken": "", + "outputPath": "/tmp/joj3_result.json", + "stages": [ + { + "name": "[cq] Filelength", + "group": "cq", + "executor": { + "name": "sandbox", + "with": { + "default": { + "args": [ + "./tools/filelength", + "400", + "300", + "*.cpp", + "*.h" + ], + "env": [ + "PATH=/usr/bin:/bin:/usr/local/bin" + ], + "stdin": { + "content": "" + }, + "stdout": { + "name": "stdout", + "max": 33554432, + "pipe": true + }, + "stderr": { + "name": "stderr", + "max": 33554432, + "pipe": true + }, + "cpuLimit": 1000000000, + "clockLimit": 2000000000, + "memoryLimit": 268435456, + "stackLimit": 0, + "procLimit": 50, + "cpuRateLimit": 0, + "cpuSetLimit": "", + "copyIn": { + "tools/filelength": { + "src": "/home/tt/.config/joj/tools/filelength" + } + }, + "copyInCached": {}, + "copyInDir": ".", + "copyOut": [ + "stdout", + "stderr" + ], + "copyOutCached": [], + "copyOutMax": 0, + "copyOutDir": "", + "tty": false, + "strictMemoryLimit": false, + "dataSegmentLimit": false, + "addressSpaceLimit": false + }, + "cases": [] + } + }, + "parsers": [ + { + "name": "result-detail", + "with": { + "score": 0, + "comment": "", + "showExxecutorStatus": true, + "showExitStatus": true, + "showError": false, + "showTime": true, + "showMemory": true, + "showRuntime": true, + "showFiles": [], + "filesInCodeBlock": true, + "maxFileLength": 2048 + } + } + ] + } + ], + "preStages": [], + "postStages": [] + } +} diff --git a/tests/convert/unnecessary/task.toml b/tests/convert/unnecessary/task.toml new file mode 100644 index 0000000..21534d1 --- /dev/null +++ b/tests/convert/unnecessary/task.toml @@ -0,0 +1,15 @@ +# general task configuration +task.name = "hw7 ex2" # task name + +[[stages]] +name = "[cq] Filelength" +command = "./tools/filelength 400 300 *.cpp *.h" +files.import = ["tools/filelength"] + +parsers = ["result-detail"] +result-detail.cpu_time = true 
+result-detail.time = true +result-detail.mem = true +result-detail.stdout = false +result-detail.stderr = false +result-detail.exit_status = true