From 6f78708c4de7832e7fe113e11c35f8e586117c93 Mon Sep 17 00:00:00 2001 From: Markus Zimmermann Date: Wed, 14 May 2025 10:06:31 +0200 Subject: [PATCH 1/5] Create the result path directory first so we log everything into it --- cmd/eval-dev-quality/cmd/evaluate.go | 65 +++++++++++++++------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go index faaae41a..384a8565 100644 --- a/cmd/eval-dev-quality/cmd/evaluate.go +++ b/cmd/eval-dev-quality/cmd/evaluate.go @@ -122,6 +122,22 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. evaluationContext = &evaluate.Context{} evaluationConfiguration = NewEvaluationConfiguration() + // Setup evaluation result directory. + { + command.ResultPath = strings.ReplaceAll(command.ResultPath, "%datetime%", command.timestamp.Format("2006-01-02-15:04:05")) // REMARK Use a datetime format with a dash, so directories can be easily marked because they are only one group. + uniqueResultPath, err := util.UniqueDirectory(command.ResultPath) + if err != nil { + command.logger.Panicf("ERROR: %s", err) + } + // Ensure that the directory really exists. + if err := osutil.MkdirAll(uniqueResultPath); err != nil { + command.logger.Panicf("ERROR: %s", err) + } + command.ResultPath = uniqueResultPath + evaluationContext.ResultPath = uniqueResultPath + command.logger.Info("configured results directory", "path", command.ResultPath) + } + // Load the provided configuration file, if any. if command.Configuration != "" { if command.Runtime != "local" { @@ -215,29 +231,6 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. evaluationContext.NoDisqualification = command.NoDisqualification } - // Setup evaluation result directory. - { - command.ResultPath = strings.ReplaceAll(command.ResultPath, "%datetime%", command.timestamp.Format("2006-01-02-15:04:05")) // REMARK Use a datetime format with a dash, so directories can be easily marked because they are only one group. - uniqueResultPath, err := util.UniqueDirectory(command.ResultPath) - if err != nil { - command.logger.Panicf("ERROR: %s", err) - } - // Ensure that the directory really exists. - if err := osutil.MkdirAll(uniqueResultPath); err != nil { - command.logger.Panicf("ERROR: %s", err) - } - command.ResultPath = uniqueResultPath - evaluationContext.ResultPath = uniqueResultPath - command.logger.Info("configured results directory", "path", command.ResultPath) - } - - // Initialize logging within result directory. - { - log := command.logger.With(log.AttributeKeyResultPath, command.ResultPath) - command.logger = log - evaluationContext.Log = log - } - // Gather languages. languagesSelected := map[string]language.Language{} { @@ -540,17 +533,27 @@ func (command *Evaluate) Execute(args []string) (err error) { command.logger.Panicf("ERROR: empty evaluation configuration") } - configurationFile, err := os.Create(filepath.Join(evaluationContext.ResultPath, "config.json")) - if err != nil { - command.logger.Panicf("ERROR: cannot create configuration file: %s", err) + // Initialize logging within result directory. + { + log := command.logger.With(log.AttributeKeyResultPath, command.ResultPath) + command.logger = log + evaluationContext.Log = log } - defer func() { - if err := configurationFile.Close(); err != nil { + + // Write the final evaluation configuration to the result directory. + { + configurationFile, err := os.Create(filepath.Join(evaluationContext.ResultPath, "config.json")) + if err != nil { + command.logger.Panicf("ERROR: cannot create configuration file: %s", err) + } + defer func() { + if err := configurationFile.Close(); err != nil { + command.logger.Panicf("ERROR: %s", err) + } + }() + if err := evaluationConfiguration.Write(configurationFile); err != nil { command.logger.Panicf("ERROR: %s", err) } - }() - if err := evaluationConfiguration.Write(configurationFile); err != nil { - command.logger.Panicf("ERROR: %s", err) } switch command.Runtime { From cb0d224827a01cd68974e03276cec14f744379f0 Mon Sep 17 00:00:00 2001 From: Markus Zimmermann Date: Wed, 14 May 2025 10:07:15 +0200 Subject: [PATCH 2/5] Option to validate the configuration of the evaluation to also check if models still exist --- cmd/eval-dev-quality/cmd/evaluate.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go index 384a8565..ba72efb2 100644 --- a/cmd/eval-dev-quality/cmd/evaluate.go +++ b/cmd/eval-dev-quality/cmd/evaluate.go @@ -76,6 +76,8 @@ type Evaluate struct { Configuration string `long:"configuration" description:"Configuration file to set up an evaluation run."` // ExecutionTimeout holds the timeout for an execution. ExecutionTimeout uint `long:"execution-timeout" description:"Execution timeout for compilation and tests in minutes." default:"5"` + // OnlyValidate indicates that only the configuration is validated and no evaluation is performed. + OnlyValidate bool `long:"only-validate" description:"Only validate the configuration and do not perform an evaluation."` // RunIDStartsAt holds the offset increment for the run id used in creating the result folders. RunIDStartsAt uint `long:"run-id-starts-at" description:"Sets the starting index for the run ID." default:"1"` // Runs holds the number of runs to perform. @@ -123,7 +125,7 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. evaluationConfiguration = NewEvaluationConfiguration() // Setup evaluation result directory. - { + if !command.OnlyValidate { command.ResultPath = strings.ReplaceAll(command.ResultPath, "%datetime%", command.timestamp.Format("2006-01-02-15:04:05")) // REMARK Use a datetime format with a dash, so directories can be easily marked because they are only one group. uniqueResultPath, err := util.UniqueDirectory(command.ResultPath) if err != nil { @@ -533,6 +535,10 @@ func (command *Evaluate) Execute(args []string) (err error) { command.logger.Panicf("ERROR: empty evaluation configuration") } + if command.OnlyValidate { + return nil + } + // Initialize logging within result directory. { log := command.logger.With(log.AttributeKeyResultPath, command.ResultPath) From 49465e263dfc9147f8b69eb95463e6bda94fd54b Mon Sep 17 00:00:00 2001 From: Markus Zimmermann Date: Wed, 14 May 2025 10:19:02 +0200 Subject: [PATCH 3/5] Validate all models before exiting on an unknown model/provider This allows us to validate the whole configuration fast. --- cmd/eval-dev-quality/cmd/evaluate.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go index ba72efb2..0066b417 100644 --- a/cmd/eval-dev-quality/cmd/evaluate.go +++ b/cmd/eval-dev-quality/cmd/evaluate.go @@ -443,6 +443,7 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. sort.Strings(command.ModelIDsWithProviderAndAttributes) // Check and initialize models. + var unknownModels []string evaluationContext.ProviderForModel = map[model.Model]provider.Provider{} for _, modelIDsWithProviderAndAttributes := range command.ModelIDsWithProviderAndAttributes { command.logger.Info("selecting model", "model", modelIDsWithProviderAndAttributes) @@ -497,7 +498,15 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. var ok bool m, ok = models[modelIDWithProvider] if !ok { - command.logger.Panicf("ERROR: model %q does not exist for provider %q. Valid models are: %s", modelIDsWithProviderAndAttributes, providerID, strings.Join(modelIDs, ", ")) + unknownModels = append(unknownModels, modelIDsWithProviderAndAttributes) + command.logger.Error( + "ERROR: model does not exist for provider", + "model", modelIDsWithProviderAndAttributes, + "provider", providerID, + "valid", strings.Join(modelIDs, ", "), + ) + + continue } // If a model with attributes is requested, we add the base model plus attributes as new model to our list. @@ -512,6 +521,12 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. evaluationContext.ProviderForModel[m] = p evaluationConfiguration.Models.Selected = append(evaluationConfiguration.Models.Selected, modelIDsWithProviderAndAttributes) } + + if len(unknownModels) > 0 { + sort.Strings(unknownModels) + + command.logger.Panicf("ERROR: found unknown providers or models: %s", strings.Join(unknownModels, ", ")) + } } return evaluationContext, evaluationConfiguration, func() { From ed0cdec79fa330954955f3bbd081672376e8cef0 Mon Sep 17 00:00:00 2001 From: Markus Zimmermann Date: Wed, 14 May 2025 10:34:42 +0200 Subject: [PATCH 4/5] Increase API request attempts for an LLM query to 10 (from 3) because that was our new default in v1.0 --- cmd/eval-dev-quality/cmd/evaluate.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go index 0066b417..80bbc92c 100644 --- a/cmd/eval-dev-quality/cmd/evaluate.go +++ b/cmd/eval-dev-quality/cmd/evaluate.go @@ -61,7 +61,7 @@ type Evaluate struct { // ProviderUrls holds all custom inference endpoint urls for the providers. ProviderUrls map[string]string `long:"urls" description:"Custom OpenAI API compatible inference endpoints (of the form '$provider:$url,...'). Use '$provider=custom-$name' to manually register a custom OpenAI API endpoint provider. Note that the models of a custom OpenAI API endpoint provider must be declared explicitly using the '--model' option. When using the environment variable, separate multiple definitions with ','." env:"PROVIDER_URL" env-delim:","` // APIRequestAttempts holds the number of allowed API requests per LLM query. - APIRequestAttempts uint `long:"api-request-attempts" description:"Number of allowed API requests per LLM query." default:"3"` + APIRequestAttempts uint `long:"api-request-attempts" description:"Number of allowed API requests per LLM query." default:"10"` // APIRequestTimeout holds the timeout for API requests in seconds. APIRequestTimeout uint `long:"api-request-timeout" description:"Timeout of API requests in seconds. ('0' to disable)" default:"1200"` From e04225dbac11a6bf0ee1d49bd19b11f9d4631cf4 Mon Sep 17 00:00:00 2001 From: Markus Zimmermann Date: Wed, 14 May 2025 10:35:17 +0200 Subject: [PATCH 5/5] Log which repositories got selected --- cmd/eval-dev-quality/cmd/evaluate.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go index 80bbc92c..1886a5ed 100644 --- a/cmd/eval-dev-quality/cmd/evaluate.go +++ b/cmd/eval-dev-quality/cmd/evaluate.go @@ -338,6 +338,10 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. } evaluationContext.RepositoryPaths = command.Repositories evaluationConfiguration.Repositories.Selected = append(evaluationConfiguration.Repositories.Selected, command.Repositories...) + + for _, repositoryID := range evaluationConfiguration.Repositories.Selected { + command.logger.Info("selected repository", "repository", repositoryID) + } } // Make the resolved selected languages available in the command.