Validate option #457

Open · wants to merge 5 commits into base: main
94 changes: 61 additions & 33 deletions cmd/eval-dev-quality/cmd/evaluate.go
@@ -61,7 +61,7 @@ type Evaluate struct {
 	// ProviderUrls holds all custom inference endpoint urls for the providers.
 	ProviderUrls map[string]string `long:"urls" description:"Custom OpenAI API compatible inference endpoints (of the form '$provider:$url,...'). Use '$provider=custom-$name' to manually register a custom OpenAI API endpoint provider. Note that the models of a custom OpenAI API endpoint provider must be declared explicitly using the '--model' option. When using the environment variable, separate multiple definitions with ','." env:"PROVIDER_URL" env-delim:","`
 	// APIRequestAttempts holds the number of allowed API requests per LLM query.
-	APIRequestAttempts uint `long:"api-request-attempts" description:"Number of allowed API requests per LLM query." default:"3"`
+	APIRequestAttempts uint `long:"api-request-attempts" description:"Number of allowed API requests per LLM query." default:"10"`
 	// APIRequestTimeout holds the timeout for API requests in seconds.
 	APIRequestTimeout uint `long:"api-request-timeout" description:"Timeout of API requests in seconds. ('0' to disable)" default:"1200"`

@@ -76,6 +76,8 @@ type Evaluate struct {
 	Configuration string `long:"configuration" description:"Configuration file to set up an evaluation run."`
 	// ExecutionTimeout holds the timeout for an execution.
 	ExecutionTimeout uint `long:"execution-timeout" description:"Execution timeout for compilation and tests in minutes." default:"5"`
+	// OnlyValidate indicates that only the configuration is validated and no evaluation is performed.
+	OnlyValidate bool `long:"only-validate" description:"Only validate the configuration and do not perform an evaluation."`
 	// RunIDStartsAt holds the offset increment for the run id used in creating the result folders.
 	RunIDStartsAt uint `long:"run-id-starts-at" description:"Sets the starting index for the run ID." default:"1"`
 	// Runs holds the number of runs to perform.
@@ -122,6 +124,22 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
 	evaluationContext = &evaluate.Context{}
 	evaluationConfiguration = NewEvaluationConfiguration()

+	// Setup evaluation result directory.
+	if !command.OnlyValidate {
+		command.ResultPath = strings.ReplaceAll(command.ResultPath, "%datetime%", command.timestamp.Format("2006-01-02-15:04:05")) // REMARK Use a datetime format with a dash, so directories can be easily marked because they are only one group.
+		uniqueResultPath, err := util.UniqueDirectory(command.ResultPath)
+		if err != nil {
+			command.logger.Panicf("ERROR: %s", err)
+		}
+		// Ensure that the directory really exists.
+		if err := osutil.MkdirAll(uniqueResultPath); err != nil {
+			command.logger.Panicf("ERROR: %s", err)
+		}
+		command.ResultPath = uniqueResultPath
+		evaluationContext.ResultPath = uniqueResultPath
+		command.logger.Info("configured results directory", "path", command.ResultPath)
+	}
+
 	// Load the provided configuration file, if any.
 	if command.Configuration != "" {
 		if command.Runtime != "local" {
Expand Down Expand Up @@ -215,29 +233,6 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
evaluationContext.NoDisqualification = command.NoDisqualification
}

// Setup evaluation result directory.
{
command.ResultPath = strings.ReplaceAll(command.ResultPath, "%datetime%", command.timestamp.Format("2006-01-02-15:04:05")) // REMARK Use a datetime format with a dash, so directories can be easily marked because they are only one group.
uniqueResultPath, err := util.UniqueDirectory(command.ResultPath)
if err != nil {
command.logger.Panicf("ERROR: %s", err)
}
// Ensure that the directory really exists.
if err := osutil.MkdirAll(uniqueResultPath); err != nil {
command.logger.Panicf("ERROR: %s", err)
}
command.ResultPath = uniqueResultPath
evaluationContext.ResultPath = uniqueResultPath
command.logger.Info("configured results directory", "path", command.ResultPath)
}

// Initialize logging within result directory.
{
log := command.logger.With(log.AttributeKeyResultPath, command.ResultPath)
command.logger = log
evaluationContext.Log = log
}

// Gather languages.
languagesSelected := map[string]language.Language{}
{
@@ -343,6 +338,10 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
 		}
 		evaluationContext.RepositoryPaths = command.Repositories
 		evaluationConfiguration.Repositories.Selected = append(evaluationConfiguration.Repositories.Selected, command.Repositories...)
+
+		for _, repositoryID := range evaluationConfiguration.Repositories.Selected {
+			command.logger.Info("selected repository", "repository", repositoryID)
+		}
 	}

 	// Make the resolved selected languages available in the command.
@@ -448,6 +447,7 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
 		sort.Strings(command.ModelIDsWithProviderAndAttributes)

 		// Check and initialize models.
+		var unknownModels []string
 		evaluationContext.ProviderForModel = map[model.Model]provider.Provider{}
 		for _, modelIDsWithProviderAndAttributes := range command.ModelIDsWithProviderAndAttributes {
 			command.logger.Info("selecting model", "model", modelIDsWithProviderAndAttributes)
@@ -502,7 +502,15 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
 			var ok bool
 			m, ok = models[modelIDWithProvider]
 			if !ok {
-				command.logger.Panicf("ERROR: model %q does not exist for provider %q. Valid models are: %s", modelIDsWithProviderAndAttributes, providerID, strings.Join(modelIDs, ", "))
+				unknownModels = append(unknownModels, modelIDsWithProviderAndAttributes)
+				command.logger.Error(
+					"ERROR: model does not exist for provider",
+					"model", modelIDsWithProviderAndAttributes,
+					"provider", providerID,
+					"valid", strings.Join(modelIDs, ", "),
+				)
+
+				continue
 			}

 			// If a model with attributes is requested, we add the base model plus attributes as new model to our list.
@@ -517,6 +525,12 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
 			evaluationContext.ProviderForModel[m] = p
 			evaluationConfiguration.Models.Selected = append(evaluationConfiguration.Models.Selected, modelIDsWithProviderAndAttributes)
 		}
+
+		if len(unknownModels) > 0 {
+			sort.Strings(unknownModels)
+
+			command.logger.Panicf("ERROR: found unknown providers or models: %s", strings.Join(unknownModels, ", "))
+		}
 	}

 	return evaluationContext, evaluationConfiguration, func() {
@@ -540,17 +554,31 @@ func (command *Evaluate) Execute(args []string) (err error) {
 		command.logger.Panicf("ERROR: empty evaluation configuration")
 	}

-	configurationFile, err := os.Create(filepath.Join(evaluationContext.ResultPath, "config.json"))
-	if err != nil {
-		command.logger.Panicf("ERROR: cannot create configuration file: %s", err)
+	if command.OnlyValidate {
+		return nil
 	}
-	defer func() {
-		if err := configurationFile.Close(); err != nil {
+
+	// Initialize logging within result directory.
+	{
+		log := command.logger.With(log.AttributeKeyResultPath, command.ResultPath)
+		command.logger = log
+		evaluationContext.Log = log
+	}
+
+	// Write the final evaluation configuration to the result directory.
+	{
+		configurationFile, err := os.Create(filepath.Join(evaluationContext.ResultPath, "config.json"))
+		if err != nil {
+			command.logger.Panicf("ERROR: cannot create configuration file: %s", err)
+		}
+		defer func() {
+			if err := configurationFile.Close(); err != nil {
+				command.logger.Panicf("ERROR: %s", err)
+			}
+		}()
+		if err := evaluationConfiguration.Write(configurationFile); err != nil {
 			command.logger.Panicf("ERROR: %s", err)
 		}
-	}()
-	if err := evaluationConfiguration.Write(configurationFile); err != nil {
-		command.logger.Panicf("ERROR: %s", err)
 	}

 	switch command.Runtime {
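
Taken together, the diff adds an "only validate" mode: result-directory setup in Initialize is skipped when only validating, and Execute returns before logging is re-initialized and the configuration file is written. The new option is declared with the same go-flags-style struct tags as the existing flags, so it should surface on the command line as "--only-validate". The snippet below is a minimal, standalone sketch of how such a boolean option is parsed and checked; it assumes the github.com/jessevdk/go-flags package that these struct tags suggest and is not the repository's actual wiring.

package main

import (
	"fmt"
	"os"

	flags "github.com/jessevdk/go-flags"
)

// options mirrors the declaration style of the Evaluate command's flags.
// It is a hypothetical, minimal subset for illustration only.
type options struct {
	OnlyValidate bool `long:"only-validate" description:"Only validate the configuration and do not perform an evaluation."`
}

func main() {
	var opts options
	// ParseArgs fills the struct from the given argument list.
	if _, err := flags.ParseArgs(&opts, os.Args[1:]); err != nil {
		os.Exit(1)
	}

	if opts.OnlyValidate {
		fmt.Println("configuration is valid, skipping the evaluation")

		return
	}

	// ... perform the actual evaluation ...
}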
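
The model-selection change works independently of the new option: an unknown model no longer aborts the run immediately. Each unknown ID is collected and logged as an error, and only after the loop does the command panic once with the sorted list of all unknown providers or models. A simplified, self-contained sketch of that collect-then-fail pattern (not the repository's actual code; the model IDs are made up) could look like this:

package main

import (
	"fmt"
	"log"
	"sort"
	"strings"
)

// validateModels checks every requested model against the known ones and
// reports all unknown entries together instead of stopping at the first one.
func validateModels(requested []string, known map[string]bool) error {
	var unknownModels []string
	for _, id := range requested {
		if !known[id] {
			unknownModels = append(unknownModels, id)
		}
	}
	if len(unknownModels) > 0 {
		sort.Strings(unknownModels)

		return fmt.Errorf("found unknown providers or models: %s", strings.Join(unknownModels, ", "))
	}

	return nil
}

func main() {
	// Hypothetical model IDs, for illustration only.
	known := map[string]bool{"openrouter/some-model": true}
	requested := []string{"openrouter/some-model", "custom/unknown-model"}

	if err := validateModels(requested, known); err != nil {
		log.Fatalf("ERROR: %s", err) // The real command panics via its structured logger instead.
	}
	fmt.Println("all requested models are known")
}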