51 changes: 50 additions & 1 deletion ldai/client.go
@@ -71,7 +71,17 @@ func (c *Client) Config(
variables map[string]interface{},
) (Config, *Tracker) {
_ = c.sdk.TrackMetric("$ld:ai:config:function:single", context, 1, ldvalue.String(key))
return c.evaluateConfig(key, context, defaultValue, variables)
}

// evaluateConfig fetches and interpolates an AI Config without emitting any metric.
// Callers (Config, JudgeConfig) are meant to emit their own metric before calling this.
func (c *Client) evaluateConfig(
key string,
context ldcontext.Context,
defaultValue Config,
variables map[string]interface{},
) (Config, *Tracker) {
result, _ := c.sdk.JSONVariation(key, context, defaultValue.AsLdValue())

// The spec requires the config to at least be an object (although all properties are optional, so it may be an
@@ -102,7 +112,11 @@ func (c *Client) Config(
builder := NewConfig().
WithModelName(parsed.Model.Name).
WithProviderName(parsed.Provider.Name).
WithEnabled(parsed.Meta.Enabled)
WithEnabled(parsed.Meta.Enabled).
WithMode(parsed.Mode).
WithEvaluationMetricKey(parsed.EvaluationMetricKey).
WithEvaluationMetricKeys(parsed.EvaluationMetricKeys).
WithJudgeConfiguration(parsed.JudgeConfiguration)

for k, v := range parsed.Model.Parameters {
builder.WithModelParam(k, v)
@@ -174,3 +188,38 @@ func interpolateTemplate(template string, variables map[string]interface{}) (str
}
return m.RenderString(variables)
}

// JudgeConfig evaluates an AI Config, tracking it as a judge function. See Config for details.
//
// This method extends the provided variables with reserved judge variables:
// - "message_history": "{{message_history}}"
// - "response_to_evaluate": "{{response_to_evaluate}}"
//
// These literal placeholder strings preserve the Mustache templates through the first interpolation
// (during config fetch), allowing Judge.Evaluate() to perform a second interpolation with actual values.
func (c *Client) JudgeConfig(
key string,
context ldcontext.Context,
defaultValue Config,
variables map[string]interface{},
) (Config, *Tracker) {
_ = c.sdk.TrackMetric("$ld:ai:judge:function:single", context, 1, ldvalue.String(key))

// Extend variables with reserved judge placeholders
extendedVariables := make(map[string]interface{})
for k, v := range variables {
// Warn if user tries to override reserved variables
if k == "message_history" || k == "response_to_evaluate" {
c.logger.Warnf("AI Config '%s': variable '%s' is reserved by judge and will be ignored", key, k)
continue
}
extendedVariables[k] = v
}

// Inject reserved variables as literal placeholder strings
// These will be preserved through the first interpolation and resolved during Judge.Evaluate()
extendedVariables["message_history"] = "{{message_history}}"
extendedVariables["response_to_evaluate"] = "{{response_to_evaluate}}"

return c.evaluateConfig(key, context, defaultValue, extendedVariables)
}
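
For reviewers, a hedged usage sketch of the intended call pattern, written as in-package Go to avoid guessing import paths; the helper name `fetchJudgeConfig`, the config key, and the `assistant_name` variable are illustrative only and not part of this diff:

```go
// fetchJudgeConfig is a hypothetical in-package sketch (not part of this diff) of the
// intended call pattern for JudgeConfig.
func fetchJudgeConfig(client *Client) {
	ctx := ldcontext.New("user-key")

	// Emits only $ld:ai:judge:function:single. Ordinary variables are interpolated on
	// this first pass, while the reserved placeholders survive as literal strings.
	cfg, _ := client.JudgeConfig("judge-config-key", ctx, Disabled(), map[string]interface{}{
		"assistant_name": "support-bot",
	})

	for _, msg := range cfg.Messages() {
		// For a judge template such as
		//   "Input: {{message_history}}\nOutput: {{response_to_evaluate}}"
		// msg.Content still contains both placeholders here, ready for the second
		// interpolation pass performed later by Judge.Evaluate().
		_ = msg.Content
	}
}
```
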
200 changes: 200 additions & 0 deletions ldai/client_test.go
@@ -329,6 +329,42 @@ func TestConfigMethodTracking(t *testing.T) {
assert.ElementsMatch(t, expectedEvents, mockSDK.events)
}

// TestJudgeConfigMethodTracking verifies that JudgeConfig emits only the judge metric,
// not the config metric, so judge evaluations are not double-counted on the dashboard.
func TestJudgeConfigMethodTracking(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"messages": [{"content": "test", "role": "system"}]
}`)
mockSDK := newMockSDK(json, nil)
client, err := NewClient(mockSDK)
require.NoError(t, err)
require.NotNil(t, client)

defaultConfig := Disabled()
context := ldcontext.New("user-key")
configKey := "judge-config-key"

config, tracker := client.JudgeConfig(configKey, context, defaultConfig, nil)

require.NotNil(t, config)
require.NotNil(t, tracker)

// Only the judge metric should be emitted; evaluateConfig does not emit any metric.
expectedEvents := []mockEvent{
{
eventName: "$ld:ai:judge:function:single",
context: context,
metricValue: 1,
data: ldvalue.String(configKey),
},
}
assert.ElementsMatch(t, expectedEvents, mockSDK.events,
"JudgeConfig must not emit $ld:ai:config:function:single to avoid double-counting")
}

func TestCanSetModelParameters(t *testing.T) {
client, err := NewClient(newMockSDK(nil, nil))
require.NoError(t, err)
@@ -579,3 +615,167 @@ func TestInterpolation(t *testing.T) {
assert.Equal(t, "user_kind=<>,cat_kind=<>", result)
})
}

func TestParseJudgeSpecificFields(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"judgeConfiguration": {
"judges": [
{"key": "judge1", "samplingRate": 0.5},
{"key": "judge2", "samplingRate": 1.0}
]
},
"messages": [
{"content": "test", "role": "system"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

assert.Equal(t, "judge", cfg.Mode())
assert.Equal(t, "toxicity", cfg.EvaluationMetricKey())

judgeConfig := cfg.JudgeConfiguration()
require.NotNil(t, judgeConfig)
require.Len(t, judgeConfig.Judges, 2)
assert.Equal(t, "judge1", judgeConfig.Judges[0].Key)
assert.Equal(t, 0.5, judgeConfig.Judges[0].SamplingRate)
assert.Equal(t, "judge2", judgeConfig.Judges[1].Key)
assert.Equal(t, 1.0, judgeConfig.Judges[1].SamplingRate)
}

func TestParseEvaluationMetricKeys(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKeys": ["relevance", "accuracy"],
"messages": [
{"content": "test", "role": "system"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

assert.Equal(t, "judge", cfg.Mode())
assert.Equal(t, "", cfg.EvaluationMetricKey())
assert.Equal(t, []string{"relevance", "accuracy"}, cfg.EvaluationMetricKeys())
}

func TestParseEvaluationMetricKeyPriority(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"evaluationMetricKeys": ["relevance", "accuracy"],
"messages": [
{"content": "test", "role": "system"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

assert.Equal(t, "judge", cfg.Mode())
// Both fields should be parsed
assert.Equal(t, "toxicity", cfg.EvaluationMetricKey())
assert.Equal(t, []string{"relevance", "accuracy"}, cfg.EvaluationMetricKeys())
}

func TestJudgeConfigurationImmutable(t *testing.T) {
// Test that mutations to JudgeConfiguration don't affect the Config
judgeConfig := &datamodel.JudgeConfiguration{
Judges: []datamodel.Judge{
{Key: "judge1", SamplingRate: 0.5},
{Key: "judge2", SamplingRate: 1.0},
},
}

builder := NewConfig().
Enable().
WithJudgeConfiguration(judgeConfig)
cfg := builder.Build()

// Mutate the original
judgeConfig.Judges[0].Key = "mutated"
judgeConfig.Judges = append(judgeConfig.Judges, datamodel.Judge{Key: "judge3", SamplingRate: 0.3})

// Config should not be affected
retrieved := cfg.JudgeConfiguration()
require.NotNil(t, retrieved)
require.Len(t, retrieved.Judges, 2)
assert.Equal(t, "judge1", retrieved.Judges[0].Key) // Should still be original value
assert.Equal(t, "judge2", retrieved.Judges[1].Key)

// Mutate the retrieved config
retrieved.Judges[0].Key = "mutated_again"
retrieved.Judges = append(retrieved.Judges, datamodel.Judge{Key: "judge4", SamplingRate: 0.4})

// Config should still not be affected
retrieved2 := cfg.JudgeConfiguration()
require.NotNil(t, retrieved2)
require.Len(t, retrieved2.Judges, 2)
assert.Equal(t, "judge1", retrieved2.Judges[0].Key) // Should still be original value
assert.Equal(t, "judge2", retrieved2.Judges[1].Key)
}

// TestJudgeConfig_PreservesReservedPlaceholders verifies that JudgeConfig injects reserved variables
// so that {{message_history}} and {{response_to_evaluate}} are preserved for the second interpolation
// pass during Judge.Evaluate(). Without this, Config's first Mustache pass would render them as empty.
func TestJudgeConfig_PreservesReservedPlaceholders(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"messages": [
{"content": "You are a judge.", "role": "system"},
{"content": "Input: {{message_history}}\nOutput: {{response_to_evaluate}}", "role": "user"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.JudgeConfig("judge-key", ldcontext.New("user"), Disabled(), nil)

msgs := cfg.Messages()
require.Len(t, msgs, 2)
assert.Equal(t, "You are a judge.", msgs[0].Content)
assert.Contains(t, msgs[1].Content, "{{message_history}}", "JudgeConfig must preserve placeholder for second interpolation")
assert.Contains(t, msgs[1].Content, "{{response_to_evaluate}}", "JudgeConfig must preserve placeholder for second interpolation")
assert.Equal(t, "Input: {{message_history}}\nOutput: {{response_to_evaluate}}", msgs[1].Content)
}

// TestConfig_WithoutReservedVarsWipesJudgePlaceholders documents that Config (without reserved vars)
// renders {{message_history}} and {{response_to_evaluate}} as empty when used for judge templates.
func TestConfig_WithoutReservedVarsWipesJudgePlaceholders(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"messages": [
{"content": "Input: {{message_history}}\nOutput: {{response_to_evaluate}}", "role": "user"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

msgs := cfg.Messages()
require.Len(t, msgs, 1)
assert.Equal(t, "Input: \nOutput: ", msgs[0].Content, "Config without reserved vars renders placeholders as empty")
}
86 changes: 80 additions & 6 deletions ldai/config.go
@@ -60,6 +60,33 @@ func (c *Config) CustomModelParam(key string) (ldvalue.Value, bool) {
return val, ok
}

// Mode returns the AI Config mode (e.g., "completion", "agent", "judge").
func (c *Config) Mode() string {
return c.c.Mode
}

// EvaluationMetricKey returns the evaluation metric key for judge mode configs.
func (c *Config) EvaluationMetricKey() string {
return c.c.EvaluationMetricKey
}

// EvaluationMetricKeys returns the deprecated array of evaluation metric keys.
// Use EvaluationMetricKey instead.
func (c *Config) EvaluationMetricKeys() []string {
return slices.Clone(c.c.EvaluationMetricKeys)
}

// JudgeConfiguration returns the judge configuration attached to this config, if any.
// It returns a defensive copy so callers cannot mutate the Config's internal state.
func (c *Config) JudgeConfiguration() *datamodel.JudgeConfiguration {
if c.c.JudgeConfiguration == nil {
return nil
}
return &datamodel.JudgeConfiguration{
Judges: slices.Clone(c.c.JudgeConfiguration.Judges),
}
}

// AsLdValue is used internally.
func (c *Config) AsLdValue() ldvalue.Value {
return ldvalue.FromJSONMarshal(c.c)
@@ -68,12 +95,16 @@ func (c *Config) AsLdValue() ldvalue.Value {
// ConfigBuilder is used to define a default AI Config, returned when LaunchDarkly is unreachable or there
// is an error evaluating the Config.
type ConfigBuilder struct {
messages []datamodel.Message
enabled bool
providerName string
modelName string
modelParams map[string]ldvalue.Value
modelCustomParams map[string]ldvalue.Value
messages []datamodel.Message
enabled bool
providerName string
modelName string
modelParams map[string]ldvalue.Value
modelCustomParams map[string]ldvalue.Value
mode string
evaluationMetricKey string
evaluationMetricKeys []string
judgeConfiguration *datamodel.JudgeConfiguration
}

// NewConfig returns a new ConfigBuilder. By default, the Config is disabled.
@@ -141,8 +172,47 @@ func (cb *ConfigBuilder) WithCustomModelParam(key string, value ldvalue.Value) *
return cb
}

// WithMode sets the AI Config mode (e.g., "completion", "agent", "judge").
func (cb *ConfigBuilder) WithMode(mode string) *ConfigBuilder {
cb.mode = mode
return cb
}

// WithEvaluationMetricKey sets the evaluation metric key for judge mode configs.
func (cb *ConfigBuilder) WithEvaluationMetricKey(key string) *ConfigBuilder {
cb.evaluationMetricKey = key
return cb
}

// WithEvaluationMetricKeys sets the deprecated array of evaluation metric keys.
// Use WithEvaluationMetricKey instead.
func (cb *ConfigBuilder) WithEvaluationMetricKeys(keys []string) *ConfigBuilder {
cb.evaluationMetricKeys = slices.Clone(keys)
return cb
}

// WithJudgeConfiguration sets the judge configuration for this config.
// The provided judgeConfig is defensively copied.
func (cb *ConfigBuilder) WithJudgeConfiguration(judgeConfig *datamodel.JudgeConfiguration) *ConfigBuilder {
if judgeConfig == nil {
cb.judgeConfiguration = nil
return cb
}
cb.judgeConfiguration = &datamodel.JudgeConfiguration{
Judges: slices.Clone(judgeConfig.Judges),
}
return cb
}

// Build creates a Config from the current builder state.
func (cb *ConfigBuilder) Build() Config {
var judgeConfig *datamodel.JudgeConfiguration
if cb.judgeConfiguration != nil {
judgeConfig = &datamodel.JudgeConfiguration{
Judges: slices.Clone(cb.judgeConfiguration.Judges),
}
}

return Config{
c: datamodel.Config{
Messages: slices.Clone(cb.messages),
@@ -157,6 +227,10 @@ func (cb *ConfigBuilder) Build() Config {
Provider: datamodel.Provider{
Name: cb.providerName,
},
Mode: cb.mode,
EvaluationMetricKey: cb.evaluationMetricKey,
EvaluationMetricKeys: slices.Clone(cb.evaluationMetricKeys),
JudgeConfiguration: judgeConfig,
},
}
}
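
As a quick reference for the new builder surface, a hedged in-package sketch of constructing a judge-mode default config; the model and provider names, metric key, judge key, and sampling rate are illustrative values, not part of this diff:

```go
// defaultJudgeConfig is a hypothetical sketch (not part of this diff) of building a
// judge-mode default Config, suitable as the defaultValue argument to Config or
// JudgeConfig when LaunchDarkly is unreachable.
func defaultJudgeConfig() Config {
	return NewConfig().
		Enable().
		WithModelName("gpt-4o-mini"). // illustrative
		WithProviderName("openai").   // illustrative
		WithMode("judge").
		WithEvaluationMetricKey("toxicity").
		WithJudgeConfiguration(&datamodel.JudgeConfiguration{
			Judges: []datamodel.Judge{
				{Key: "judge1", SamplingRate: 0.5},
			},
		}).
		Build()
}
```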