51 changes: 50 additions & 1 deletion ldai/client.go
@@ -71,7 +71,17 @@ func (c *Client) Config(
variables map[string]interface{},
) (Config, *Tracker) {
_ = c.sdk.TrackMetric("$ld:ai:config:function:single", context, 1, ldvalue.String(key))
return c.evaluateConfig(key, context, defaultValue, variables)
}

// evaluateConfig fetches and interpolates an AI Config without emitting any metric.
// Callers (Config, JudgeConfig) are meant to emit their own metric before calling this.
func (c *Client) evaluateConfig(
key string,
context ldcontext.Context,
defaultValue Config,
variables map[string]interface{},
) (Config, *Tracker) {
result, _ := c.sdk.JSONVariation(key, context, defaultValue.AsLdValue())

// The spec requires the config to at least be an object (although all properties are optional, so it may be an
@@ -102,7 +112,11 @@ func (c *Client) Config(
builder := NewConfig().
WithModelName(parsed.Model.Name).
WithProviderName(parsed.Provider.Name).
WithEnabled(parsed.Meta.Enabled)
WithEnabled(parsed.Meta.Enabled).
WithMode(parsed.Mode).
WithEvaluationMetricKey(parsed.EvaluationMetricKey).
WithEvaluationMetricKeys(parsed.EvaluationMetricKeys).
WithJudgeConfiguration(parsed.JudgeConfiguration)

for k, v := range parsed.Model.Parameters {
builder.WithModelParam(k, v)
@@ -174,3 +188,38 @@ func interpolateTemplate(template string, variables map[string]interface{}) (str
}
return m.RenderString(variables)
}

// JudgeConfig evaluates an AI Config, tracking it as a judge function. See Config for details.
//
// This method extends the provided variables with reserved judge variables:
// - "message_history": "{{message_history}}"
// - "response_to_evaluate": "{{response_to_evaluate}}"
//
// These literal placeholder strings preserve the Mustache templates through the first interpolation
// (during config fetch), allowing Judge.Evaluate() to perform a second interpolation with actual values.
func (c *Client) JudgeConfig(
key string,
context ldcontext.Context,
defaultValue Config,
variables map[string]interface{},
) (Config, *Tracker) {
_ = c.sdk.TrackMetric("$ld:ai:judge:function:single", context, 1, ldvalue.String(key))

// Extend variables with reserved judge placeholders
extendedVariables := make(map[string]interface{})
for k, v := range variables {
// Warn if user tries to override reserved variables
if k == "message_history" || k == "response_to_evaluate" {
c.logger.Warnf("AI Config '%s': variable '%s' is reserved by judge and will be ignored", key, k)
continue
}
extendedVariables[k] = v
}

// Inject reserved variables as literal placeholder strings
// These will be preserved through the first interpolation and resolved during Judge.Evaluate()
extendedVariables["message_history"] = "{{message_history}}"
extendedVariables["response_to_evaluate"] = "{{response_to_evaluate}}"

return c.evaluateConfig(key, context, defaultValue, extendedVariables)
}
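
For reviewers, a hedged usage sketch of the intended call pattern, written as in-package Go to avoid guessing import paths; the helper name `fetchJudgeConfig`, the config key, and the `assistant_name` variable are illustrative only and not part of this diff:

```go
// fetchJudgeConfig is a hypothetical in-package sketch (not part of this diff) of the
// intended call pattern for JudgeConfig.
func fetchJudgeConfig(client *Client) {
	ctx := ldcontext.New("user-key")

	// Emits only $ld:ai:judge:function:single. Ordinary variables are interpolated on
	// this first pass, while the reserved placeholders survive as literal strings.
	cfg, _ := client.JudgeConfig("judge-config-key", ctx, Disabled(), map[string]interface{}{
		"assistant_name": "support-bot",
	})

	for _, msg := range cfg.Messages() {
		// For a judge template such as
		//   "Input: {{message_history}}\nOutput: {{response_to_evaluate}}"
		// msg.Content still contains both placeholders here, ready for the second
		// interpolation pass performed later by Judge.Evaluate().
		_ = msg.Content
	}
}
```
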
200 changes: 200 additions & 0 deletions ldai/client_test.go
@@ -329,6 +329,42 @@ func TestConfigMethodTracking(t *testing.T) {
assert.ElementsMatch(t, expectedEvents, mockSDK.events)
}

// TestJudgeConfigMethodTracking verifies that JudgeConfig emits only the judge metric,
// not the config metric, so judge evaluations are not double-counted on the dashboard.
func TestJudgeConfigMethodTracking(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"messages": [{"content": "test", "role": "system"}]
}`)
mockSDK := newMockSDK(json, nil)
client, err := NewClient(mockSDK)
require.NoError(t, err)
require.NotNil(t, client)

defaultConfig := Disabled()
context := ldcontext.New("user-key")
configKey := "judge-config-key"

config, tracker := client.JudgeConfig(configKey, context, defaultConfig, nil)

require.NotNil(t, config)
require.NotNil(t, tracker)

// Only the judge metric should be emitted; evaluateConfig does not emit any metric.
expectedEvents := []mockEvent{
{
eventName: "$ld:ai:judge:function:single",
context: context,
metricValue: 1,
data: ldvalue.String(configKey),
},
}
assert.ElementsMatch(t, expectedEvents, mockSDK.events,
"JudgeConfig must not emit $ld:ai:config:function:single to avoid double-counting")
}

func TestCanSetModelParameters(t *testing.T) {
client, err := NewClient(newMockSDK(nil, nil))
require.NoError(t, err)
@@ -579,3 +615,167 @@ func TestInterpolation(t *testing.T) {
assert.Equal(t, "user_kind=<>,cat_kind=<>", result)
})
}

func TestParseJudgeSpecificFields(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"judgeConfiguration": {
"judges": [
{"key": "judge1", "samplingRate": 0.5},
{"key": "judge2", "samplingRate": 1.0}
]
},
"messages": [
{"content": "test", "role": "system"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

assert.Equal(t, "judge", cfg.Mode())
assert.Equal(t, "toxicity", cfg.EvaluationMetricKey())

judgeConfig := cfg.JudgeConfiguration()
require.NotNil(t, judgeConfig)
require.Len(t, judgeConfig.Judges, 2)
assert.Equal(t, "judge1", judgeConfig.Judges[0].Key)
assert.Equal(t, 0.5, judgeConfig.Judges[0].SamplingRate)
assert.Equal(t, "judge2", judgeConfig.Judges[1].Key)
assert.Equal(t, 1.0, judgeConfig.Judges[1].SamplingRate)
}

func TestParseEvaluationMetricKeys(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKeys": ["relevance", "accuracy"],
"messages": [
{"content": "test", "role": "system"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

assert.Equal(t, "judge", cfg.Mode())
assert.Equal(t, "", cfg.EvaluationMetricKey())
assert.Equal(t, []string{"relevance", "accuracy"}, cfg.EvaluationMetricKeys())
}

func TestParseEvaluationMetricKeyPriority(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"evaluationMetricKeys": ["relevance", "accuracy"],
"messages": [
{"content": "test", "role": "system"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

assert.Equal(t, "judge", cfg.Mode())
// Both fields should be parsed
assert.Equal(t, "toxicity", cfg.EvaluationMetricKey())
assert.Equal(t, []string{"relevance", "accuracy"}, cfg.EvaluationMetricKeys())
}

func TestJudgeConfigurationImmutable(t *testing.T) {
// Test that mutations to JudgeConfiguration don't affect the Config
judgeConfig := &datamodel.JudgeConfiguration{
Judges: []datamodel.Judge{
{Key: "judge1", SamplingRate: 0.5},
{Key: "judge2", SamplingRate: 1.0},
},
}

builder := NewConfig().
Enable().
WithJudgeConfiguration(judgeConfig)
cfg := builder.Build()

// Mutate the original
judgeConfig.Judges[0].Key = "mutated"
judgeConfig.Judges = append(judgeConfig.Judges, datamodel.Judge{Key: "judge3", SamplingRate: 0.3})

// Config should not be affected
retrieved := cfg.JudgeConfiguration()
require.NotNil(t, retrieved)
require.Len(t, retrieved.Judges, 2)
assert.Equal(t, "judge1", retrieved.Judges[0].Key) // Should still be original value
assert.Equal(t, "judge2", retrieved.Judges[1].Key)

// Mutate the retrieved config
retrieved.Judges[0].Key = "mutated_again"
retrieved.Judges = append(retrieved.Judges, datamodel.Judge{Key: "judge4", SamplingRate: 0.4})

// Config should still not be affected
retrieved2 := cfg.JudgeConfiguration()
require.NotNil(t, retrieved2)
require.Len(t, retrieved2.Judges, 2)
assert.Equal(t, "judge1", retrieved2.Judges[0].Key) // Should still be original value
assert.Equal(t, "judge2", retrieved2.Judges[1].Key)
}

// TestJudgeConfig_PreservesReservedPlaceholders verifies that JudgeConfig injects reserved variables
// so that {{message_history}} and {{response_to_evaluate}} are preserved for the second interpolation
// pass during Judge.Evaluate(). Without this, Config's first Mustache pass would render them as empty.
func TestJudgeConfig_PreservesReservedPlaceholders(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"messages": [
{"content": "You are a judge.", "role": "system"},
{"content": "Input: {{message_history}}\nOutput: {{response_to_evaluate}}", "role": "user"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.JudgeConfig("judge-key", ldcontext.New("user"), Disabled(), nil)

msgs := cfg.Messages()
require.Len(t, msgs, 2)
assert.Equal(t, "You are a judge.", msgs[0].Content)
assert.Contains(t, msgs[1].Content, "{{message_history}}", "JudgeConfig must preserve placeholder for second interpolation")
assert.Contains(t, msgs[1].Content, "{{response_to_evaluate}}", "JudgeConfig must preserve placeholder for second interpolation")
assert.Equal(t, "Input: {{message_history}}\nOutput: {{response_to_evaluate}}", msgs[1].Content)
}

// TestConfig_WithoutReservedVarsWipesJudgePlaceholders documents that Config (without reserved vars)
// renders {{message_history}} and {{response_to_evaluate}} as empty when used for judge templates.
func TestConfig_WithoutReservedVarsWipesJudgePlaceholders(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"messages": [
{"content": "Input: {{message_history}}\nOutput: {{response_to_evaluate}}", "role": "user"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

msgs := cfg.Messages()
require.Len(t, msgs, 1)
assert.Equal(t, "Input: \nOutput: ", msgs[0].Content, "Config without reserved vars renders placeholders as empty")
}
86 changes: 80 additions & 6 deletions ldai/config.go
@@ -60,6 +60,33 @@ func (c *Config) CustomModelParam(key string) (ldvalue.Value, bool) {
return val, ok
}

// Mode returns the AI Config mode (e.g., "completion", "agent", "judge").
func (c *Config) Mode() string {
return c.c.Mode
}

// EvaluationMetricKey returns the evaluation metric key for judge mode configs.
func (c *Config) EvaluationMetricKey() string {
return c.c.EvaluationMetricKey
}

// EvaluationMetricKeys returns the deprecated array of evaluation metric keys.
// Use EvaluationMetricKey instead.
func (c *Config) EvaluationMetricKeys() []string {
return slices.Clone(c.c.EvaluationMetricKeys)
}

// JudgeConfiguration returns the judge configuration attached to this config, if any.
// It returns a defensive copy so callers cannot mutate the Config's internal state.
func (c *Config) JudgeConfiguration() *datamodel.JudgeConfiguration {
if c.c.JudgeConfiguration == nil {
return nil
}
return &datamodel.JudgeConfiguration{
Judges: slices.Clone(c.c.JudgeConfiguration.Judges),
}
}

// AsLdValue is used internally.
func (c *Config) AsLdValue() ldvalue.Value {
return ldvalue.FromJSONMarshal(c.c)
@@ -68,12 +95,16 @@ func (c *Config) AsLdValue() ldvalue.Value {
// ConfigBuilder is used to define a default AI Config, returned when LaunchDarkly is unreachable or there
// is an error evaluating the Config.
type ConfigBuilder struct {
messages []datamodel.Message
enabled bool
providerName string
modelName string
modelParams map[string]ldvalue.Value
modelCustomParams map[string]ldvalue.Value
messages []datamodel.Message
enabled bool
providerName string
modelName string
modelParams map[string]ldvalue.Value
modelCustomParams map[string]ldvalue.Value
mode string
evaluationMetricKey string
evaluationMetricKeys []string
judgeConfiguration *datamodel.JudgeConfiguration
}

// NewConfig returns a new ConfigBuilder. By default, the Config is disabled.
@@ -141,8 +172,47 @@ func (cb *ConfigBuilder) WithCustomModelParam(key string, value ldvalue.Value) *
return cb
}

// WithMode sets the AI Config mode (e.g., "completion", "agent", "judge").
func (cb *ConfigBuilder) WithMode(mode string) *ConfigBuilder {
cb.mode = mode
return cb
}

// WithEvaluationMetricKey sets the evaluation metric key for judge mode configs.
func (cb *ConfigBuilder) WithEvaluationMetricKey(key string) *ConfigBuilder {
cb.evaluationMetricKey = key
return cb
}

// WithEvaluationMetricKeys sets the deprecated array of evaluation metric keys.
// Use WithEvaluationMetricKey instead.
func (cb *ConfigBuilder) WithEvaluationMetricKeys(keys []string) *ConfigBuilder {
cb.evaluationMetricKeys = slices.Clone(keys)
return cb
}

// WithJudgeConfiguration sets the judge configuration for this config.
// The provided judgeConfig is defensively copied.
func (cb *ConfigBuilder) WithJudgeConfiguration(judgeConfig *datamodel.JudgeConfiguration) *ConfigBuilder {
if judgeConfig == nil {
cb.judgeConfiguration = nil
return cb
}
cb.judgeConfiguration = &datamodel.JudgeConfiguration{
Judges: slices.Clone(judgeConfig.Judges),
}
return cb
}

// Build creates a Config from the current builder state.
func (cb *ConfigBuilder) Build() Config {
var judgeConfig *datamodel.JudgeConfiguration
if cb.judgeConfiguration != nil {
judgeConfig = &datamodel.JudgeConfiguration{
Judges: slices.Clone(cb.judgeConfiguration.Judges),
}
}

return Config{
c: datamodel.Config{
Messages: slices.Clone(cb.messages),
@@ -157,6 +227,10 @@ func (cb *ConfigBuilder) Build() Config {
Provider: datamodel.Provider{
Name: cb.providerName,
},
Mode: cb.mode,
EvaluationMetricKey: cb.evaluationMetricKey,
EvaluationMetricKeys: slices.Clone(cb.evaluationMetricKeys),
JudgeConfiguration: judgeConfig,
},
}
}
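
As a quick reference for the new builder surface, a hedged in-package sketch of constructing a judge-mode default config; the model and provider names, metric key, judge key, and sampling rate are illustrative values, not part of this diff:

```go
// defaultJudgeConfig is a hypothetical sketch (not part of this diff) of building a
// judge-mode default Config, suitable as the defaultValue argument to Config or
// JudgeConfig when LaunchDarkly is unreachable.
func defaultJudgeConfig() Config {
	return NewConfig().
		Enable().
		WithModelName("gpt-4o-mini"). // illustrative
		WithProviderName("openai").   // illustrative
		WithMode("judge").
		WithEvaluationMetricKey("toxicity").
		WithJudgeConfiguration(&datamodel.JudgeConfiguration{
			Judges: []datamodel.Judge{
				{Key: "judge1", SamplingRate: 0.5},
			},
		}).
		Build()
}
```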