Skip to content
3 changes: 3 additions & 0 deletions changelog.d/1399.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Function closures now correctly update type state of the program.

authors: zettroke
27 changes: 15 additions & 12 deletions src/compiler/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,19 @@ impl CompilerError {
}

impl<'a> Compiler<'a> {
/// Creates a compiler over the given function set and configuration.
///
/// Every diagnostic and query collection starts out empty, the `fallible`
/// and `abortable` flags are cleared, and no fallible-expression error is
/// recorded yet.
pub(crate) fn new(fns: &'a [Box<dyn Function>], config: CompileConfig) -> Self {
    Self {
        config,
        fns,
        fallible_expression_error: None,
        abortable: false,
        fallible: false,
        diagnostics: Vec::new(),
        external_queries: Vec::new(),
        external_assignments: Vec::new(),
        skip_missing_query_target: Vec::new(),
    }
}
/// Compiles a given source into the final [`Program`].
///
/// # Arguments
Expand All @@ -106,17 +119,7 @@ impl<'a> Compiler<'a> {
let initial_state = state.clone();
let mut state = state.clone();

let mut compiler = Self {
fns,
diagnostics: vec![],
fallible: false,
abortable: false,
external_queries: vec![],
external_assignments: vec![],
skip_missing_query_target: vec![],
fallible_expression_error: None,
config,
};
let mut compiler = Compiler::new(fns, config);
let expressions = compiler.compile_root_exprs(ast, &mut state);

let (errors, warnings): (Vec<_>, Vec<_>) =
Expand Down Expand Up @@ -272,7 +275,7 @@ impl<'a> Compiler<'a> {
Some(Group::new(expr))
}

fn compile_root_exprs(
pub(crate) fn compile_root_exprs(
&mut self,
nodes: impl IntoIterator<Item = Node<ast::RootExpr>>,
state: &mut TypeState,
Expand Down
85 changes: 69 additions & 16 deletions src/compiler/expression/function_call.rs
Original file line number Diff line number Diff line change
Expand Up @@ -501,30 +501,38 @@ impl<'a> Builder<'a> {
state: &mut TypeState,
) -> Result<(Option<Closure>, bool), FunctionCallError> {
// Check if we have a closure we need to compile.
if let Some((variables, input)) = self.closure.clone() {
if let Some((variables, input)) = &self.closure {
// TODO: This assumes the closure will run exactly once, which is incorrect.
// see: https://github.com/vectordotdev/vector/issues/13782

let block = closure_block.expect("closure must contain block");

let mut variables_types = vec![];
// At this point, we've compiled the block, so we can remove the
// closure variables from the compiler's local environment.
variables
.iter()
.for_each(|ident| match locals.remove_variable(ident) {
Some(details) => state.local.insert_variable(ident.clone(), details),
None => {
state.local.remove_variable(ident);
}
});
for ident in variables {
let variable_details = state
.local
.remove_variable(ident)
.expect("Closure variable must be present");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can avoid `expect`s in this function. See example: e2a0009.

Also, do you have any example where this change can break existing behavior? I wonder if we should mark this as a breaking PR.

Copy link
Contributor Author

@Zettroke Zettroke May 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is more of an invariant I tried to document by using expect.
The code in Builder::check_closure is called earlier and adds these variables to TypeState. So if they are absent it means there is a bug.
Maybe I should specify it in expect message?

As for examples of broken code, there is an example I found by scanning our VRL codebase.

elements = []

for_each(array!(.x)) -> |_index, value| {
    elements = push(elements, string!(value))
}

# log_state()

command_string = ""
for_each(elements) -> |_index, value| {
    command_string = command_string + " " + string(value)
}

Error:

error[E103]: unhandled fallible assignment
   ┌─ :14:26
   │
14 │         command_string = command_string + " " + string(value)
   │         ---------------- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this expression is fallible because at least one argument's type cannot be verified to be valid
   │         │
   │         or change this to an infallible assignment:
   │         command_string, err = command_string + " " + string(value)
   │

The difference is in the Kind of `elements` probed at the log_state() point:
current-vrl:

Ident(elements): Details {
    type_def: TypeDef {
        fallibility: CannotFail,
        kind: "array",
        purity: Impure,
        returns: "any",
    },
    value: Some(
        Array(
            [],
    ),
}

fixed-vrl:

Ident(elements): Details {
    type_def: TypeDef {
        fallibility: CannotFail,
        kind: "[string or undefined]",
        purity: Impure,
        returns: "any",
    },
    value: None,
}

So the second for_each doesn't type check. I can probably make simpler example, but too tired right now 😄

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for taking the time to explain this. We should mark this as a breaking change and provide an example in the changelog. As always, there is no rush.

Copy link
Contributor Author

@Zettroke Zettroke May 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pront After some work of adapting our VRL code base to this fix I think I have a better example of broken code.

parsed_value = {}

for_each(object!(.value)) -> |key, value| {
    kv = parse_json!(value)

    parsed_value = set!(parsed_value, [key], kv)
}

result = ""

for_each(object!(parsed_value)) -> |_asd, value| {
  result = result + value.id + value.level + " " # <- Now this is fallible because type of `value` is unknown
}

.result = result

Error:

error[E103]: unhandled fallible assignment
   ┌─ :12:12
   │                                                                                                                                                                                                                                                                                                                
12 │   result = result + value.id + value.level + " " # <- Now this is fallible because type of `value` is unknown
   │   -------- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this expression is fallible because at least one argument's type cannot be verified to be valid                                                                                                                                                               
   │   │                                                                                                                                                                                                                                                                                                            
   │   or change this to an infallible assignment:                                                                                                                                                                                                                                                                  
   │   result, err = result + value.id + value.level + " "                                                                                                                                                                                                                                                          
   │                                                                                                                                                                                                                                                                                                                
   = see documentation about error handling at https://errors.vrl.dev/#handling
   = see functions characteristics documentation at https://vrl.dev/expressions/#function-call-characteristics
   = learn more about error code 103 at https://errors.vrl.dev/103
   = see language documentation at https://vrl.dev
   = try your code in the VRL REPL, learn more at https://vrl.dev/examples

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also found a trick to keep added error handling compatible with older vrl versions, though it's very ugly 😅
parsed_value = parse_json!("{}")
Couldn't think of anything better :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I would be more comfortable if we included this in the next-next release. BTW I fixed .github/workflows/vector_integration_check.yaml on main so you can just undo those changes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are the before and after types for the value (inside the second for_each closure) here? And how does the parsed_value = parse_json!("{}") line change the type? Just trying to understand if this behavior breakage makes sense before asking all users to change their VRL programs.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Earlier example:

	"t1": {
		"bytes": true
	},
	"t2": {
		"never": true
	}

Copy link
Contributor Author

@Zettroke Zettroke Jun 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parse_json!("{}") makes it any, so for_each can't deduce the type of the (key, value) pair. Therefore,
its type is never, which forces the error handling.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Optional) Can you please add a "before and after" in the changelog? It would be very helpful for the next breaking guide and VRL users.

variables_types.push(variable_details);

// If outer scope has this variable, restore its state
if let Some(details) = locals.remove_variable(ident) {
state.local.insert_variable(ident.clone(), details);
}
}

let (block_span, (block, block_type_def)) = block.take();
let (block_span, (block, block_type_def)) = closure_block
.ok_or(FunctionCallError::MissingClosure {
call_span: Span::default(), // TODO can we provide a better span?
example: None,
})?
.take();

let closure_fallible = block_type_def.is_fallible();

// Check the type definition of the resulting block. This needs to match
// whatever is configured by the closure input type.
let expected_kind = input.output.into_kind();
let expected_kind = input.clone().output.into_kind();
let found_kind = block_type_def
.kind()
.union(block_type_def.returns().clone());
Expand All @@ -537,7 +545,7 @@ impl<'a> Builder<'a> {
});
}

let fnclosure = Closure::new(variables, block, block_type_def);
let fnclosure = Closure::new(variables.clone(), variables_types, block, block_type_def);
self.list.set_closure(fnclosure.clone());

// closure = Some(fnclosure);
Expand Down Expand Up @@ -700,6 +708,27 @@ impl Expression for FunctionCall {

let mut expr_result = self.expr.apply_type_info(&mut state);

// Closure can change state of locals in our `state`, so we need to update it.
if let Some(closure) = &self.closure {
// To get correct `type_info()` from closure we need to add closure arguments into current state
let mut closure_state = state.clone();
for (ident, details) in closure
.variables
.iter()
.cloned()
.zip(closure.variables_types.iter().cloned())
{
closure_state.local.insert_variable(ident, details);
}
let mut closure_info = closure.block.type_info(&closure_state);
// Nothing done to the closure arguments can affect the parent state, so remove them before the merge
for ident in &closure.variables {
closure_info.state.local.remove_variable(ident);
}

state = state.merge(closure_info.state);
}

// If one of the arguments only partially matches the function type
// definition, then we mark the entire function as fallible.
//
Expand Down Expand Up @@ -1224,9 +1253,10 @@ impl DiagnosticMessage for FunctionCallError {

#[cfg(test)]
mod tests {
use crate::compiler::{value::kind, FunctionExpression};

use super::*;
use crate::compiler::{value::kind, Compiler, FunctionExpression};
use crate::parser::parse;
use crate::stdlib::ForEach;

#[derive(Clone, Debug)]
struct Fn;
Expand Down Expand Up @@ -1411,4 +1441,27 @@ mod tests {

assert_eq!(Ok(expected), params);
}

#[test]
fn closure_type_state() {
    // `v` starts out as a string; the closure body then assigns an integer to it.
    let source = r#"
v = ""

for_each({}) -> |key, value| {
v = 0
}
"#;
    let ast = parse(source).unwrap();

    // Only `for_each` needs to be registered for this program.
    let functions = vec![Box::new(ForEach) as Box<dyn Function>];
    let mut compiler = Compiler::new(&functions, CompileConfig::default());

    let mut type_state = TypeState::default();
    compiler.compile_root_exprs(ast, &mut type_state);

    // The outer variable must reflect both the original and the closure-assigned kind.
    let details = type_state.local.variable(&Ident::new("v")).unwrap();
    assert_eq!(details.type_def.kind(), &Kind::bytes().or_integer());
}
}
25 changes: 16 additions & 9 deletions src/compiler/function.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
#![allow(clippy::missing_errors_doc)]
pub mod closure;

use super::{
expression::{container::Variant, Block, Container, Expr, Expression},
state::TypeState,
value::{kind, Kind},
CompileConfig, Span, TypeDef,
};
use crate::compiler::type_def::Details;
use crate::diagnostic::{DiagnosticMessage, Label, Note};
use crate::parser::ast::Ident;
use crate::path::OwnedTargetPath;
Expand All @@ -10,13 +17,6 @@ use std::{
fmt,
};

use super::{
expression::{container::Variant, Block, Container, Expr, Expression},
state::TypeState,
value::{kind, Kind},
CompileConfig, Span, TypeDef,
};

pub type Compiled = Result<Box<dyn Expression>, Box<dyn DiagnosticMessage>>;
pub type CompiledArgument =
Result<Option<Box<dyn std::any::Any + Send + Sync>>, Box<dyn DiagnosticMessage>>;
Expand Down Expand Up @@ -448,15 +448,22 @@ mod test_impls {
/// A compiled closure attached to a function call: its parameter
/// identifiers, their type details, and the compiled body.
#[derive(Debug, Clone, PartialEq)]
pub struct Closure {
/// Identifiers of the closure's parameters (e.g. `key` and `value`).
pub variables: Vec<Ident>,
/// Type details recorded for each closure parameter, in the same order as
/// `variables`; the two lists are zipped together when the closure's type
/// info is recomputed.
pub variables_types: Vec<Details>,
/// The compiled body of the closure.
pub block: Block,
/// Type definition computed for `block` when the closure was compiled.
pub block_type_def: TypeDef,
}

impl Closure {
#[must_use]
pub fn new<T: Into<Ident>>(variables: Vec<T>, block: Block, block_type_def: TypeDef) -> Self {
pub fn new(
variables: Vec<Ident>,
variables_types: Vec<Details>,
block: Block,
block_type_def: TypeDef,
) -> Self {
Self {
variables: variables.into_iter().map(Into::into).collect(),
variables,
variables_types,
block,
block_type_def,
}
Expand Down
2 changes: 1 addition & 1 deletion src/compiler/program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ pub struct ProgramInfo {
/// Returns whether the compiled program can fail at runtime.
///
/// A program can only fail at runtime if the fallible-function-call
/// (`foo!()`) is used within the source.
/// (`foo!()`) is used within the source.
pub fallible: bool,

/// Returns whether the compiled program can be aborted at runtime.
Expand Down
2 changes: 1 addition & 1 deletion src/compiler/type_def.rs
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ impl From<TypeDef> for Kind {
}

#[derive(Debug, Clone, PartialEq)]
pub(crate) struct Details {
pub struct Details {
pub(crate) type_def: TypeDef,
pub(crate) value: Option<Value>,
}
Expand Down
1 change: 1 addition & 0 deletions src/stdlib/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ impl FunctionExpression for FilterFn {
variables,
block,
block_type_def: _,
..
} = &self.closure;
let runner = closure::Runner::new(variables, |ctx| block.resolve(ctx));

Expand Down
1 change: 1 addition & 0 deletions src/stdlib/for_each.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ impl FunctionExpression for ForEachFn {
variables,
block,
block_type_def: _,
..
} = &self.closure;
let runner = closure::Runner::new(variables, |ctx| block.resolve(ctx));

Expand Down
1 change: 1 addition & 0 deletions src/stdlib/map_keys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ impl FunctionExpression for MapKeysFn {
variables,
block,
block_type_def: _,
..
} = &self.closure;
let runner = closure::Runner::new(variables, |ctx| block.resolve(ctx));

Expand Down
1 change: 1 addition & 0 deletions src/stdlib/map_values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ impl FunctionExpression for MapValuesFn {
variables,
block,
block_type_def: _,
..
} = &self.closure;
let runner = closure::Runner::new(variables, |ctx| block.resolve(ctx));

Expand Down
Loading