Skip to content

Simplify multiple-of-element size access to arrays #8627

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions regression/cbmc/Array_operations4/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Regression test: overwrite part of `target`, starting at index n, first from
// a char array and then from an int array, and check the result element-wise.
int main()
{
// 8 bytes of char data; same size as int_source if int is 4 bytes wide
// (assumption, not established in this file).
char source[8];
int int_source[2];
int target[4];
// n is never assigned; presumably the verifier treats it as nondeterministic.
int n;
// The guard keeps both target[n] and target[n + 1] inside the 4 elements.
if(n >= 0 && n < 3)
{
// Replace starting at &target[n] using a source of a different element type.
__CPROVER_array_replace(&target[n], source);
// Replace again using a source whose element type matches target's.
__CPROVER_array_replace(&target[n], int_source);
// After the second replace, the two elements starting at n must equal
// int_source's two elements.
__CPROVER_assert(target[n] == int_source[0], "");
__CPROVER_assert(target[n + 1] == int_source[1], "");
}
}
13 changes: 13 additions & 0 deletions regression/cbmc/Array_operations4/program-only.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
CORE paths-lifo-expected-failure
main.c
--program-only
target!0@1#2 ==
target!0@1#3 ==
^EXIT=0$
^SIGNAL=0$
--
byte_update_
--
This test demonstrates that we can simplify byte_update expressions to, e.g.,
WITH expressions.
Disabled for paths-lifo mode, which does not support --program-only.
9 changes: 9 additions & 0 deletions regression/cbmc/Array_operations4/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
CORE
main.c

^VERIFICATION SUCCESSFUL$
^EXIT=0$
^SIGNAL=0$
--
^warning: ignoring
--
109 changes: 109 additions & 0 deletions regression/cbmc/havoc_slice/functional.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#include <stdint.h>
#include <stdlib.h>

/*

When translating to SMT, structs are represented by algebraic datatypes (ADTs)
and arrays of structs by arrays of ADTs.

When forming a pointer ranging over an array of struct using a nondeterministic
index, the pointer offset appears completely non-deterministic to CBMC, and
in-place updates made using assignments or __CPROVER_array_replace then expand
into large sequences of byte_updates ranging over the whole array.

If we could somehow track the fact that a pointer formed using arr[i] is still
aligned on array cell boundaries, i.e. is of the form i*sizeof(T) where T is the
type of the array, across intermediate variables and assignments then we would
be able to encode these cases in SMT more optimally:

T arr[N];
size_t i = nondet_size_t();
if (i < N) {
T *ai = &arr[i];
T v = nondet_T();
*ai = v;
// here we have ai of the form &arr[0] + i*sizeof(T) assigned with a value of
size sizeof(T)
// can be modeled as a functional array update at index i.
}

size_t k = nondet_size_t();
if (k < N) {
size_t S = N-k;
T nondet[S];
T *ak = &arr[k];
__CPROVER_array_replace(ak, nondet);
// here we have ak of the form &arr[0] + k*sizeof(T) updated in place with a
value of size S*sizeof(T)
// can be modeled as a functional slice update at index k with S elements.
}

At the moment these constructs blow up with --z3 and --bitwuzla
*/

// #define N 4 // use 8, 16, ... to see the blowup
#define K 4

// A vector of N 32-bit signed coefficients. N is not defined in this file (the
// #define above is commented out), so it must be supplied externally, e.g. via
// -DN=4 on the command line.
typedef struct
{
int32_t coeffs[N];
} vec_N;

// K vec_N elements wrapped in a struct, i.e. an array of structs that itself
// sits inside a struct. K is the macro defined above.
typedef struct
{
vec_N vec[K];
} vec_K_N;

unsigned int __VERIFIER_nondet_unsigned_int();
vec_N __VERIFIER_nondet_vec_N();

// Test driver: allocates one vec_K_N, picks a nondeterministic index i < K and
// overwrites v->vec[i] in one of four ways selected by the preprocessor
// (ASSIGN_DIRECT, ASSIGN, SLICE_BYTES, SLICE_TYPED). The final assertion reads
// back from the overwritten element and is labelled "expected to fail".
int main(void)
{
vec_K_N *v = malloc(sizeof(vec_K_N));
// rule out the allocation-failure case so v can be dereferenced below
__CPROVER_assume(v);

unsigned int i = __VERIFIER_nondet_unsigned_int();

// models a nondet loop step from an arbitrary state
if(i < K)
{
#ifdef ASSIGN_DIRECT
// nondet assignment without indirection through a
// simplifies to a functional update
v->vec[i] = __VERIFIER_nondet_vec_N();
#endif

// simulates how symex models argument passing for a function call
vec_N *__arg = &v->vec[i];
vec_N *a = __arg;

#ifdef ASSIGN
// nondet assignment with indirection through a
// currently does NOT simplify to a functional update but ultimately
// should
*a = __VERIFIER_nondet_vec_N();
#endif

#ifdef SLICE_BYTES
// Modeled as a byte slice operation without indirection
// does NOT simplify to a functional array update due to lack of pattern
// matching on the pointer offset expression.
// We could realize the pointer offset is of the form i*16 and that the
// new value is of size 16 too but we currently don't.
// (16 is sizeof(vec_N) when N is 4: four int32_t coefficients.)
char nondet[sizeof(vec_N)];
__CPROVER_array_replace((char *)a, nondet);
#endif

#ifdef SLICE_TYPED
// Modeled as a typed slice operation without indirection.
// Does NOT simplify to a functional array update due to lack of pattern
// matching on the pointer offset expression and types.
// We could realize the pointer offset is of the form i*16 and that the
// new value is of size 16 too but we currently don't.
// (16 is sizeof(vec_N) when N is 4: four int32_t coefficients.)
vec_N nondet[1];
__CPROVER_array_replace(a, nondet);
#endif
// a->coeffs[0] is read back from the element addressed above; the property
// is labelled as one that should not hold.
__CPROVER_assert(a->coeffs[0] > 0, "expected to fail");
}
return 0;
}
8 changes: 8 additions & 0 deletions regression/cbmc/havoc_slice/functional.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
CORE
functional.c
-DN=4 -DASSIGN_DIRECT
^VERIFICATION FAILED$
^EXIT=10$
^SIGNAL=0$
--
^warning: ignoring
10 changes: 10 additions & 0 deletions regression/cbmc/havoc_slice/functional_assign.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
functional.c
-DN=4 -DASSIGN --program-only
^EXIT=0$
^SIGNAL=0$
--
byte_update
--
We want these tests not to produce any byte_update expressions, but instead want
updates at specific array indices.
10 changes: 10 additions & 0 deletions regression/cbmc/havoc_slice/functional_assign_direct.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
functional.c
-DN=4 -DASSIGN_DIRECT --program-only
^EXIT=0$
^SIGNAL=0$
--
byte_update
--
We want these tests not to produce any byte_update expressions, but instead want
updates at specific array indices.
10 changes: 10 additions & 0 deletions regression/cbmc/havoc_slice/functional_slice_bytes.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
functional.c
-DN=4 -DSLICE_BYTES --program-only
^EXIT=0$
^SIGNAL=0$
--
byte_update
--
We want these tests not to produce any byte_update expressions, but instead want
updates at specific array indices.
10 changes: 10 additions & 0 deletions regression/cbmc/havoc_slice/functional_slice_typed.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
functional.c
-DN=4 -DSLICE_TYPED --program-only
^EXIT=0$
^SIGNAL=0$
--
byte_update
--
We want these tests not to produce any byte_update expressions, but instead want
updates at specific array indices.
1 change: 1 addition & 0 deletions regression/cbmc/trace-values/unbounded_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ int main(int argc, char *argv[])
unsigned long size;
__CPROVER_assume(size < 100 && size > 10);
int *array = malloc(size * sizeof(int));
__CPROVER_assume(array);
array[size - 1] = 42;
int fixed_array[] = {1, 2};
__CPROVER_array_replace(array, fixed_array);
Expand Down
5 changes: 5 additions & 0 deletions regression/validate-trace-xml-schema/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,14 @@
['show_properties1', 'test.desc'],
# program-only instead of trace
['vla1', 'program-only.desc'],
['Array_operations4', 'program-only.desc'],
['Pointer_Arithmetic19', 'test.desc'],
['Quantifiers-simplify', 'simplify_not_forall.desc'],
['array-cell-sensitivity15', 'test.desc'],
['havoc_slice', 'functional_assign.desc'],
['havoc_slice', 'functional_assign_direct.desc'],
['havoc_slice', 'functional_slice_bytes.desc'],
['havoc_slice', 'functional_slice_typed.desc'],
['saturating_arithmetric', 'output-formula.desc'],
# these test for invalid command line handling
['bad_option', 'test_multiple.desc'],
Expand Down
138 changes: 137 additions & 1 deletion src/util/pointer_offset_size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,143 @@ std::optional<exprt> get_subexpression_at_offset(
const auto offset_bytes = numeric_cast<mp_integer>(offset);

if(!offset_bytes.has_value())
return {};
{
// offset is not a constant; try to see whether it is a multiple of a
// constant, or a sum that involves a multiple of a constant
if(auto array_type = type_try_dynamic_cast<array_typet>(expr.type()))
{
const auto target_size_bits = pointer_offset_bits(target_type, ns);
const auto elem_size_bits =
pointer_offset_bits(array_type->element_type(), ns);

// no arrays of zero-, or unknown-sized elements, or ones where elements
// have a bit-width that isn't a multiple of bytes
if(
!target_size_bits.has_value() || !elem_size_bits.has_value() ||
*elem_size_bits <= 0 ||
*elem_size_bits % config.ansi_c.char_width != 0 ||
*target_size_bits != *elem_size_bits)
{
return {};
}

// if we have an offset C + x (where C is a constant) we can try to
// recurse by first looking at the member at offset C
if(
offset.id() == ID_plus && offset.operands().size() == 2 &&
(to_multi_ary_expr(offset).op0().is_constant() ||
to_multi_ary_expr(offset).op1().is_constant()))
{
const plus_exprt &offset_plus = to_plus_expr(offset);
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
offset_plus.op0().is_constant() ? offset_plus.op0()
: offset_plus.op1()));
const exprt &other_factor = offset_plus.op0().is_constant()
? offset_plus.op1()
: offset_plus.op0();

auto expr_at_offset_C =
get_subexpression_at_offset(expr, const_factor, target_type, ns);

if(
expr_at_offset_C.has_value() && expr_at_offset_C->id() == ID_index &&
to_index_expr(*expr_at_offset_C).index().is_zero())
{
return get_subexpression_at_offset(
to_index_expr(*expr_at_offset_C).array(),
other_factor,
target_type,
ns);
}
}

// give up if the offset expression isn't of the form K * i or i * K
// (where K is a constant)
if(
offset.id() != ID_mult || offset.operands().size() != 2 ||
(!to_multi_ary_expr(offset).op0().is_constant() &&
!to_multi_ary_expr(offset).op1().is_constant()))
{
return {};
}

const mult_exprt &offset_mult = to_mult_expr(offset);
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
offset_mult.op0().is_constant() ? offset_mult.op0()
: offset_mult.op1()));
const exprt &other_factor =
offset_mult.op0().is_constant() ? offset_mult.op1() : offset_mult.op0();

if(const_factor % (*elem_size_bits / config.ansi_c.char_width) != 0)
return {};

exprt index = mult_exprt{
other_factor,
from_integer(
const_factor / (*elem_size_bits / config.ansi_c.char_width),
other_factor.type())};

return get_subexpression_at_offset(
index_exprt{
expr,
typecast_exprt::conditional_cast(index, array_type->index_type())},
0,
target_type,
ns);
}
else if(
auto struct_tag_type =
type_try_dynamic_cast<struct_tag_typet>(expr.type()))
{
// If the offset expression is of the form K * i or i * K (where K is a
// constant) and the first component of the struct is an array we will
// recurse on that member.
const auto &components = ns.follow_tag(*struct_tag_type).components();
if(
!components.empty() &&
can_cast_type<array_typet>(components.front().type()) &&
offset.id() == ID_mult && offset.operands().size() == 2 &&
(to_multi_ary_expr(offset).op0().is_constant() ||
to_multi_ary_expr(offset).op1().is_constant()))
{
return get_subexpression_at_offset(
member_exprt{expr, components.front()}, offset, target_type, ns);
}
// if we have an offset C + x (where C is a constant) we can try to
// recurse by first looking at the member at offset C
else if(
offset.id() == ID_plus && offset.operands().size() == 2 &&
(to_multi_ary_expr(offset).op0().is_constant() ||
to_multi_ary_expr(offset).op1().is_constant()))
{
const plus_exprt &offset_plus = to_plus_expr(offset);
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
offset_plus.op0().is_constant() ? offset_plus.op0()
: offset_plus.op1()));
const exprt &other_factor = offset_plus.op0().is_constant()
? offset_plus.op1()
: offset_plus.op0();

auto expr_at_offset_C =
get_subexpression_at_offset(expr, const_factor, target_type, ns);

if(
expr_at_offset_C.has_value() && expr_at_offset_C->id() == ID_index &&
to_index_expr(*expr_at_offset_C).index().is_zero())
{
return get_subexpression_at_offset(
to_index_expr(*expr_at_offset_C).array(),
other_factor,
target_type,
ns);
}
}

return {};
}
else
return {};
}
else
return get_subexpression_at_offset(expr, *offset_bytes, target_type, ns);
}
Loading
Loading