diff --git a/SeQuant/core/algorithm.hpp b/SeQuant/core/algorithm.hpp
index 6c2bd5ba9e..8f9d050e6a 100644
--- a/SeQuant/core/algorithm.hpp
+++ b/SeQuant/core/algorithm.hpp
@@ -138,34 +138,6 @@ bool next_permutation_parity(int& parity, BidirIt first, BidirIt last,
   }
 }
 
-///
-/// Given a size of a container @c n, returns a range of bitsets. The bitsets
-/// represent the sieve that can be used to construct the subsequences of the
-/// size-n container.
-///
-inline auto subset_indices(size_t n) {
-  using ranges::views::iota;
-  using ranges::views::transform;
-  return iota(size_t{1}, size_t(1 << n)) |
-         transform([n](auto i) { return boost::dynamic_bitset<>(n, i); });
-}
-
-///
-/// Given an iterable @c it and a bitset @c bs, select the elements in the
-/// iterable that correspond to an 'on' bit.
-///
-template <typename Iterable>
-auto subsequence(Iterable const& it, boost::dynamic_bitset<> const& bs) {
-  using ranges::views::filter;
-  using ranges::views::iota;
-  using ranges::views::transform;
-  using ranges::views::zip;
-
-  auto bits = iota(size_t{0}) | transform([&bs](auto i) { return bs.at(i); });
-  return zip(it, bits) | filter([](auto&& kv) { return std::get<1>(kv); }) |
-         transform([](auto&& kv) { return std::get<0>(kv); });
-}
-
 ///
 /// All elements in the vector belong to the integral range [-1,N)
 /// where N is the length of the [Expr] (ie. the iterable of expressions)
@@ -191,6 +163,120 @@ auto inits(Rng const& rng) {
          });
 }
 
+namespace bits {
+
+///
+/// @brief Generates all ordered bipartitions of the set bits in the mask.
+///
+/// For a given mask representing a set S, this function returns a view of pairs
+/// (A, B) such that A ∪ B = S, A ∩ B = {}, and A, B are subsets of S. The
+/// iteration order results in B descending and A ascending.
+///
+/// @tparam T The unsigned integer type of the mask.
+/// @param mask The bitmask representing the set.
+/// @return A view of pairs of disjoint submasks.
+///
+template <std::unsigned_integral T>
+constexpr auto bipartitions_ordered(T mask) {
+  // number of possible bipartitions
+  size_t const nparts = (1 << std::popcount(mask));
+  return std::views::iota(size_t{0}, nparts) |
+         std::views::transform([mask, p = mask](auto) mutable {
+           auto p_ = p;
+           p = (p - 1) & mask;
+           return std::pair<T, T>{p_ ^ mask, p_};
+         });
+}
+
+///
+/// @brief Generates unordered bipartitions of the mask.
+///
+/// This function returns a subset of the ordered bipartitions such that for
+/// every pair {A, B}, the pair {B, A} is excluded (unless A == B, which is
+/// handled naturally). Specifically, it takes the first half of the ordered
+/// bipartitions.
+///
+/// @param mask The bitmask representing the set.
+/// @return A view of unordered bipartitions.
+///
+constexpr auto bipartitions_unordered(std::unsigned_integral auto mask) {
+  auto bps = bipartitions_ordered(mask);
+  size_t const nparts = std::ranges::distance(bps) / 2;
+  return bps | std::views::take(nparts);
+}
+
+///
+/// @brief Alias for bipartitions_unordered.
+///
+/// @param mask The bitmask representing the set.
+/// @return A view of unordered bipartitions.
+///
+constexpr auto bipartitions(std::unsigned_integral auto mask) {
+  return bipartitions_unordered(mask);
+}
+
+///
+/// @brief Generates all subsets of the mask in ascending numerical order.
+///
+/// @param mask The bitmask representing the set.
+/// @return A view of submasks sorted ascending.
+///
+constexpr auto subsets_ascending(std::unsigned_integral auto mask) {
+  return bipartitions_ordered(mask) | std::views::elements<0>;
+}
+
+///
+/// @brief Generates all subsets of the mask in descending numerical order.
+///
+/// @param mask The bitmask representing the set.
+/// @return A view of submasks sorted descending.
+///
+constexpr auto subsets_descending(std::unsigned_integral auto mask) {
+  return bipartitions_ordered(mask) | std::views::elements<1>;
+}
+
+///
+/// @brief Generates all subsets of the mask in ascending numerical order.
+///
+/// @param mask The bitmask representing the set.
+/// @return A view of submasks.
+///
+constexpr auto subsets(std::unsigned_integral auto mask) {
+  return subsets_ascending(mask);
+}
+
+///
+/// @brief Generates indices of set bits in the mask.
+/// @param mask The bitmask representing the set.
+/// @return A view of indices of set bits.
+///
+constexpr auto on_bits_index(std::unsigned_integral auto mask) {
+  size_t const nbits = std::popcount(mask);
+  return std::views::iota(size_t{0}, nbits) |
+         std::views::transform([m = mask](auto) mutable {
+           size_t i = std::countr_zero(m);
+           m &= (m - 1);
+           return i;
+         });
+}
+
+///
+/// @brief Creates a view adaptor that selects elements from the input range
+/// based on indices.
+/// This function returns a closure object that, when applied to a range of
+/// indices, maps those indices to elements in the source range `rng`.
+/// @param rng The source range from which elements are selected.
+/// @return A view adaptor that transforms indices into elements from `rng`.
+///
+constexpr auto sieve(std::ranges::viewable_range auto&& rng) {
+  auto elems = std::views::all(std::forward<decltype(rng)>(rng));
+  return std::views::transform([elems](auto i) {
+    return *std::ranges::next(std::ranges::begin(elems), i);
+  });
+}
+
+}  // namespace bits
+
 }  // namespace sequant
 
 #endif  // SEQUANT_ALGORITHM_HPP
diff --git a/SeQuant/core/eval/eval_expr.cpp b/SeQuant/core/eval/eval_expr.cpp
index d8483765f1..f1076a7298 100644
--- a/SeQuant/core/eval/eval_expr.cpp
+++ b/SeQuant/core/eval/eval_expr.cpp
@@ -30,6 +30,11 @@
 
 namespace sequant {
 
+using EvalExprNode = FullBinaryNode<EvalExpr>;
+
+using IndexSet = container::set<Index, Index::FullLabelCompare>;
+using IndexCount = container::map<Index, size_t, Index::FullLabelCompare>;
+
 namespace {
 
 size_t hash_terminal_tensor(Tensor const&) noexcept;
@@ -300,7 +305,39 @@ struct ExprWithHash {
   size_t hash;
 };
 
-using EvalExprNode = FullBinaryNode<EvalExpr>;
+void index_count(IndexCount& counts, ExprPtr const& expr) {
+  if (!expr) return;
+  if (expr->is<Tensor>()) {
+    for (auto&& i : expr->as<Tensor>().const_indices()) {
+      auto&& [it, yn] = counts.emplace(i, 1);
+      if (!yn) it->second++;
+    }
+  } else if (expr->is<Sum>() && !expr->empty())
+    index_count(counts, expr->front());
+  else if (expr->is<Product>())
+    for (auto&& fac : *expr) index_count(counts, fac);
+}
+
+IndexCount index_count(meta::range_of<ExprPtr> auto const& facs) {
+  IndexCount result;
+  for (auto&& f : facs) index_count(result, f);
+  return result;
+}
+
+IndexSet target_indices(meta::range_of<ExprPtr> auto const& facs,
+                        IndexSet const& tidxs) {
+  IndexSet result;
+  for (auto&& [k, v] : index_count(facs))
+    if (v == 1 || tidxs.contains(k)) result.emplace(k);
+  return result;
+}
+
+auto imed_target_indices(Product const& prod, IndexSet const& tidxs) {
+  return inits(prod.factors()) |
+         ranges::views::transform(
+             [&tidxs](auto&& facs) { return target_indices(facs, tidxs); }) |
+         ranges::to_vector;
+}
 
 ///
 /// \brief Collect tensors appearing as a factor at the leaf node of a product
diff --git a/SeQuant/core/meta.hpp b/SeQuant/core/meta.hpp
index 23b20f76bb..77ce7acfda 100644
--- a/SeQuant/core/meta.hpp
+++ b/SeQuant/core/meta.hpp
@@ -9,6 +9,7 @@
 #include <memory>
 #include <range/v3/range/access.hpp>
 #include <range/v3/range/traits.hpp>
+#include <ranges>
 #include <type_traits>
 
 namespace sequant {
@@ -506,6 +507,52 @@ struct std_array_size<std::array<T, N>>
 template <typename T>
 constexpr inline std::size_t std_array_size_v = std_array_size<T>::value;
 
+///
+/// True if @p T is a range of rank @p Rank whose value type is convertible to
+/// @p V.
+///
+/// A rank of 0 means @p T is directly convertible to @p V.
+/// A rank of 1 means @p T is a range of values convertible to @p V.
+/// A rank of N means @p T is a range of ranges ... (N times) of values
+/// convertible to @p V.
+///
+template <typename, typename, size_t>
+constexpr bool range_of_rank{};
+
+template <typename T, typename V>
+constexpr bool range_of_rank<T, V, 0> = std::is_convertible_v<T, V>;
+
+template <std::ranges::range Rng, typename V, size_t Rank>
+constexpr bool range_of_rank<Rng, V, Rank> =
+    range_of_rank<std::ranges::range_value_t<Rng>, V, (Rank - 1)>;
+
+///
+/// True if @tparam Rng is a range whose value type is convertible
+/// to @tparam V .
+///
+/// A rank of 0 means @tparam Rng is directly convertible to @tparam V.
+/// A rank of 1 means @tparam Rng is a range of values convertible to @tparam V.
+/// A rank of N means @tparam Rng is a range of ranges ... (N times) of values
+/// convertible to @tparam V.
+///
+template <typename Rng, typename V, size_t Rank = 1>
+concept range_of = range_of_rank<Rng, V, Rank>;
+
+///
+/// True if @tparam Rng is a range whose value type is convertible
+/// to @tparam V and the size (std::ranges::distance) is known in constant time.
+///
+/// A rank of 0 means @tparam Rng is directly convertible to @tparam V.
+/// A rank of 1 means @tparam Rng is a range of values convertible to @tparam V.
+/// A rank of N means @tparam Rng is a range of ranges ... (N times) of values
+/// convertible to @tparam V.
+///
+/// @see https://en.cppreference.com/w/cpp/ranges/sized_range.html
+///
+template <typename Rng, typename V, size_t Rank = 1>
+concept sized_range_of =
+    std::ranges::sized_range<Rng> && range_of<Rng, V, Rank>;
+
 }  // namespace meta
 }  // namespace sequant
 
diff --git a/SeQuant/core/optimize.hpp b/SeQuant/core/optimize.hpp
index 42879fb04b..5820ebd54c 100644
--- a/SeQuant/core/optimize.hpp
+++ b/SeQuant/core/optimize.hpp
@@ -21,20 +21,8 @@
 
 #include <bit>
 
-namespace {
-
-///
-/// \tparam T integral type
-/// \return true if @c x has a single bit on in its bit representation.
-///
-template <typename T>
-bool has_single_bit(T x) noexcept {
-  return std::has_single_bit(x);
-}
-
-}  // namespace
-
 namespace sequant {
+
 /// Optimize an expression assuming the number of virtual orbitals
 /// greater than the number of occupied orbitals.
 
@@ -51,49 +39,21 @@ namespace opt {
 /// \param idxs Index objects.
 /// \return flops count
 ///
-template <typename IdxToSz, typename Idxs,
-          std::enable_if_t<std::is_invocable_r_v<size_t, IdxToSz, const Index&>,
-                           bool> = true>
-double ops_count(IdxToSz const& idxsz, Idxs const& idxs) {
-  auto oixs = tot_indices(idxs);
-  double ops = 1.0;
-  for (auto&& idx : ranges::views::concat(oixs.outer, oixs.inner))
-    ops *= std::invoke(idxsz, idx);
-  // ops == 1.0 implies zero flops.
-  return ops == 1.0 ? 0 : ops;
-}
-
-namespace {
-
-///
-/// Non-trivial, unique bipartitions of the bits in an unsigned integral.
-///  eg.
-///  decimal (binary) => [{decimal (binary)}...]
-///  -------------------------------------------
-///         3 (11) => [{1 (01), 2 (10)}]
-///      11 (1011) => [{1  (0001), 10 (1010)},
-///                     {2 (0010), 9 (1001)},
-///                     {3 (0011), 8 (1000)}]
-///      0 (0)     => [] (empty: no partitions possible)
-///      2 (10)    => [] (empty)
-///      4 (100)   => [] (empty)
-///
-/// \tparam I Unsigned integral type.
-/// \param n Represents a bit set.
-/// \param func func is function that takes two arguments of type I.
-///
-template <
-    typename I, typename F,
-    typename = std::enable_if_t<std::is_integral_v<I> && std::is_unsigned_v<I>>,
-    typename = std::enable_if_t<std::is_invocable_v<F, I, I>>>
-void biparts(I n, F const& func) {
-  if (n == 0) return;
-  I const h = static_cast<I>(std::floor(n / 2.0));
-  for (I n_ = 1; n_ <= h; ++n_) {
-    auto const l = n & n_;
-    auto const r = (n - n_) & n;
-    if ((l | r) == n) func(l, r);
-  }
+template <typename F>
+concept has_index_extent = std::is_invocable_r_v<size_t, F, Index const&>;
+
+auto constexpr flops_counter(has_index_extent auto&& ixex) {
+  return [ixex](meta::range_of<Index> auto const& lhs,
+                meta::range_of<Index> auto const& rhs,
+                meta::range_of<Index> auto const& result) {
+    using ranges::views::concat;
+    auto tot_idxs = tot_indices<container::set<Index, Index::LabelCompare>>(
+        concat(lhs, rhs, result));
+    double ops = 1.;
+    for (auto&& idx : concat(tot_idxs.outer, tot_idxs.inner)) ops *= ixex(idx);
+    // ops == 1 implies zero flops
+    return ops == 1. ? 0. : ops;
+  };
 }
 
 ///
@@ -111,145 +71,86 @@ struct OptRes {
   EvalSequence sequence;
 };
 
-///
-/// Returns a vector of Index objects that are common in @c idxs1 and @c idxs2
-/// that are sorted using Index:LabelCompare{}.
-///
-/// @note I1 and I2 containers are assumed to be sorted by using
-/// Index::LabelCompare{};
-///
-template <typename I1, typename I2, typename Comp = std::less<Index>>
-container::svector<Index> common_indices(I1 const& idxs1, I2 const& idxs2) {
-  using std::back_inserter;
-  using std::begin;
-  using std::end;
-  using std::set_intersection;
-
-  SEQUANT_ASSERT(std::is_sorted(begin(idxs1), end(idxs1), Comp{}));
-  SEQUANT_ASSERT(std::is_sorted(begin(idxs2), end(idxs2), Comp{}));
-
-  container::svector<Index> result;
-
-  set_intersection(begin(idxs1), end(idxs1), begin(idxs2), end(idxs2),
-                   back_inserter(result), Comp{});
-  return result;
-}
-
-///
-/// Returns a vector of Index objects that are common in @c idxs1 and @c idxs2
-/// that are sorted using Index::LabelCompare{}.
-///
-/// @note I1 and I2 containers are assumed to be sorted by using
-/// Index::LabelCompare{};
-///
-template <typename I1, typename I2, typename Comp = std::less<Index>>
-container::svector<Index> diff_indices(I1 const& idxs1, I2 const& idxs2) {
-  using std::back_inserter;
-  using std::begin;
-  using std::end;
-  using std::set_symmetric_difference;
-
-  SEQUANT_ASSERT(std::is_sorted(begin(idxs1), end(idxs1), Comp{}));
-  SEQUANT_ASSERT(std::is_sorted(begin(idxs2), end(idxs2), Comp{}));
-
-  container::svector<Index> result;
-
-  set_symmetric_difference(begin(idxs1), end(idxs1), begin(idxs2), end(idxs2),
-                           back_inserter(result), Comp{});
-  return result;
-}
-
-///
-/// \tparam IdxToSz
-/// \param network A TensorNetwork object.
-/// \param idxsz An invocable on Index, that maps Index to its dimension.
-/// \return Optimal evaluation sequence that minimizes flops. If there are
-///         equivalent optimal sequences then the result is the one that keeps
-///         the order of tensors in the network as original as possible.
-template <typename IdxToSz,
-          std::enable_if_t<std::is_invocable_r_v<size_t, IdxToSz, const Index&>,
-                           bool> = true>
-EvalSequence single_term_opt(TensorNetwork const& network,
-                             IdxToSz const& idxsz) {
+template <typename CostFn>
+  requires requires(CostFn&& fn, decltype(OptRes::indices) const& ixs) {
+    { std::forward<CostFn>(fn)(ixs, ixs, ixs) } -> std::floating_point;
+  }
+EvalSequence single_term_opt_impl(TensorNetwork const& network,
+                                  meta::range_of<Index> auto const& tidxs,
+                                  CostFn&& cost_fn) {
   using ranges::views::concat;
-  using IndexContainer = container::svector<Index>;
-  // number of terms
+  using ranges::views::indirect;
+  using ranges::views::transform;
+  using IndexContainer = decltype(OptRes::indices);
   auto const nt = network.tensors().size();
   if (nt == 1) return EvalSequence{0};
   if (nt == 2) return EvalSequence{0, 1, -1};
-  auto nth_tensor_indices = container::svector<IndexContainer>{};
-  nth_tensor_indices.reserve(nt);
 
-  for (std::size_t i = 0; i < nt; ++i) {
-    auto const& tnsr = *network.tensors().at(i);
-
-    nth_tensor_indices.emplace_back();
-    auto& ixs = nth_tensor_indices.back();
-    for (auto&& j : slots(tnsr)) ixs.emplace_back(j);
-
-    ranges::sort(ixs, std::less<Index>{});
+  container::vector<OptRes> results((1 << nt));
+
+  // initialize the intermediate results
+  {
+    auto tensor_indices = network.tensors()  //
+                          | indirect         //
+                          | transform(slots);
+    auto imed_indices = subset_target_indices(tensor_indices, tidxs);
+    SEQUANT_ASSERT(ranges::distance(imed_indices) == ranges::distance(results));
+    for (size_t i = 0; i < results.size(); ++i) {
+      results[i].indices =
+          imed_indices[i] | ranges::views::move | ranges::to<IndexContainer>;
+      results[i].flops =
+          std::popcount(i) > 1
+              ? std::numeric_limits<decltype(OptRes::flops)>::max()
+              : 0;
+      if (std::popcount(i) == 1)
+        results[i].sequence.emplace_back(std::countr_zero(i));
+      // else results[i].sequence is left uninitialized
+    }
   }
 
-  container::svector<OptRes> results((1 << nt), OptRes{{}, 0, {}});
-
-  // power_pos is used, and incremented, only when the
-  // result[1<<0]
-  // result[1<<1]
-  // result[1<<2]
-  // and so on are set
-  size_t power_pos = 0;
-  for (size_t n = 1; n < (1ul << nt); ++n) {
-    double curr_cost = std::numeric_limits<double>::max();
+  // find the optimal evaluation sequence
+  for (size_t n = 0; n < results.size(); ++n) {
+    if (std::popcount(n) < 2) continue;
     std::pair<size_t, size_t> curr_parts{0, 0};
-    container::svector<Index> curr_indices{};
-
-    // function to find the optimal partition
-    auto scan_parts = [&curr_cost,                              //
-                       &curr_parts,                             //
-                       &curr_indices,                           //
-                           & results = std::as_const(results),  //
-                       &idxsz](                                 //
-                          size_t lpart, size_t rpart) {
-      auto commons =
-          common_indices(results[lpart].indices, results[rpart].indices);
-      auto diffs = diff_indices(results[lpart].indices, results[rpart].indices);
-      auto new_cost = ops_count(idxsz,                   //
-                                concat(commons, diffs))  //
-                      + results[lpart].flops             //
-                      + results[rpart].flops;
+    for (auto& curr_cost = results[n].flops;
+         auto&& [lp, rp] : bits::bipartitions(n)) {
+      // do nothing with the trivial bipartition
+      // i.e. one subset is the empty set and the other full
+      if (lp == 0 || rp == 0) continue;
+      auto new_cost = std::forward<CostFn>(cost_fn)(results[lp].indices,  //
+                                                    results[rp].indices,  //
+                                                    results[n].indices)   //
+                      + results[lp].flops + results[rp].flops;
       if (new_cost <= curr_cost) {
         curr_cost = new_cost;
-        curr_parts = decltype(curr_parts){lpart, rpart};
-        curr_indices = std::move(diffs);
+        curr_parts = decltype(curr_parts){lp, rp};
       }
-    };
-
-    biparts(n, scan_parts);
-
-    auto& curr_result = results[n];
-    if (has_single_bit(n)) {
-      SEQUANT_ASSERT(curr_indices.empty());
-      // evaluation of a single atomic tensor
-      curr_result.flops = 0;
-      curr_result.indices = std::move(nth_tensor_indices[power_pos]);
-      curr_result.sequence = EvalSequence{static_cast<int>(power_pos++)};
-    } else {
-      curr_result.flops = curr_cost;
-      curr_result.indices = std::move(curr_indices);
-      auto const& first = results[curr_parts.first].sequence;
-      auto const& second = results[curr_parts.second].sequence;
-
-      curr_result.sequence = (first[0] < second[0] ? concat(first, second)
-                                                   : concat(second, first)) |
-                             ranges::to<EvalSequence>;
-      curr_result.sequence.push_back(-1);
     }
+    auto const& lseq = results[curr_parts.first].sequence;
+    auto const& rseq = results[curr_parts.second].sequence;
+    results[n].sequence =
+        (lseq[0] < rseq[0] ? concat(lseq, rseq) : concat(rseq, lseq)) |
+        ranges::to<EvalSequence>;
+    results[n].sequence.push_back(-1);
   }
 
-  return results[(1 << nt) - 1].sequence;
+  return results.back().sequence;
 }
 
-}  // namespace
+///
+/// \tparam IdxToSz
+/// \param network A TensorNetwork object.
+/// \param idxsz An invocable on Index, that maps Index to its dimension.
+/// \return Optimal evaluation sequence that minimizes flops. If there are
+///         equivalent optimal sequences then the result is the one that keeps
+///         the order of tensors in the network as original as possible.
+///
+template <has_index_extent IdxToSz>
+EvalSequence single_term_opt(TensorNetwork const& network, IdxToSz&& idxsz) {
+  auto cost_fn = flops_counter(std::forward<IdxToSz>(idxsz));
+  decltype(OptRes::indices) tidxs{};
+  return single_term_opt_impl(network, tidxs, cost_fn);
+}
 
 ///
 /// Omit the first factor from the top level product from given expression.
@@ -271,9 +172,8 @@ void pull_scalar(sequant::ExprPtr expr) noexcept;
 ///
 /// @note @c prod is assumed to consist of only Tensor expressions
 ///
-template <typename IdxToSz,
-          std::enable_if_t<std::is_invocable_v<IdxToSz, Index>, bool> = true>
-ExprPtr single_term_opt(Product const& prod, IdxToSz const& idxsz) {
+template <has_index_extent IdxToSz>
+ExprPtr single_term_opt(Product const& prod, IdxToSz&& idxsz) {
   using ranges::views::filter;
   using ranges::views::reverse;
 
@@ -282,7 +182,8 @@ ExprPtr single_term_opt(Product const& prod, IdxToSz const& idxsz) {
                                prod.factors().end(), Product::Flatten::No});
   auto const tensors =
       prod | filter(&ExprPtr::template is<Tensor>) | ranges::to_vector;
-  auto seq = single_term_opt(TensorNetwork{tensors}, idxsz);
+  auto seq =
+      single_term_opt(TensorNetwork{tensors}, std::forward<IdxToSz>(idxsz));
   auto result = container::svector<ExprPtr>{};
   for (auto i : seq)
     if (i == -1) {
@@ -330,9 +231,7 @@ Sum reorder(Sum const& sum);
 /// \param reorder_sum If true, the summands are reordered so that terms with
 ///                    common sub-expressions appear closer to each other.
 /// \return Optimized expression for lower evaluation cost.
-template <typename IdxToSize, typename = std::enable_if_t<std::is_invocable_r_v<
-                                  size_t, IdxToSize, const Index&>>>
-ExprPtr optimize(ExprPtr const& expr, IdxToSize const& idx2size,
+ExprPtr optimize(ExprPtr const& expr, has_index_extent auto const& idx2size,
                  bool reorder_sum) {
   using ranges::views::transform;
   if (expr->is<Product>()) {
diff --git a/SeQuant/core/utility/indices.hpp b/SeQuant/core/utility/indices.hpp
index 248f159174..122d688b4c 100644
--- a/SeQuant/core/utility/indices.hpp
+++ b/SeQuant/core/utility/indices.hpp
@@ -364,20 +364,31 @@ TensorOfTensorIndices<Container> tot_indices(Rng const& idxs) {
   using ranges::views::join;
   using ranges::views::transform;
 
-  // Container indep_idxs;
+  constexpr auto emplace_into = [](Container& target, auto&& value) {
+    if constexpr (requires { target.emplace_back(value); }) {
+      // for sequence containers like vectors, lists
+      target.emplace_back(value);
+    } else if constexpr (requires { target.emplace(value); }) {
+      // for associative containers like set
+      target.emplace(value);
+    } else {
+      static_assert(false,
+                    "Container does not support emplace_back or emplace");
+    }
+  };
 
   TensorOfTensorIndices<Container> result;
   auto& outer = result.outer;
 
   for (auto&& i : idxs | transform(&Index::proto_indices) | join)
-    if (!ranges::contains(outer, i)) outer.emplace_back(i);
+    if (!ranges::contains(outer, i)) emplace_into(outer, i);
 
   for (auto&& i : idxs | filter(not_fn(&Index::has_proto_indices)))
-    if (!ranges::contains(outer, i)) outer.emplace_back(i);
+    if (!ranges::contains(outer, i)) emplace_into(outer, i);
 
   auto& inner = result.inner;
   for (auto&& i : idxs | filter(&Index::has_proto_indices))
-    inner.emplace_back(i);
+    emplace_into(inner, i);
 
   return result;
 }
@@ -400,9 +411,7 @@ inline bool ordinal_compare(Index const& idx1, Index const& idx2) {
 ///   eg. [a_1^{i_1,i_2},a_2^{i_2,i_3}] -> "a_1i_1i_2,a_2i_2i_3"
 ///   eg. [i_1, i_2] -> "i_1,i_2"
 ///
-template <typename Rng, typename Idx = ranges::range_value_t<Rng>,
-          typename = std::enable_if_t<std::is_same_v<Idx, Index>>>
-std::string csv_labels(Rng&& idxs) {
+std::string csv_labels(meta::range_of<Index> auto&& idxs) {
   using ranges::views::concat;
   using ranges::views::intersperse;
   using ranges::views::join;
@@ -416,10 +425,10 @@ std::string csv_labels(Rng&& idxs) {
     return sequant::to_string(v | ranges::to<std::wstring>);
   };
 
-  return std::forward<Rng>(idxs)  //
-         | transform(str)         //
-         | intersperse(",")       //
-         | join                   //
+  return std::forward<decltype(idxs)>(idxs)  //
+         | transform(str)                    //
+         | intersperse(",")                  //
+         | join                              //
          | ranges::to<std::string>;
 }
 
@@ -568,6 +577,79 @@ auto get_ket_idx(T&& container)
   }
 }
 
+///
+/// @brief Computes index counts for all subsets of a given range of index
+/// groups.
+///
+/// This function generates a vector where each element corresponds to a subset
+/// of the input range `rng`. The subsets are indexed by a bitmask, where the
+/// $i$-th bit being set means the $i$-th element of `rng` is included in the
+/// subset. For each subset, it counts the occurrences of each `Index`.
+///
+/// @param rng A range of ranges of `Index`. The outer range represents a
+/// collection of index groups (e.g., indices of multiple tensors).
+/// @return A vector of maps, where `result[mask]` contains a map of
+/// `Index` to its count in the subset defined by `mask`.
+///
+auto subset_index_counts(meta::range_of<Index, 2> auto const& rng) {
+  size_t const N = ranges::distance(rng);
+  container::vector<container::map<Index, size_t, Index::FullLabelCompare>>
+      result((1 << N));
+  for (size_t i = 1; i < result.size(); ++i) {
+    for (auto&& ixs : bits::on_bits_index(i) | bits::sieve(rng)) {
+      for (auto&& ix : ixs)
+        if (auto [it, yn] = result[i].try_emplace(ix, 1); !yn)  //
+          ++(it->second);
+    }
+  }
+  return result;
+}
+
+///
+/// @brief Determines the target indices for all subsets of a given range of
+/// index groups.
+///
+/// This function computes the set of "target" indices for each subset of `rng`.
+/// An index is considered a target for a subset if it appears exactly once
+/// within that subset (making it an open index for that subset) or if it is
+/// present in the provided `tixs` (target indices) and also appears in the
+/// subset. Additionally, indices that appear in the subset and also appear in
+/// the complementary subset (implied by `counts.at(counts.size() - i -
+/// 1).contains(k)`) are included, which effectively identifies contracted
+/// indices that need to be preserved as open indices if they are matched in the
+/// complement.
+///
+/// @param rng A range of ranges of `Index`. The outer range represents a
+/// collection of index groups.
+/// @param tixs A range of `Index` representing the global target indices (e.g.,
+/// indices of the result tensor).
+/// @return A vector of sets, where `result[mask]` contains the set of target
+/// `Index` objects for the subset defined by `mask`.
+///
+auto subset_target_indices(meta::range_of<Index, 2> auto const& rng,
+                           meta::range_of<Index> auto const& tixs) {
+  using IndexSet = container::set<Index, Index::FullLabelCompare>;
+  size_t const N = ranges::distance(rng);
+  container::vector<IndexSet> result((1 << N));
+
+  if (result.empty()) return result;
+
+  for (auto i = 0; i < N; ++i)
+    for (auto&& ix : ranges::at(rng, i)) result[(1 << i)].emplace(ix);
+
+  auto counts = subset_index_counts(rng);
+
+  for (auto&& [k, v] : *counts.rbegin())
+    if (v == 1 || ranges::contains(tixs, k)) result.rbegin()->emplace(k);
+
+  for (auto i = 0; i < result.size(); ++i)
+    for (auto&& [k, v] : counts[i])
+      if (v == 1 || (v > 0 && counts.at(counts.size() - i - 1).contains(k)))
+        result[i].emplace(k);
+
+  return result;
+}
+
 }  // namespace sequant
 
 #endif  // SEQUANT_CORE_UTILITY_INDICES_HPP
diff --git a/tests/unit/test_optimize.cpp b/tests/unit/test_optimize.cpp
index 951e286aaf..2ac948525e 100644
--- a/tests/unit/test_optimize.cpp
+++ b/tests/unit/test_optimize.cpp
@@ -201,6 +201,27 @@ TEST_CASE("optimize", "[optimize]") {
       REQUIRE(optimized->as<Sum>().summands().size() == 1);
       REQUIRE(sum->as<Sum>().summand(0).as<Product>().factors().size() == 1);
     }
+
+    SECTION("Non-covariant indices") {
+      auto uocc = reg->retrieve_ptr(L"a");
+      auto aux = reg->retrieve_ptr(L"x");
+      auto const aux_sz = aux->approximate_size();
+      auto const uocc_sz = uocc->approximate_size();
+      REQUIRE(uocc);
+      REQUIRE(aux);
+      aux->approximate_size(3 * uocc->approximate_size());
+
+      auto const G_abcd_thc =
+          parse_expr(L"X{a1;;x1} X{;a2;x1} Y{;;x1,x2} X{a3;;x2} X{;a4;x2}")
+              ->as<Product>();
+      auto const G_abcd_thc_opt =
+          parse_expr(L"((X{a1;;x1} X{;a2;x1}) Y{;;x1,x2})(X{a3;;x2} X{;a4;x2})")
+              ->as<Product>();
+      REQUIRE(single_term_opt(G_abcd_thc)->as<Product>() == G_abcd_thc_opt);
+      aux->approximate_size(aux_sz);
+      REQUIRE(aux->approximate_size() == aux_sz);
+      REQUIRE(uocc->approximate_size() == uocc_sz);
+    }
   }
 
   SECTION("CSE") {