@@ -159,10 +159,7 @@ void DefaultPositionInputs::Update(DeviceSpan<int32_t> next_tokens, int total_le
159159 // Fallback: use CPU interface with write-only staging buffer to avoid D2H copy
160160 auto byte_span = attention_mask_->GetByteSpan ();
161161 auto cpu_span = byte_span.CpuSpan ();
162- GetDeviceInterface (DeviceType::CPU)->UpdateCompactAttentionMask (
163- cpu_span.data (),
164- static_cast <int >(attention_mask_shape_[0 ]),
165- total_length, type_);
162+ GetDeviceInterface (DeviceType::CPU)->UpdateCompactAttentionMask (cpu_span.data (), static_cast <int >(attention_mask_shape_[0 ]), total_length, type_);
166163 byte_span.CopyCpuToDevice ();
167164 }
168165 } else if (is_first_update_) {
@@ -209,10 +206,7 @@ void DefaultPositionInputs::RewindTo(size_t index) {
209206 static_cast <int >(index), type_)) {
210207 auto byte_span = attention_mask_->GetByteSpan ();
211208 auto cpu_span = byte_span.CpuSpan ();
212- GetDeviceInterface (DeviceType::CPU)->UpdateCompactAttentionMask (
213- cpu_span.data (),
214- static_cast <int >(attention_mask_shape_[0 ]),
215- static_cast <int >(index), type_);
209+ GetDeviceInterface (DeviceType::CPU)->UpdateCompactAttentionMask (cpu_span.data (), static_cast <int >(attention_mask_shape_[0 ]), static_cast <int >(index), type_);
216210 byte_span.CopyCpuToDevice ();
217211 }
218212 } else if (attention_mask_shape_[0 ] == 1 ) {
0 commit comments