Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit9159495

Browse files
authored
Fix: support widening ofListViewVector offsets and sizes (#5796)
Since the list view "logical" type does not include the sizes andoffsets as part of its type, we should allow resizing.Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent8113798 commit9159495

File tree

11 files changed

+385
-85
lines changed

11 files changed

+385
-85
lines changed

‎Cargo.lock‎

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more aboutcustomizing how changed files appear on GitHub.

‎vortex-array/src/arrays/list/vtable/kernel/filter.rs‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ impl ExecuteParentKernel<ListVTable> for ListFilterKernel {
6363
letmut vec =ListViewVectorMut::with_capacity(
6464
array.elements().dtype(),
6565
selection.true_count(),
66+
0,
6667
);
6768
vec.append_nulls(selection.true_count());
6869
returnOk(Some(vec.freeze().into()));

‎vortex-compute/src/cast/pvector.rs‎

Lines changed: 2 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,12 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use num_traits::NumCast;
5-
use vortex_buffer::Buffer;
6-
use vortex_buffer::BufferMut;
75
use vortex_dtype::DType;
86
use vortex_dtype::NativePType;
97
use vortex_dtype::match_each_native_ptype;
108
use vortex_error::VortexResult;
119
use vortex_error::vortex_bail;
1210
use vortex_error::vortex_err;
13-
use vortex_mask::AllOr;
14-
use vortex_mask::Mask;
1511
use vortex_vector::Scalar;
1612
use vortex_vector::ScalarOps;
1713
use vortex_vector::Vector;
@@ -20,6 +16,7 @@ use vortex_vector::primitive::PScalar;
2016
use vortex_vector::primitive::PVector;
2117
use vortex_vector::primitive::PrimitiveScalar;
2218
use vortex_vector::primitive::PrimitiveVector;
19+
use vortex_vector::primitive::cast;
2320

2421
usecrate::cast::Cast;
2522
usecrate::cast::try_cast_scalar_common;
@@ -44,7 +41,7 @@ impl<T: NativePType> Cast for PVector<T> {
4441
// We can possibly convert to the target `PType` and we have compatible nullability.
4542
DType::Primitive(target_ptype, n)if n.is_nullable() ||self.validity().all_true() =>{
4643
match_each_native_ptype!(*target_ptype, |Dst|{
47-
let result = cast_pvector::<T,Dst>(self)?;
44+
let result =cast::cast_pvector::<T,Dst>(self)?;
4845
Ok(PrimitiveVector::from(result).into())
4946
})
5047
}
@@ -55,48 +52,6 @@ impl<T: NativePType> Cast for PVector<T> {
5552
}
5653
}
5754

58-
/// Cast a [`PVector<F>`] to a [`PVector<T>`] by converting each element.
59-
///
60-
/// Returns an error if any valid element cannot be converted (e.g., overflow).
61-
fncast_pvector<Src:NativePType,Dst:NativePType>(
62-
src:&PVector<Src>,
63-
) ->VortexResult<PVector<Dst>>{
64-
let elements:&[Src] = src.as_ref();
65-
match src.validity().bit_buffer(){
66-
AllOr::All =>{
67-
letmut buffer =BufferMut::with_capacity(elements.len());
68-
for&itemin elements{
69-
let converted = <DstasNumCast>::from(item).ok_or_else(
70-
||vortex_err!(ComputeError:"Failed to cast {} to {:?}", item,Dst::PTYPE),
71-
)?;
72-
// SAFETY: We pre-allocated the required capacity.
73-
unsafe{ buffer.push_unchecked(converted)}
74-
}
75-
Ok(PVector::from(buffer.freeze()))
76-
}
77-
AllOr::None =>Ok(PVector::new(
78-
Buffer::zeroed(elements.len()),
79-
Mask::new_false(elements.len()),
80-
)),
81-
AllOr::Some(bit_buffer) =>{
82-
letmut buffer =BufferMut::with_capacity(elements.len());
83-
for(&item, valid)in elements.iter().zip(bit_buffer.iter()){
84-
if valid{
85-
let converted = <DstasNumCast>::from(item).ok_or_else(
86-
||vortex_err!(ComputeError:"Failed to cast {} to {:?}", item,Dst::PTYPE),
87-
)?;
88-
// SAFETY: We pre-allocated the required capacity.
89-
unsafe{ buffer.push_unchecked(converted)}
90-
}else{
91-
// SAFETY: We pre-allocated the required capacity.
92-
unsafe{ buffer.push_unchecked(Dst::default())}
93-
}
94-
}
95-
Ok(PVector::new(buffer.freeze(), src.validity().clone()))
96-
}
97-
}
98-
}
99-
10055
impl<T:NativePType>CastforPScalar<T>{
10156
typeOutput =Scalar;
10257

‎vortex-dtype/src/ptype.rs‎

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,56 @@ impl PType {
767767
}
768768
}
769769
}
770+
771+
/// Returns the minimum unsigned integer [`PType`] that can represent the given value.
772+
#[inline]
773+
pubconstfnmin_unsigned_ptype_for_value(value:u64) ->Self{
774+
if value <= u8::MAXasu64{
775+
Self::U8
776+
}elseif value <= u16::MAXasu64{
777+
Self::U16
778+
}elseif value <= u32::MAXasu64{
779+
Self::U32
780+
}else{
781+
Self::U64
782+
}
783+
}
784+
785+
/// Returns the minimum signed integer [`PType`] that can represent the given value.
786+
#[inline]
787+
pubconstfnmin_signed_ptype_for_value(value:i64) ->Self{
788+
if value >= i8::MINasi64 && value <= i8::MAXasi64{
789+
Self::I8
790+
}elseif value >= i16::MINasi64 && value <= i16::MAXasi64{
791+
Self::I16
792+
}elseif value >= i32::MINasi64 && value <= i32::MAXasi64{
793+
Self::I32
794+
}else{
795+
Self::I64
796+
}
797+
}
798+
799+
/// Returns the wider of two unsigned integer [`PType`]s based on byte width.
800+
#[inline]
801+
pubconstfnmax_unsigned_ptype(self,other:Self) ->Self{
802+
debug_assert!(self.is_unsigned_int() && other.is_unsigned_int());
803+
ifself.byte_width() >= other.byte_width(){
804+
self
805+
}else{
806+
other
807+
}
808+
}
809+
810+
/// Returns the wider of two signed integer [`PType`]s based on byte width.
811+
#[inline]
812+
pubconstfnmax_signed_ptype(self,other:Self) ->Self{
813+
debug_assert!(self.is_signed_int() && other.is_signed_int());
814+
ifself.byte_width() >= other.byte_width(){
815+
self
816+
}else{
817+
other
818+
}
819+
}
770820
}
771821

772822
implDisplayforPType{

‎vortex-scalar/src/vectors.rs‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ impl Scalar {
7272
let lscalar =self.as_list();
7373
match lscalar.elements(){
7474
None =>{
75-
letmut list_view =ListViewVectorMut::with_capacity(elems_dtype,1);
75+
letmut list_view =ListViewVectorMut::with_capacity(elems_dtype,1,0);
7676
list_view.append_nulls(1);
7777
ListViewScalar::new(list_view.freeze()).into()
7878
}

‎vortex-vector/Cargo.toml‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,6 @@ vortex-dtype = { workspace = true }
2525
vortex-error = {workspace =true }
2626
vortex-mask = {workspace =true }
2727

28+
num-traits = {workspace =true }
2829
paste = {workspace =true }
2930
static_assertions = {workspace =true }

‎vortex-vector/src/listview/tests.rs‎

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,3 +412,66 @@ fn test_try_into_mut() {
412412
let original = mut_result2.unwrap_err();
413413
assert_eq!(original.len(),2);
414414
}
415+
416+
#[test]
417+
fntest_extend_upcasts_offset_and_size_types(){
418+
use vortex_dtype::PType;
419+
420+
// Create first list with u8 offsets/sizes and u8::MAX elements
421+
// This forces upcasting when extended with larger types
422+
let elements1 =PVectorMut::from_iter((0..u8::MAXasi32).map(Some));
423+
let offsets1 =PVectorMut::from_iter([0u8]).into();
424+
let sizes1 =PVectorMut::from_iter([u8::MAX]).into();
425+
let validity1 =MaskMut::new_true(1);
426+
427+
letmut list =ListViewVectorMut::new(Box::new(elements1.into()), offsets1, sizes1, validity1);
428+
429+
// Verify initial types
430+
assert_eq!(list.offsets().ptype(),PType::U8);
431+
assert_eq!(list.sizes().ptype(),PType::U8);
432+
433+
// Create second list with u32 offsets and u16 sizes, with u16::MAX elements
434+
let elements2:PrimitiveVector =PVectorMut::from_iter((0..u16::MAXasi32).map(Some))
435+
.freeze()
436+
.into();
437+
let offsets2:PrimitiveVector =PVectorMut::from_iter([0u32]).freeze().into();
438+
let sizes2:PrimitiveVector =PVectorMut::from_iter([u16::MAX]).freeze().into();
439+
let validity2 =Mask::new_true(1);
440+
441+
let list2 =ListViewVector::new(Arc::new(elements2.into()), offsets2, sizes2, validity2);
442+
443+
// Extend - this should upcast offsets to u32 and sizes to u16
444+
list.extend_from_vector(&list2);
445+
446+
// Verify types were upcasted
447+
assert_eq!(list.offsets().ptype(),PType::U32);
448+
assert_eq!(list.sizes().ptype(),PType::U16);
449+
450+
// Verify lengths
451+
assert_eq!(list.len(),2);
452+
453+
let frozen = list.freeze();
454+
455+
// Check that first list's offset is still 0 and size is u8::MAX
456+
let offsets = frozen.offsets();
457+
let sizes = frozen.sizes();
458+
459+
let(o0, o1) =match offsets{
460+
PrimitiveVector::U32(pvec) =>(*pvec.get(0).unwrap(),*pvec.get(1).unwrap()),
461+
_ =>panic!("Expected U32 offsets"),
462+
};
463+
let(s0, s1) =match sizes{
464+
PrimitiveVector::U16(pvec) =>(*pvec.get(0).unwrap(),*pvec.get(1).unwrap()),
465+
_ =>panic!("Expected U16 sizes"),
466+
};
467+
468+
assert_eq!(o0,0);
469+
assert_eq!(s0,u8::MAXasu16);
470+
// Second list's offset should be adjusted by first list's element count (u8::MAX)
471+
assert_eq!(o1,u8::MAXasu32);
472+
assert_eq!(s1,u16::MAX);
473+
474+
// Verify element count
475+
let total_elements =(u8::MAXasusize) +(u16::MAXasusize);
476+
assert_eq!(frozen.elements().len(), total_elements);
477+
}

‎vortex-vector/src/listview/vector_mut.rs‎

Lines changed: 59 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
66
use std::sync::Arc;
77

8+
use num_traits::NumCast;
89
use vortex_dtype::DType;
910
use vortex_dtype::PType;
1011
use vortex_error::VortexExpect;
@@ -184,12 +185,17 @@ impl ListViewVectorMut {
184185
}
185186

186187
/// Creates a new [`ListViewVectorMut`] with the specified capacity.
187-
pubfnwith_capacity(element_dtype:&DType,capacity:usize) ->Self{
188+
pubfnwith_capacity(element_dtype:&DType,capacity:usize,elements_capacity:usize) ->Self{
189+
let offsets_ptype =PType::min_unsigned_ptype_for_value(elements_capacityasu64);
190+
let sizes_ptype = offsets_ptype;
191+
192+
// SAFETY: Everything is empty and the offsets and sizes `PType` is the same, so all
193+
// invariants are satisfied.
188194
unsafe{
189195
Self::new_unchecked(
190196
Box::new(VectorMut::with_capacity(element_dtype,0)),
191-
PrimitiveVectorMut::with_capacity(PType::U64, capacity),
192-
PrimitiveVectorMut::with_capacity(PType::U32, capacity),
197+
PrimitiveVectorMut::with_capacity(offsets_ptype, capacity),
198+
PrimitiveVectorMut::with_capacity(sizes_ptype, capacity),
193199
MaskMut::with_capacity(capacity),
194200
)
195201
}
@@ -300,27 +306,23 @@ impl VectorMutOps for ListViewVectorMut {
300306
self.len =self.validity.len();
301307
}
302308

303-
/// This will also panic if we try to extend the `ListViewVector` beyond the maximum offset
304-
/// representable by the type of the `offsets` primitive vector.
305309
fnextend_from_vector(&mutself,other:&ListViewVector){
306310
// Extend the elements with the other's elements.
307311
let old_elements_len =self.elements.len()asu64;
308312
self.elements.extend_from_vector(&other.elements);
309313
let new_elements_len =self.elements.len()asu64;
310314

311-
//Then extend thesizes withthe other's sizes (these donotneed any adjustment).
312-
self.sizes.extend_from_vector(&other.sizes);
315+
//Extendsizes withautomatic upcasting (doesnotpanic on type mismatch).
316+
self.sizes.extend_from_vector_with_upcast(&other.sizes);
313317

314-
// We need this assertion to ensure that the casts below are infallible.
315-
assert!(
316-
new_elements_len <self.offsets.ptype().max_value_as_u64(),
317-
"the elements length {new_elements_len} is not representable by the offsets type {}",
318-
self.offsets.ptype()
318+
// Extend offsets with adjustment and automatic upcasting based on `new_elements_len`.
319+
adjust_and_extend_offsets(
320+
&mutself.offsets,
321+
&other.offsets,
322+
old_elements_len,
323+
new_elements_len,
319324
);
320325

321-
// Finally, extend the offsets after adding the old `elements` length to each.
322-
adjust_and_extend_offsets(&mutself.offsets,&other.offsets, old_elements_len);
323-
324326
self.validity.append_mask(&other.validity);
325327
self.len += other.len;
326328
debug_assert_eq!(self.len,self.validity.len());
@@ -505,39 +507,60 @@ fn validate_views_bound(
505507
Ok(())
506508
}
507509

508-
// TODO(connor): It would be better to separate everything inside the macros into its own function,
509-
// but that would require adding another macro that sets a type `$type` to be used by the caller.
510-
/// Checks that all views are `<= elements_len`.
510+
/// Adjusts and extends offsets from `new_offsets` into `curr_offsets`, upcasting if needed.
511+
///
512+
/// Each offset from `new_offsets` is adjusted by adding `old_elements_len` before appending.
513+
///
514+
/// If the resulting offsets would exceed the current offset type's capacity, the offset vector is
515+
/// automatically upcasted to a wider type.
511516
#[expect(
512517
clippy::cognitive_complexity,
513518
reason ="complexity from nested match_each_* macros"
514519
)]
515520
fnadjust_and_extend_offsets(
516-
our_offsets:&mutPrimitiveVectorMut,
517-
other:&PrimitiveVector,
521+
curr_offsets:&mutPrimitiveVectorMut,
522+
new_offsets:&PrimitiveVector,
518523
old_elements_len:u64,
524+
new_elements_len:u64,
519525
){
520-
our_offsets.reserve(other.len());
526+
// Make sure we use the correct width to fit all offsets.
527+
let target_ptype =PType::min_unsigned_ptype_for_value(new_elements_len)
528+
.max_unsigned_ptype(curr_offsets.ptype())
529+
.max_unsigned_ptype(new_offsets.ptype());
530+
531+
if curr_offsets.ptype() != target_ptype{
532+
let old_offsets = std::mem::replace(
533+
curr_offsets,
534+
PrimitiveVectorMut::with_capacity(target_ptype,0),
535+
);
536+
*curr_offsets = old_offsets.upcast(target_ptype);
537+
}
521538

522-
// Adjust each offset from `other` by adding the current elements length to each of the
539+
curr_offsets.reserve(new_offsets.len());
540+
541+
// Adjust each offset from `new_offsets` by adding the current elements length to each of the
523542
// incoming offsets.
524-
match_each_integer_pvector_mut!(our_offsets, |self_offsets|{
525-
match_each_integer_pvector!(other, |other_offsets|{
526-
letother_offsets_slice =other_offsets.as_ref();
527-
528-
// Append each offset from `other`, adjusted by the elements_offset.
529-
for i in0..other.len(){
530-
// All offset types are representable via a `u64` since we also ensure offsets
531-
//arealways non-negative.
543+
match_each_integer_pvector_mut!(curr_offsets, |curr|{
544+
match_each_integer_pvector!(new_offsets, |new|{
545+
letnew_offsets_slice =new.as_ref();
546+
547+
// Append each offset from `new_offsets`, adjusted by the elements_offset.
548+
for i in0..new.len(){
549+
// All offset types are representable via a `u64` since we also ensure offsets are
550+
// always non-negative.
532551
#[allow(clippy::unnecessary_cast)]
533-
let adjusted_offset = other_offsets_slice[i]asu64 + old_elements_len;
552+
let adjusted_offset = new_offsets_slice[i]asu64 + old_elements_len;
553+
debug_assert!(
554+
adjusted_offset < new_elements_len,
555+
"new list view offset is somehow out of bounds, something has gone wrong"
556+
);
534557

535-
// SAFETY: We just reserved capacity for `other.len()` elements above, and we
536-
// also know the cast is fine because we verified above that the maximum
537-
// possible offset is representable by the offset type.
538-
#[allow(clippy::cast_possible_truncation)]
558+
let converted =NumCast::from(adjusted_offset)
559+
.vortex_expect("offset conversion should succeed after upcast");
560+
561+
// SAFETY: We reserved capacity for `new_offsets.len()` elements above.
539562
unsafe{
540-
self_offsets.push_unchecked(adjusted_offsetas _);
563+
curr.push_unchecked(converted);
541564
}
542565
}
543566
});

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp