NotificationsYou must be signed in to change notification settings
Fork104
Star2.4k

Commit9159495

authored

Fix: support widening ofListViewVector offsets and sizes (#5796)

Since the list view "logical" type does not include the sizes andoffsets as part of its type, we should allow resizing.Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>

1 parent8113798 commit9159495Copy full SHA for 9159495

File tree

11 files changed

+385

-85

lines changed

Cargo.lock
vortex-array/src/arrays/list/vtable/kernel
- filter.rs
vortex-compute/src/cast
- pvector.rs
vortex-dtype/src
- ptype.rs
vortex-scalar/src
- vectors.rs
vortex-vector
- Cargo.toml
- src
  - listview
    - tests.rs
    - vector_mut.rs
  - primitive
    - cast.rs
    - mod.rs
  - vector_mut.rs

11 files changed

+385

-85

lines changed

`‎Cargo.lock‎`

Lines changed: 1 addition & 0 deletions

Some generated files are not rendered by default. Learn more aboutcustomizing how changed files appear on GitHub.

`‎vortex-array/src/arrays/list/vtable/kernel/filter.rs‎`

Lines changed: 1 addition & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -63,6 +63,7 @@ impl ExecuteParentKernel<ListVTable> for ListFilterKernel {`
`63`	`63`	`letmut vec =ListViewVectorMut::with_capacity(`
`64`	`64`	`array.elements().dtype(),`
`65`	`65`	`selection.true_count(),`
	`66`	`+0,`
`66`	`67`	`);`
`67`	`68`	`vec.append_nulls(selection.true_count());`
`68`	`69`	`returnOk(Some(vec.freeze().into()));`

`‎vortex-compute/src/cast/pvector.rs‎`

Lines changed: 2 additions & 47 deletions

Original file line number	Diff line number	Diff line change
`@@ -2,16 +2,12 @@`
`2`	`2`	`// SPDX-FileCopyrightText: Copyright the Vortex contributors`
`3`	`3`
`4`	`4`	`use num_traits::NumCast;`
`5`		`-use vortex_buffer::Buffer;`
`6`		`-use vortex_buffer::BufferMut;`
`7`	`5`	`use vortex_dtype::DType;`
`8`	`6`	`use vortex_dtype::NativePType;`
`9`	`7`	`use vortex_dtype::match_each_native_ptype;`
`10`	`8`	`use vortex_error::VortexResult;`
`11`	`9`	`use vortex_error::vortex_bail;`
`12`	`10`	`use vortex_error::vortex_err;`
`13`		`-use vortex_mask::AllOr;`
`14`		`-use vortex_mask::Mask;`
`15`	`11`	`use vortex_vector::Scalar;`
`16`	`12`	`use vortex_vector::ScalarOps;`
`17`	`13`	`use vortex_vector::Vector;`
`@@ -20,6 +16,7 @@ use vortex_vector::primitive::PScalar;`
`20`	`16`	`use vortex_vector::primitive::PVector;`
`21`	`17`	`use vortex_vector::primitive::PrimitiveScalar;`
`22`	`18`	`use vortex_vector::primitive::PrimitiveVector;`
	`19`	`+use vortex_vector::primitive::cast;`
`23`	`20`
`24`	`21`	`usecrate::cast::Cast;`
`25`	`22`	`usecrate::cast::try_cast_scalar_common;`
`@@ -44,7 +41,7 @@ impl<T: NativePType> Cast for PVector<T> {`
`44`	`41`	// We can possibly convert to the target `PType` and we have compatible nullability.
`45`	`42`	`DType::Primitive(target_ptype, n)if n.is_nullable() \|\|self.validity().all_true() =>{`
`46`	`43`	`match_each_native_ptype!(*target_ptype, \|Dst\|{`
`47`		`-let result = cast_pvector::<T,Dst>(self)?;`
	`44`	`+let result =cast::cast_pvector::<T,Dst>(self)?;`
`48`	`45`	`Ok(PrimitiveVector::from(result).into())`
`49`	`46`	`})`
`50`	`47`	`}`
`@@ -55,48 +52,6 @@ impl<T: NativePType> Cast for PVector<T> {`
`55`	`52`	`}`
`56`	`53`	`}`
`57`	`54`
`58`		-/// Cast a [`PVector<F>`] to a [`PVector<T>`] by converting each element.
`59`		`-///`
`60`		`-/// Returns an error if any valid element cannot be converted (e.g., overflow).`
`61`		`-fncast_pvector<Src:NativePType,Dst:NativePType>(`
`62`		`-src:&PVector<Src>,`
`63`		`-) ->VortexResult<PVector<Dst>>{`
`64`		`-let elements:&[Src] = src.as_ref();`
`65`		`-match src.validity().bit_buffer(){`
`66`		`-AllOr::All =>{`
`67`		`-letmut buffer =BufferMut::with_capacity(elements.len());`
`68`		`-for&itemin elements{`
`69`		`-let converted = <DstasNumCast>::from(item).ok_or_else(`
`70`		`- \|\|vortex_err!(ComputeError:"Failed to cast {} to {:?}", item,Dst::PTYPE),`
`71`		`-)?;`
`72`		`-// SAFETY: We pre-allocated the required capacity.`
`73`		`-unsafe{ buffer.push_unchecked(converted)}`
`74`		`-}`
`75`		`-Ok(PVector::from(buffer.freeze()))`
`76`		`-}`
`77`		`-AllOr::None =>Ok(PVector::new(`
`78`		`-Buffer::zeroed(elements.len()),`
`79`		`-Mask::new_false(elements.len()),`
`80`		`-)),`
`81`		`-AllOr::Some(bit_buffer) =>{`
`82`		`-letmut buffer =BufferMut::with_capacity(elements.len());`
`83`		`-for(&item, valid)in elements.iter().zip(bit_buffer.iter()){`
`84`		`-if valid{`
`85`		`-let converted = <DstasNumCast>::from(item).ok_or_else(`
`86`		`- \|\|vortex_err!(ComputeError:"Failed to cast {} to {:?}", item,Dst::PTYPE),`
`87`		`-)?;`
`88`		`-// SAFETY: We pre-allocated the required capacity.`
`89`		`-unsafe{ buffer.push_unchecked(converted)}`
`90`		`-}else{`
`91`		`-// SAFETY: We pre-allocated the required capacity.`
`92`		`-unsafe{ buffer.push_unchecked(Dst::default())}`
`93`		`-}`
`94`		`-}`
`95`		`-Ok(PVector::new(buffer.freeze(), src.validity().clone()))`
`96`		`-}`
`97`		`-}`
`98`		`-}`
`99`		`-`
`100`	`55`	`impl<T:NativePType>CastforPScalar<T>{`
`101`	`56`	`typeOutput =Scalar;`
`102`	`57`

`‎vortex-dtype/src/ptype.rs‎`

Lines changed: 50 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -767,6 +767,56 @@ impl PType {`
`767`	`767`	`}`
`768`	`768`	`}`
`769`	`769`	`}`
	`770`	`+`
	`771`	+/// Returns the minimum unsigned integer [`PType`] that can represent the given value.
	`772`	`+#[inline]`
	`773`	`+pubconstfnmin_unsigned_ptype_for_value(value:u64) ->Self{`
	`774`	`+if value <= u8::MAXasu64{`
	`775`	`+Self::U8`
	`776`	`+}elseif value <= u16::MAXasu64{`
	`777`	`+Self::U16`
	`778`	`+}elseif value <= u32::MAXasu64{`
	`779`	`+Self::U32`
	`780`	`+}else{`
	`781`	`+Self::U64`
	`782`	`+}`
	`783`	`+}`
	`784`	`+`
	`785`	+/// Returns the minimum signed integer [`PType`] that can represent the given value.
	`786`	`+#[inline]`
	`787`	`+pubconstfnmin_signed_ptype_for_value(value:i64) ->Self{`
	`788`	`+if value >= i8::MINasi64 && value <= i8::MAXasi64{`
	`789`	`+Self::I8`
	`790`	`+}elseif value >= i16::MINasi64 && value <= i16::MAXasi64{`
	`791`	`+Self::I16`
	`792`	`+}elseif value >= i32::MINasi64 && value <= i32::MAXasi64{`
	`793`	`+Self::I32`
	`794`	`+}else{`
	`795`	`+Self::I64`
	`796`	`+}`
	`797`	`+}`
	`798`	`+`
	`799`	+/// Returns the wider of two unsigned integer [`PType`]s based on byte width.
	`800`	`+#[inline]`
	`801`	`+pubconstfnmax_unsigned_ptype(self,other:Self) ->Self{`
	`802`	`+debug_assert!(self.is_unsigned_int() && other.is_unsigned_int());`
	`803`	`+ifself.byte_width() >= other.byte_width(){`
	`804`	`+self`
	`805`	`+}else{`
	`806`	`+ other`
	`807`	`+}`
	`808`	`+}`
	`809`	`+`
	`810`	+/// Returns the wider of two signed integer [`PType`]s based on byte width.
	`811`	`+#[inline]`
	`812`	`+pubconstfnmax_signed_ptype(self,other:Self) ->Self{`
	`813`	`+debug_assert!(self.is_signed_int() && other.is_signed_int());`
	`814`	`+ifself.byte_width() >= other.byte_width(){`
	`815`	`+self`
	`816`	`+}else{`
	`817`	`+ other`
	`818`	`+}`
	`819`	`+}`
`770`	`820`	`}`
`771`	`821`
`772`	`822`	`implDisplayforPType{`

`‎vortex-scalar/src/vectors.rs‎`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -72,7 +72,7 @@ impl Scalar {`
`72`	`72`	`let lscalar =self.as_list();`
`73`	`73`	`match lscalar.elements(){`
`74`	`74`	`None =>{`
`75`		`-letmut list_view =ListViewVectorMut::with_capacity(elems_dtype,1);`
	`75`	`+letmut list_view =ListViewVectorMut::with_capacity(elems_dtype,1,0);`
`76`	`76`	`list_view.append_nulls(1);`
`77`	`77`	`ListViewScalar::new(list_view.freeze()).into()`
`78`	`78`	`}`

`‎vortex-vector/Cargo.toml‎`

Lines changed: 1 addition & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -25,5 +25,6 @@ vortex-dtype = { workspace = true }`
`25`	`25`	`vortex-error = {workspace =true }`
`26`	`26`	`vortex-mask = {workspace =true }`
`27`	`27`
	`28`	`+num-traits = {workspace =true }`
`28`	`29`	`paste = {workspace =true }`
`29`	`30`	`static_assertions = {workspace =true }`

`‎vortex-vector/src/listview/tests.rs‎`

Lines changed: 63 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -412,3 +412,66 @@ fn test_try_into_mut() {`
`412`	`412`	`let original = mut_result2.unwrap_err();`
`413`	`413`	`assert_eq!(original.len(),2);`
`414`	`414`	`}`
	`415`	`+`
	`416`	`+#[test]`
	`417`	`+fntest_extend_upcasts_offset_and_size_types(){`
	`418`	`+use vortex_dtype::PType;`
	`419`	`+`
	`420`	`+// Create first list with u8 offsets/sizes and u8::MAX elements`
	`421`	`+// This forces upcasting when extended with larger types`
	`422`	`+let elements1 =PVectorMut::from_iter((0..u8::MAXasi32).map(Some));`
	`423`	`+let offsets1 =PVectorMut::from_iter([0u8]).into();`
	`424`	`+let sizes1 =PVectorMut::from_iter([u8::MAX]).into();`
	`425`	`+let validity1 =MaskMut::new_true(1);`
	`426`	`+`
	`427`	`+letmut list =ListViewVectorMut::new(Box::new(elements1.into()), offsets1, sizes1, validity1);`
	`428`	`+`
	`429`	`+// Verify initial types`
	`430`	`+assert_eq!(list.offsets().ptype(),PType::U8);`
	`431`	`+assert_eq!(list.sizes().ptype(),PType::U8);`
	`432`	`+`
	`433`	`+// Create second list with u32 offsets and u16 sizes, with u16::MAX elements`
	`434`	`+let elements2:PrimitiveVector =PVectorMut::from_iter((0..u16::MAXasi32).map(Some))`
	`435`	`+.freeze()`
	`436`	`+.into();`
	`437`	`+let offsets2:PrimitiveVector =PVectorMut::from_iter([0u32]).freeze().into();`
	`438`	`+let sizes2:PrimitiveVector =PVectorMut::from_iter([u16::MAX]).freeze().into();`
	`439`	`+let validity2 =Mask::new_true(1);`
	`440`	`+`
	`441`	`+let list2 =ListViewVector::new(Arc::new(elements2.into()), offsets2, sizes2, validity2);`
	`442`	`+`
	`443`	`+// Extend - this should upcast offsets to u32 and sizes to u16`
	`444`	`+ list.extend_from_vector(&list2);`
	`445`	`+`
	`446`	`+// Verify types were upcasted`
	`447`	`+assert_eq!(list.offsets().ptype(),PType::U32);`
	`448`	`+assert_eq!(list.sizes().ptype(),PType::U16);`
	`449`	`+`
	`450`	`+// Verify lengths`
	`451`	`+assert_eq!(list.len(),2);`
	`452`	`+`
	`453`	`+let frozen = list.freeze();`
	`454`	`+`
	`455`	`+// Check that first list's offset is still 0 and size is u8::MAX`
	`456`	`+let offsets = frozen.offsets();`
	`457`	`+let sizes = frozen.sizes();`
	`458`	`+`
	`459`	`+let(o0, o1) =match offsets{`
	`460`	`+PrimitiveVector::U32(pvec) =>(pvec.get(0).unwrap(),pvec.get(1).unwrap()),`
	`461`	`+ _ =>panic!("Expected U32 offsets"),`
	`462`	`+};`
	`463`	`+let(s0, s1) =match sizes{`
	`464`	`+PrimitiveVector::U16(pvec) =>(pvec.get(0).unwrap(),pvec.get(1).unwrap()),`
	`465`	`+ _ =>panic!("Expected U16 sizes"),`
	`466`	`+};`
	`467`	`+`
	`468`	`+assert_eq!(o0,0);`
	`469`	`+assert_eq!(s0,u8::MAXasu16);`
	`470`	`+// Second list's offset should be adjusted by first list's element count (u8::MAX)`
	`471`	`+assert_eq!(o1,u8::MAXasu32);`
	`472`	`+assert_eq!(s1,u16::MAX);`
	`473`	`+`
	`474`	`+// Verify element count`
	`475`	`+let total_elements =(u8::MAXasusize) +(u16::MAXasusize);`
	`476`	`+assert_eq!(frozen.elements().len(), total_elements);`
	`477`	`+}`

`‎vortex-vector/src/listview/vector_mut.rs‎`

Lines changed: 59 additions & 36 deletions

Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,7 @@`
`5`	`5`
`6`	`6`	`use std::sync::Arc;`
`7`	`7`
	`8`	`+use num_traits::NumCast;`
`8`	`9`	`use vortex_dtype::DType;`
`9`	`10`	`use vortex_dtype::PType;`
`10`	`11`	`use vortex_error::VortexExpect;`
`@@ -184,12 +185,17 @@ impl ListViewVectorMut {`
`184`	`185`	`}`
`185`	`186`
`186`	`187`	/// Creates a new [`ListViewVectorMut`] with the specified capacity.
`187`		`-pubfnwith_capacity(element_dtype:&DType,capacity:usize) ->Self{`
	`188`	`+pubfnwith_capacity(element_dtype:&DType,capacity:usize,elements_capacity:usize) ->Self{`
	`189`	`+let offsets_ptype =PType::min_unsigned_ptype_for_value(elements_capacityasu64);`
	`190`	`+let sizes_ptype = offsets_ptype;`
	`191`	`+`
	`192`	+// SAFETY: Everything is empty and the offsets and sizes `PType` is the same, so all
	`193`	`+// invariants are satisfied.`
`188`	`194`	`unsafe{`
`189`	`195`	`Self::new_unchecked(`
`190`	`196`	`Box::new(VectorMut::with_capacity(element_dtype,0)),`
`191`		`-PrimitiveVectorMut::with_capacity(PType::U64, capacity),`
`192`		`-PrimitiveVectorMut::with_capacity(PType::U32, capacity),`
	`197`	`+PrimitiveVectorMut::with_capacity(offsets_ptype, capacity),`
	`198`	`+PrimitiveVectorMut::with_capacity(sizes_ptype, capacity),`
`193`	`199`	`MaskMut::with_capacity(capacity),`
`194`	`200`	`)`
`195`	`201`	`}`
`@@ -300,27 +306,23 @@ impl VectorMutOps for ListViewVectorMut {`
`300`	`306`	`self.len =self.validity.len();`
`301`	`307`	`}`
`302`	`308`
`303`		-/// This will also panic if we try to extend the `ListViewVector` beyond the maximum offset
`304`		-/// representable by the type of the `offsets` primitive vector.
`305`	`309`	`fnextend_from_vector(&mutself,other:&ListViewVector){`
`306`	`310`	`// Extend the elements with the other's elements.`
`307`	`311`	`let old_elements_len =self.elements.len()asu64;`
`308`	`312`	`self.elements.extend_from_vector(&other.elements);`
`309`	`313`	`let new_elements_len =self.elements.len()asu64;`
`310`	`314`
`311`		`-//Then extend thesizes withthe other's sizes (these donotneed any adjustment).`
`312`		`-self.sizes.extend_from_vector(&other.sizes);`
	`315`	`+//Extendsizes withautomatic upcasting (doesnotpanic on type mismatch).`
	`316`	`+self.sizes.extend_from_vector_with_upcast(&other.sizes);`
`313`	`317`
`314`		`-// We need this assertion to ensure that the casts below are infallible.`
`315`		`-assert!(`
`316`		`- new_elements_len <self.offsets.ptype().max_value_as_u64(),`
`317`		`-"the elements length {new_elements_len} is not representable by the offsets type {}",`
`318`		`-self.offsets.ptype()`
	`318`	+// Extend offsets with adjustment and automatic upcasting based on `new_elements_len`.
	`319`	`+adjust_and_extend_offsets(`
	`320`	`+&mutself.offsets,`
	`321`	`+&other.offsets,`
	`322`	`+ old_elements_len,`
	`323`	`+ new_elements_len,`
`319`	`324`	`);`
`320`	`325`
`321`		-// Finally, extend the offsets after adding the old `elements` length to each.
`322`		`-adjust_and_extend_offsets(&mutself.offsets,&other.offsets, old_elements_len);`
`323`		`-`
`324`	`326`	`self.validity.append_mask(&other.validity);`
`325`	`327`	`self.len += other.len;`
`326`	`328`	`debug_assert_eq!(self.len,self.validity.len());`
`@@ -505,39 +507,60 @@ fn validate_views_bound(`
`505`	`507`	`Ok(())`
`506`	`508`	`}`
`507`	`509`
`508`		`-// TODO(connor): It would be better to separate everything inside the macros into its own function,`
`509`		-// but that would require adding another macro that sets a type `$type` to be used by the caller.
`510`		-/// Checks that all views are `<= elements_len`.
	`510`	+/// Adjusts and extends offsets from `new_offsets` into `curr_offsets`, upcasting if needed.
	`511`	`+///`
	`512`	+/// Each offset from `new_offsets` is adjusted by adding `old_elements_len` before appending.
	`513`	`+///`
	`514`	`+/// If the resulting offsets would exceed the current offset type's capacity, the offset vector is`
	`515`	`+/// automatically upcasted to a wider type.`
`511`	`516`	`#[expect(`
`512`	`517`	`clippy::cognitive_complexity,`
`513`	`518`	`reason ="complexity from nested match_each_* macros"`
`514`	`519`	`)]`
`515`	`520`	`fnadjust_and_extend_offsets(`
`516`		`-our_offsets:&mutPrimitiveVectorMut,`
`517`		`-other:&PrimitiveVector,`
	`521`	`+curr_offsets:&mutPrimitiveVectorMut,`
	`522`	`+new_offsets:&PrimitiveVector,`
`518`	`523`	`old_elements_len:u64,`
	`524`	`+new_elements_len:u64,`
`519`	`525`	`){`
`520`		`- our_offsets.reserve(other.len());`
	`526`	`+// Make sure we use the correct width to fit all offsets.`
	`527`	`+let target_ptype =PType::min_unsigned_ptype_for_value(new_elements_len)`
	`528`	`+.max_unsigned_ptype(curr_offsets.ptype())`
	`529`	`+.max_unsigned_ptype(new_offsets.ptype());`
	`530`	`+`
	`531`	`+if curr_offsets.ptype() != target_ptype{`
	`532`	`+let old_offsets = std::mem::replace(`
	`533`	`+ curr_offsets,`
	`534`	`+PrimitiveVectorMut::with_capacity(target_ptype,0),`
	`535`	`+);`
	`536`	`+*curr_offsets = old_offsets.upcast(target_ptype);`
	`537`	`+}`
`521`	`538`
`522`		-// Adjust each offset from `other` by adding the current elements length to each of the
	`539`	`+ curr_offsets.reserve(new_offsets.len());`
	`540`	`+`
	`541`	+// Adjust each offset from `new_offsets` by adding the current elements length to each of the
`523`	`542`	`// incoming offsets.`
`524`		`-match_each_integer_pvector_mut!(our_offsets, \|self_offsets\|{`
`525`		`- match_each_integer_pvector!(other, \|other_offsets\|{`
`526`		`-letother_offsets_slice =other_offsets.as_ref();`
`527`		`-`
`528`		-// Append each offset from `other`, adjusted by the elements_offset.
`529`		`-for i in0..other.len(){`
`530`		-// All offset types are representable via a `u64` since we also ensure offsets
`531`		`-//arealways non-negative.`
	`543`	`+match_each_integer_pvector_mut!(curr_offsets, \|curr\|{`
	`544`	`+ match_each_integer_pvector!(new_offsets, \|new\|{`
	`545`	`+letnew_offsets_slice =new.as_ref();`
	`546`	`+`
	`547`	+// Append each offset from `new_offsets`, adjusted by the elements_offset.
	`548`	`+for i in0..new.len(){`
	`549`	+// All offset types are representable via a `u64` since we also ensure offsets are
	`550`	`+// always non-negative.`
`532`	`551`	`#[allow(clippy::unnecessary_cast)]`
`533`		`-let adjusted_offset = other_offsets_slice[i]asu64 + old_elements_len;`
	`552`	`+let adjusted_offset = new_offsets_slice[i]asu64 + old_elements_len;`
	`553`	`+ debug_assert!(`
	`554`	`+ adjusted_offset < new_elements_len,`
	`555`	`+"new list view offset is somehow out of bounds, something has gone wrong"`
	`556`	`+);`
`534`	`557`
`535`		-// SAFETY: We just reserved capacity for `other.len()` elements above, and we
`536`		`-// also know the cast is fine because we verified above that the maximum`
`537`		`-// possible offset is representable by the offset type.`
`538`		`-#[allow(clippy::cast_possible_truncation)]`
	`558`	`+let converted =NumCast::from(adjusted_offset)`
	`559`	`+.vortex_expect("offset conversion should succeed after upcast");`
	`560`	`+`
	`561`	+// SAFETY: We reserved capacity for `new_offsets.len()` elements above.
`539`	`562`	`unsafe{`
`540`		`-self_offsets.push_unchecked(adjusted_offsetas _);`
	`563`	`+curr.push_unchecked(converted);`
`541`	`564`	`}`
`542`	`565`	`}`
`543`	`566`	`});`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit9159495

File tree

11 files changed

11 files changed

`‎Cargo.lock‎`

`‎vortex-array/src/arrays/list/vtable/kernel/filter.rs‎`

`‎vortex-compute/src/cast/pvector.rs‎`

`‎vortex-dtype/src/ptype.rs‎`

`‎vortex-scalar/src/vectors.rs‎`

`‎vortex-vector/Cargo.toml‎`

`‎vortex-vector/src/listview/tests.rs‎`

`‎vortex-vector/src/listview/vector_mut.rs‎`

0 commit comments