diff --git a/Cargo.lock b/Cargo.lock index f30be55a3e2..6a68083f777 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2605,7 +2605,6 @@ dependencies = [ "futures-core", "futures-task", "futures-util", - "num_cpus", ] [[package]] @@ -3929,16 +3928,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "num_enum" version = "0.7.4" @@ -6260,6 +6249,7 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" name = "vortex" version = "0.1.0" dependencies = [ + "anyhow", "arrow-array", "codspeed-divan-compat", "itertools 0.14.0", @@ -6441,9 +6431,7 @@ dependencies = [ "arrow-schema", "cxx", "cxx-build", - "futures", "paste", - "prost 0.14.1", "take_mut", "tokio", "vortex", @@ -6489,7 +6477,6 @@ dependencies = [ name = "vortex-decimal-byte-parts" version = "0.1.0" dependencies = [ - "itertools 0.14.0", "num-traits", "prost 0.14.1", "rstest", @@ -6554,12 +6541,10 @@ dependencies = [ "anyhow", "arrow-array", "arrow-buffer", - "arrow-schema", "bindgen", "bitvec", "cbindgen", "cc", - "crossbeam-queue", "dashmap", "futures", "glob", @@ -6589,7 +6574,6 @@ dependencies = [ "flatbuffers", "jiff", "object_store", - "parquet", "prost 0.14.1", "pyo3", "serde_json", @@ -6670,7 +6654,6 @@ dependencies = [ name = "vortex-file" version = "0.1.0" dependencies = [ - "arcref", "async-trait", "bytes", "dashmap", @@ -6684,7 +6667,6 @@ dependencies = [ "object_store", "rustc-hash", "tokio", - "tracing", "uuid", "vortex-alp", "vortex-array", @@ -6839,7 +6821,6 @@ dependencies = [ "pco", "pin-project-lite", "prost 0.14.1", - "roaring", "rstest", "tokio", "tracing", @@ -6883,7 +6864,6 @@ dependencies = [ name = "vortex-pco" version = "0.1.0" dependencies = [ - "half", "pco", "prost 0.14.1", "rstest", @@ -6995,7 +6975,6 @@ dependencies = [ 
name = "vortex-sequence" version = "0.1.0" dependencies = [ - "arcref", "num-traits", "prost 0.14.1", "rstest", @@ -7020,7 +6999,6 @@ dependencies = [ "num-traits", "prost 0.14.1", "rstest", - "rstest_reuse", "vortex-array", "vortex-buffer", "vortex-dtype", diff --git a/Cargo.toml b/Cargo.toml index 6663e1c248a..62a35c16925 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -121,7 +121,7 @@ moka = { version = "0.12.10", default-features = false } multiversion = "0.8.0" num-traits = "0.2.19" num_enum = { version = "0.7.3", default-features = false } -object_store = { version = "0.12.3", features = ["aws"] } +object_store = { version = "0.12.3", default-features = false } once_cell = "1.21" opentelemetry = "0.30.0" opentelemetry-otlp = "0.30.0" diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index 7a93397e0e7..8724cd9894e 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -72,7 +72,6 @@ url = { workspace = true } uuid = { workspace = true, features = ["v4"] } vortex = { workspace = true, features = [ "object_store", - "parquet", "files", "tokio", "zstd", diff --git a/encodings/decimal-byte-parts/Cargo.toml b/encodings/decimal-byte-parts/Cargo.toml index 07d6c037223..cb79a92ede9 100644 --- a/encodings/decimal-byte-parts/Cargo.toml +++ b/encodings/decimal-byte-parts/Cargo.toml @@ -17,7 +17,6 @@ version = { workspace = true } workspace = true [dependencies] -itertools = { workspace = true } num-traits = { workspace = true } prost = { workspace = true } vortex-array = { workspace = true } diff --git a/encodings/pco/Cargo.toml b/encodings/pco/Cargo.toml index f873bee78f1..9aeb29e6006 100644 --- a/encodings/pco/Cargo.toml +++ b/encodings/pco/Cargo.toml @@ -17,7 +17,6 @@ version = { workspace = true } workspace = true [dependencies] -half = { workspace = true } pco = { workspace = true } prost = { workspace = true } vortex-array = { workspace = true } diff --git a/encodings/pco/src/array.rs b/encodings/pco/src/array.rs index 
1dba5925794..a0b3fcfb943 100644 --- a/encodings/pco/src/array.rs +++ b/encodings/pco/src/array.rs @@ -18,7 +18,7 @@ use vortex_array::vtable::{ }; use vortex_array::{ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, ToCanonical, vtable}; use vortex_buffer::{BufferMut, ByteBuffer, ByteBufferMut}; -use vortex_dtype::{DType, PType}; +use vortex_dtype::{DType, PType, half}; use vortex_error::{VortexError, VortexResult, vortex_bail, vortex_err}; use vortex_scalar::Scalar; diff --git a/encodings/sequence/Cargo.toml b/encodings/sequence/Cargo.toml index c73ffbc6ac3..04da6dd4efb 100644 --- a/encodings/sequence/Cargo.toml +++ b/encodings/sequence/Cargo.toml @@ -14,7 +14,6 @@ rust-version = { workspace = true } version = { workspace = true } [dependencies] -arcref = { workspace = true } num-traits = { workspace = true } prost = { workspace = true } vortex-array = { workspace = true } diff --git a/encodings/sparse/Cargo.toml b/encodings/sparse/Cargo.toml index 25f0581c409..a65ad199d35 100644 --- a/encodings/sparse/Cargo.toml +++ b/encodings/sparse/Cargo.toml @@ -20,7 +20,6 @@ workspace = true itertools = { workspace = true } num-traits = { workspace = true } prost = { workspace = true } -rstest_reuse = { workspace = true } vortex-array = { workspace = true } vortex-buffer = { workspace = true } vortex-dtype = { workspace = true } diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 4a4e663c5d1..eb6ab360b1c 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -22,7 +22,6 @@ arcref = { workspace = true } arrow-arith = { workspace = true } arrow-array = { workspace = true, features = ["ffi"] } arrow-buffer = { workspace = true } -arrow-cast = { workspace = true } arrow-data = { workspace = true } arrow-ord = { workspace = true } arrow-schema = { workspace = true } @@ -73,6 +72,7 @@ table-display = ["dep:tabled"] test-harness = ["dep:goldenfile", "dep:rstest", "dep:rstest_reuse"] [dev-dependencies] +arrow-cast = { workspace = true } divan = 
{ workspace = true } rstest = { workspace = true } vortex-array = { path = ".", features = ["test-harness"] } diff --git a/vortex-array/src/arrays/varbin/canonical.rs b/vortex-array/src/arrays/varbin/canonical.rs index e2fa595a185..e4f74321e5b 100644 --- a/vortex-array/src/arrays/varbin/canonical.rs +++ b/vortex-array/src/arrays/varbin/canonical.rs @@ -1,6 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::sync::Arc; + +use arrow_array::cast::AsArray; +use arrow_array::{BinaryViewArray, StringViewArray}; use arrow_schema::DataType; use vortex_dtype::DType; use vortex_error::VortexResult; @@ -17,11 +21,28 @@ impl CanonicalVTable for VarBinVTable { let nullable = dtype.is_nullable(); let array_ref = array.to_array().into_arrow_preferred()?; - let array = match dtype { - DType::Utf8(_) => arrow_cast::cast(array_ref.as_ref(), &DataType::Utf8View)?, - DType::Binary(_) => arrow_cast::cast(array_ref.as_ref(), &DataType::BinaryView)?, - _ => unreachable!("VarBinArray must have Utf8 or Binary dtype"), + let array = match (&dtype, array_ref.data_type()) { + (DType::Utf8(_), DataType::Utf8) => { + Arc::new(StringViewArray::from(array_ref.as_string::())) + as Arc + } + (DType::Utf8(_), DataType::LargeUtf8) => { + Arc::new(StringViewArray::from(array_ref.as_string::())) + as Arc + } + + (DType::Binary(_), DataType::Binary) => { + Arc::new(BinaryViewArray::from(array_ref.as_binary::())) + } + (DType::Binary(_), DataType::LargeBinary) => { + Arc::new(BinaryViewArray::from(array_ref.as_binary::())) + } + // If it's already a view, no need to do anything + (DType::Binary(_), DataType::BinaryView) | (DType::Utf8(_), DataType::Utf8View) => { + array_ref + } + _ => unreachable!("VarBinArray must have Utf8 or Binary dtype, instead got: {dtype}",), }; Ok(Canonical::VarBinView( ArrayRef::from_arrow(array.as_ref(), nullable).to_varbinview()?, diff --git a/vortex-cxx/Cargo.toml b/vortex-cxx/Cargo.toml index 
98d36e8ceb9..2dada4120ea 100644 --- a/vortex-cxx/Cargo.toml +++ b/vortex-cxx/Cargo.toml @@ -25,9 +25,7 @@ anyhow = { workspace = true } arrow-array = { workspace = true, features = ["ffi"] } arrow-schema = { workspace = true } cxx = "1.0" -futures = { workspace = true, features = ["thread-pool"] } paste = { workspace = true } -prost = { workspace = true } take_mut = { workspace = true } tokio = { workspace = true, features = ["rt", "rt-multi-thread", "macros"] } vortex = { workspace = true, features = ["tokio"] } diff --git a/vortex-duckdb/Cargo.toml b/vortex-duckdb/Cargo.toml index 054e7ae9c6e..0b6cead59b4 100644 --- a/vortex-duckdb/Cargo.toml +++ b/vortex-duckdb/Cargo.toml @@ -23,16 +23,14 @@ crate-type = ["staticlib", "cdylib", "rlib"] anyhow = { workspace = true } arrow-array = { workspace = true } arrow-buffer = { workspace = true } -arrow-schema = { workspace = true } bitvec = { workspace = true } -crossbeam-queue = { workspace = true } dashmap = { workspace = true } futures = { workspace = true } glob = { workspace = true } itertools = { workspace = true } log = { workspace = true } num-traits = { workspace = true } -object_store = { workspace = true } +object_store = { workspace = true, features = ["aws"] } parking_lot = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread"] } diff --git a/vortex-error/Cargo.toml b/vortex-error/Cargo.toml index c78a27e1cf7..22ad04b6cd4 100644 --- a/vortex-error/Cargo.toml +++ b/vortex-error/Cargo.toml @@ -23,7 +23,6 @@ arrow-schema = { workspace = true } flatbuffers = { workspace = true, optional = true } jiff = { workspace = true } object_store = { workspace = true, optional = true } -parquet = { workspace = true, optional = true } prost = { workspace = true, optional = true } pyo3 = { workspace = true, optional = true } serde_json = { workspace = true, optional = true } diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 
c17628a4ce9..69e01799405 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -103,9 +103,6 @@ pub enum VortexError { IOError(io::Error, Box), /// A wrapper for UTF-8 conversion errors. Utf8Error(std::str::Utf8Error, Box), - /// A wrapper for errors from the Parquet library. - #[cfg(feature = "parquet")] - ParquetError(parquet::errors::ParquetError, Box), /// A wrapper for errors from the standard library when converting a slice to an array. TryFromSliceError(std::array::TryFromSliceError, Box), /// A wrapper for errors from the Object Store library. @@ -139,6 +136,11 @@ impl VortexError { pub fn with_context>(self, msg: T) -> Self { VortexError::Context(msg.into(), Box::new(self)) } + + /// Wrap a generic error into a Vortex error + pub fn generic(err: Box) -> Self { + Self::Generic(err, Box::new(Backtrace::capture())) + } } impl Display for VortexError { @@ -200,10 +202,6 @@ impl Display for VortexError { VortexError::Utf8Error(err, backtrace) => { write!(f, "{err}\nBacktrace:\n{backtrace}") } - #[cfg(feature = "parquet")] - VortexError::ParquetError(err, backtrace) => { - write!(f, "{err}\nBacktrace:\n{backtrace}") - } VortexError::TryFromSliceError(err, backtrace) => { write!(f, "{err}\nBacktrace:\n{backtrace}") } @@ -253,19 +251,13 @@ impl Error for VortexError { VortexError::ArrowError(err, _) => Some(err), #[cfg(feature = "flatbuffers")] VortexError::FlatBuffersError(err, _) => Some(err), - VortexError::FmtError(err, _) => Some(err), VortexError::IOError(err, _) => Some(err), - VortexError::Utf8Error(err, _) => Some(err), - #[cfg(feature = "parquet")] - VortexError::ParquetError(err, _) => Some(err), - VortexError::TryFromSliceError(err, _) => Some(err), #[cfg(feature = "object_store")] VortexError::ObjectStore(err, _) => Some(err), VortexError::JiffError(err, _) => Some(err), #[cfg(feature = "tokio")] VortexError::JoinError(err, _) => Some(err), VortexError::UrlError(err, _) => Some(err), - VortexError::TryFromInt(err, _) => Some(err), 
#[cfg(feature = "serde")] VortexError::SerdeJsonError(err, _) => Some(err), #[cfg(feature = "prost")] @@ -486,12 +478,6 @@ impl From for VortexError { } } -impl From for VortexError { - fn from(value: fmt::Error) -> Self { - VortexError::FmtError(value, Box::new(Backtrace::capture())) - } -} - impl From for VortexError { fn from(value: io::Error) -> Self { VortexError::IOError(value, Box::new(Backtrace::capture())) @@ -504,13 +490,6 @@ impl From for VortexError { } } -#[cfg(feature = "parquet")] -impl From for VortexError { - fn from(value: parquet::errors::ParquetError) -> Self { - VortexError::ParquetError(value, Box::new(Backtrace::capture())) - } -} - impl From for VortexError { fn from(value: std::array::TryFromSliceError) -> Self { VortexError::TryFromSliceError(value, Box::new(Backtrace::capture())) diff --git a/vortex-file/Cargo.toml b/vortex-file/Cargo.toml index 0787125e38c..8882692b1dd 100644 --- a/vortex-file/Cargo.toml +++ b/vortex-file/Cargo.toml @@ -14,7 +14,6 @@ rust-version = { workspace = true } version = { workspace = true } [dependencies] -arcref = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } dashmap = { workspace = true } @@ -29,7 +28,6 @@ moka = { workspace = true, features = ["future"] } object_store = { workspace = true, optional = true } rustc-hash = { workspace = true } tokio = { workspace = true, features = ["rt"], optional = true } -tracing = { workspace = true, optional = true } # Needed to pickup the "js" feature for wasm targets from the workspace configuration uuid = { workspace = true } vortex-alp = { workspace = true } @@ -81,5 +79,5 @@ tokio = [ "vortex-layout/tokio", "vortex-scan/tokio", ] -tracing = ["dep:tracing", "vortex-io/tracing", "vortex-layout/tracing"] +tracing = ["vortex-io/tracing", "vortex-layout/tracing"] zstd = ["dep:vortex-zstd", "vortex-layout/zstd"] diff --git a/vortex-io/Cargo.toml b/vortex-io/Cargo.toml index ee9b0c9f3eb..84479e772ae 100644 --- a/vortex-io/Cargo.toml +++ 
b/vortex-io/Cargo.toml @@ -22,7 +22,7 @@ futures = { workspace = true, features = ["std"] } futures-util = { workspace = true } # Needed to pickup the "wasm_js" feature for wasm targets from the workspace configuration getrandom_v03 = { workspace = true } -object_store = { workspace = true, optional = true } +object_store = { workspace = true, optional = true, features = ["fs"] } pin-project = { workspace = true } # this is the maximum subset of fetaures that is safe for wasm32 targets tokio = { workspace = true, features = ["io-util", "rt", "sync"] } diff --git a/vortex-layout/Cargo.toml b/vortex-layout/Cargo.toml index f8d574c5077..582dc2315b1 100644 --- a/vortex-layout/Cargo.toml +++ b/vortex-layout/Cargo.toml @@ -30,7 +30,6 @@ paste = { workspace = true } pco = { workspace = true } pin-project-lite = { workspace = true } prost = { workspace = true } -roaring = { workspace = true, optional = true } tokio = { workspace = true, features = ["rt"], optional = true } tracing = { workspace = true, optional = true } vortex-array = { workspace = true } @@ -55,7 +54,6 @@ tokio = { workspace = true, features = ["rt", "macros"] } vortex-layout = { path = ".", features = ["tokio", "test-harness"] } [features] -roaring = ["dep:roaring"] test-harness = [] tokio = ["dep:tokio", "vortex-error/tokio", "tracing"] zstd = ["dep:vortex-zstd"] diff --git a/vortex-scan/Cargo.toml b/vortex-scan/Cargo.toml index 62d6f5ce384..dfd3addec49 100644 --- a/vortex-scan/Cargo.toml +++ b/vortex-scan/Cargo.toml @@ -42,7 +42,7 @@ vortex-layout = { workspace = true, features = ["test-harness"] } [features] default = [] -roaring = ["dep:roaring", "vortex-layout/roaring"] +roaring = ["dep:roaring"] tokio = ["dep:tokio"] [lints] diff --git a/vortex-tui/Cargo.toml b/vortex-tui/Cargo.toml index 45110b4a1b8..7415f132ba0 100644 --- a/vortex-tui/Cargo.toml +++ b/vortex-tui/Cargo.toml @@ -26,7 +26,7 @@ parquet = { workspace = true, features = ["arrow", "async"] } ratatui = { workspace = true } taffy = { 
workspace = true } tokio = { workspace = true, features = ["rt-multi-thread"] } -vortex = { workspace = true, features = ["parquet", "tokio"] } +vortex = { workspace = true, features = ["tokio"] } [lints] workspace = true diff --git a/vortex-tui/src/convert.rs b/vortex-tui/src/convert.rs index 8539bbb1a31..74266e7f546 100644 --- a/vortex-tui/src/convert.rs +++ b/vortex-tui/src/convert.rs @@ -15,7 +15,7 @@ use vortex::arrow::FromArrowArray; use vortex::compressor::CompactCompressor; use vortex::dtype::DType; use vortex::dtype::arrow::FromArrowType; -use vortex::error::{VortexError, VortexExpect, VortexResult}; +use vortex::error::{VortexError, VortexExpect}; use vortex::file::{VortexWriteOptions, WriteStrategyBuilder}; use vortex::stream::ArrayStreamAdapter; @@ -41,7 +41,7 @@ pub struct Flags { const BATCH_SIZE: usize = 8192; /// Convert Parquet files to Vortex. -pub async fn exec_convert(flags: Flags) -> VortexResult<()> { +pub async fn exec_convert(flags: Flags) -> anyhow::Result<()> { let input_path = flags.file.clone(); if !flags.quiet { eprintln!("Converting input Parquet file: {}", input_path.display()); @@ -60,7 +60,7 @@ pub async fn exec_convert(flags: Flags) -> VortexResult<()> { .build()? 
.map(|record_batch| { record_batch - .map_err(VortexError::from) + .map_err(|e| VortexError::generic(e.into())) .map(|rb| ArrayRef::from_arrow(rb, false)) }) .boxed(); diff --git a/vortex/Cargo.toml b/vortex/Cargo.toml index 99db80abca7..16d8257decd 100644 --- a/vortex/Cargo.toml +++ b/vortex/Cargo.toml @@ -48,6 +48,7 @@ vortex-zigzag = { workspace = true } vortex-zstd = { workspace = true, optional = true } [dev-dependencies] +anyhow = { workspace = true } arrow-array = { workspace = true } divan = { workspace = true } itertools = { workspace = true } @@ -55,14 +56,13 @@ mimalloc = { workspace = true } parquet = { workspace = true } rand = { workspace = true } tokio = { workspace = true, features = ["full"] } -vortex = { path = ".", features = ["parquet", "tokio"] } +vortex = { path = ".", features = ["tokio"] } [features] default = ["files", "zstd"] files = ["dep:vortex-file"] memmap2 = ["vortex-buffer/memmap2"] object_store = ["vortex-file/object_store"] -parquet = ["vortex-error/parquet"] python = ["vortex-error/python"] tokio = ["vortex-file/tokio", "vortex-scan/tokio"] tracing = ["vortex-file/tracing", "vortex-layout/tracing"] diff --git a/vortex/src/lib.rs b/vortex/src/lib.rs index 573f36b64ef..039d952a4d1 100644 --- a/vortex/src/lib.rs +++ b/vortex/src/lib.rs @@ -54,7 +54,7 @@ mod test { use crate as vortex; #[test] - fn convert() -> VortexResult<()> { + fn convert() -> anyhow::Result<()> { // [convert] use std::fs::File;