spiraldb
Showing with 87 additions and 39 deletions.

+1 −0 CHANGELOG.md

+30 −14 Cargo.lock

+1 −0 Cargo.toml

+55 −25 src/alp/mod.rs
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Other
 
+- Port over fixes from vortex repo ([#28](https://github.com/spiraldb/alp/pull/28))
 - Add ci ([#25](https://github.com/spiraldb/alp/pull/25))
 - *(deps)* lock file maintenance ([#24](https://github.com/spiraldb/alp/pull/24))
 - *(deps)* lock file maintenance ([#22](https://github.com/spiraldb/alp/pull/22))

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -8,6 +8,7 @@ edition = "2021"
 
 [dependencies]
 fastlanes = "0.1"
+itertools = "0.14.0"
 num-traits = "0.2.19"
 serde = { version = "1", optional = true, features = ["derive"] }
 

diff --git a/src/alp/mod.rs b/src/alp/mod.rs
@@ -1,8 +1,8 @@
+use itertools::Itertools;
+use num_traits::{CheckedSub, Float, PrimInt, ToPrimitive};
 use std::fmt::{Display, Formatter};
 use std::mem::size_of;
 
-use num_traits::{CheckedSub, Float, PrimInt, ToPrimitive};
-
 const SAMPLE_SIZE: usize = 32;
 
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
@@ -46,18 +46,16 @@ pub fn encode_single<F: ALPFloat>(value: F, exponents: Exponents) -> Result<F::A
 
 /// Decodes an integer value to its matching floating point representation given the same exponents.
 pub fn decode_single<F: ALPFloat>(encoded: F::ALPInt, exponents: Exponents) -> F {
-    F::from_int(encoded) * F::F10[exponents.f as usize] * F::IF10[exponents.e as usize]
+    F::decode_single(encoded, exponents)
 }
 
 /// Encodes a single value, it might not round-trip back it its original value
 /// # Safety
 ///
 /// The returned value may not decode back to the original value.
 #[inline(always)]
-pub unsafe fn encode_single_unchecked<F: ALPFloat>(value: F, exponents: Exponents) -> F::ALPInt {
-    (value * F::F10[exponents.e as usize] * F::IF10[exponents.f as usize])
-        .fast_round()
-        .as_int()
+pub fn encode_single_unchecked<F: ALPFloat>(value: F, exponents: Exponents) -> F::ALPInt {
+    F::encode_single_unchecked(value, exponents)
 }
 
 pub trait ALPFloat: private::Sealed + Float + Display + 'static {
@@ -81,16 +79,18 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
     /// Convert from the integer type back to the float type using `as`.
     fn from_int(n: Self::ALPInt) -> Self;
 
+    fn is_eq(self, other: Self) -> bool;
+
     fn find_best_exponents(values: &[Self]) -> Exponents {
         let mut best_exp = Exponents { e: 0, f: 0 };
         let mut best_nbytes: usize = usize::MAX;
 
-        let sample: Option<Vec<Self>> = (values.len() > SAMPLE_SIZE).then(|| {
+        let sample = (values.len() > SAMPLE_SIZE).then(|| {
             values
                 .iter()
                 .step_by(values.len() / SAMPLE_SIZE)
                 .cloned()
-                .collect()
+                .collect_vec()
         });
 
         for e in (0..Self::MAX_EXPONENT).rev() {
@@ -115,12 +115,10 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
 
     #[inline]
     fn estimate_encoded_size(encoded: &[Self::ALPInt], patches: &[Self]) -> usize {
-        let minmax = encoded.iter().fold(None, |minmax, next| {
-            let (min, max) = minmax.unwrap_or((next, next));
-
-            Some((min.min(next), max.max(next)))
-        });
-        let bits_per_encoded = minmax
+        let bits_per_encoded = encoded
+            .iter()
+            .minmax()
+            .into_option()
             // estimating bits per encoded value assuming frame-of-reference + bitpacking-without-patches
             .and_then(|(min, max)| max.checked_sub(min))
             .and_then(|range_size: <Self as ALPFloat>::ALPInt| range_size.to_u64())
@@ -168,11 +166,23 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
         (exp, encoded_output, patch_indices, patch_values)
     }
 
+    fn encode_above(value: Self, exponents: Exponents) -> Self::ALPInt {
+        (value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
+            .ceil()
+            .as_int()
+    }
+
+    fn encode_below(value: Self, exponents: Exponents) -> Self::ALPInt {
+        (value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
+            .floor()
+            .as_int()
+    }
+
     #[inline]
     fn encode_single(value: Self, exponents: Exponents) -> Result<Self::ALPInt, Self> {
-        let encoded = unsafe { Self::encode_single_unchecked(value, exponents) };
+        let encoded = Self::encode_single_unchecked(value, exponents);
         let decoded = Self::decode_single(encoded, exponents);
-        if decoded == value {
+        if decoded.is_eq(value) {
             return Ok(encoded);
         }
         Err(value)
@@ -183,11 +193,9 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
         Self::from_int(encoded) * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize]
     }
 
-    /// # Safety
-    ///
-    /// The returned value may not decode back to the original value.
+    /// Encodes a single value; it might not round-trip back to its original value.
     #[inline(always)]
-    unsafe fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
+    fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
         (value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
             .fast_round()
             .as_int()
@@ -209,10 +217,10 @@ fn encode_chunk_unchecked<T: ALPFloat>(
 
     // encode the chunk, counting the number of patches
     let mut chunk_patch_count = 0;
-    encoded_output.extend(chunk.iter().map(|v| {
-        let encoded = unsafe { T::encode_single_unchecked(*v, exp) };
+    encoded_output.extend(chunk.iter().map(|&v| {
+        let encoded = encode_single_unchecked(v, exp);
         let decoded = T::decode_single(encoded, exp);
-        let neq = (decoded != *v) as usize;
+        let neq = !decoded.is_eq(v) as usize;
         chunk_patch_count += neq;
         encoded
     }));
@@ -234,7 +242,7 @@ fn encode_chunk_unchecked<T: ALPFloat>(
             // write() is only safe to call more than once because the values are primitive (i.e., Drop is a no-op)
             patch_indices_mut[chunk_patch_index].write(i as u64);
             patch_values_mut[chunk_patch_index].write(chunk[i - num_prev_encoded]);
-            chunk_patch_index += (decoded != chunk[i - num_prev_encoded]) as usize;
+            chunk_patch_index += !decoded.is_eq(chunk[i - num_prev_encoded]) as usize;
         }
         assert_eq!(chunk_patch_index, chunk_patch_count);
         unsafe {
@@ -309,6 +317,10 @@ impl ALPFloat for f32 {
     fn from_int(n: Self::ALPInt) -> Self {
         n as _
     }
+
+    fn is_eq(self, other: Self) -> bool {
+        self.to_bits() == other.to_bits()
+    }
 }
 
 impl ALPFloat for f64 {
@@ -380,4 +392,22 @@ impl ALPFloat for f64 {
     fn from_int(n: Self::ALPInt) -> Self {
         n as _
     }
+
+    fn is_eq(self, other: Self) -> bool {
+        self.to_bits() == other.to_bits()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn non_finite_numbers() {
+        let original = vec![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY];
+        let (_, encoded, patch_idx, _) = encode(&original, None);
+
+        assert_eq!(patch_idx, vec![1, 2, 3, 4]);
+        assert_eq!(encoded, vec![0, 0, 0, 0, 0]);
+    }
 }