Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: spiraldb/alp
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 46353d480e73869b03f47d38a9d1b9cabf80859b
Choose a base ref
..
head repository: spiraldb/alp
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: eba1e17375384dccb9c98bdbe9468677247302c1
Choose a head ref
Showing with 87 additions and 39 deletions.
  1. +1 −0 CHANGELOG.md
  2. +30 −14 Cargo.lock
  3. +1 −0 Cargo.toml
  4. +55 −25 src/alp/mod.rs
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Other

- Port over fixes from vortex repo ([#28](https://github.com/spiraldb/alp/pull/28))
- Add ci ([#25](https://github.com/spiraldb/alp/pull/25))
- *(deps)* lock file maintenance ([#24](https://github.com/spiraldb/alp/pull/24))
- *(deps)* lock file maintenance ([#22](https://github.com/spiraldb/alp/pull/22))
44 changes: 30 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@ edition = "2021"

[dependencies]
fastlanes = "0.1"
itertools = "0.14.0"
num-traits = "0.2.19"
serde = { version = "1", optional = true, features = ["derive"] }

80 changes: 55 additions & 25 deletions src/alp/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use itertools::Itertools;
use num_traits::{CheckedSub, Float, PrimInt, ToPrimitive};
use std::fmt::{Display, Formatter};
use std::mem::size_of;

use num_traits::{CheckedSub, Float, PrimInt, ToPrimitive};

/// Sampling threshold used by `find_best_exponents`: when the input has more than
/// `SAMPLE_SIZE` values, it is subsampled with a stride of `values.len() / SAMPLE_SIZE`
/// instead of being used in full.
const SAMPLE_SIZE: usize = 32;

#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
@@ -46,18 +46,16 @@ pub fn encode_single<F: ALPFloat>(value: F, exponents: Exponents) -> Result<F::A

/// Decodes an integer value to its matching floating point representation given the same exponents.
pub fn decode_single<F: ALPFloat>(encoded: F::ALPInt, exponents: Exponents) -> F {
F::from_int(encoded) * F::F10[exponents.f as usize] * F::IF10[exponents.e as usize]
F::decode_single(encoded, exponents)
}

/// Encodes a single value; it might not round-trip back to its original value.
/// # Safety
///
/// The returned value may not decode back to the original value.
#[inline(always)]
pub unsafe fn encode_single_unchecked<F: ALPFloat>(value: F, exponents: Exponents) -> F::ALPInt {
(value * F::F10[exponents.e as usize] * F::IF10[exponents.f as usize])
.fast_round()
.as_int()
pub fn encode_single_unchecked<F: ALPFloat>(value: F, exponents: Exponents) -> F::ALPInt {
F::encode_single_unchecked(value, exponents)
}

pub trait ALPFloat: private::Sealed + Float + Display + 'static {
@@ -81,16 +79,18 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
/// Convert from the integer type back to the float type using `as`.
fn from_int(n: Self::ALPInt) -> Self;

/// Equality test used to decide whether a decoded value matches the original
/// (values that fail it are recorded as patches).
///
/// The `f32` and `f64` implementations in this file compare the `to_bits()`
/// representations rather than using `==`.
fn is_eq(self, other: Self) -> bool;

fn find_best_exponents(values: &[Self]) -> Exponents {
let mut best_exp = Exponents { e: 0, f: 0 };
let mut best_nbytes: usize = usize::MAX;

let sample: Option<Vec<Self>> = (values.len() > SAMPLE_SIZE).then(|| {
let sample = (values.len() > SAMPLE_SIZE).then(|| {
values
.iter()
.step_by(values.len() / SAMPLE_SIZE)
.cloned()
.collect()
.collect_vec()
});

for e in (0..Self::MAX_EXPONENT).rev() {
@@ -115,12 +115,10 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {

#[inline]
fn estimate_encoded_size(encoded: &[Self::ALPInt], patches: &[Self]) -> usize {
let minmax = encoded.iter().fold(None, |minmax, next| {
let (min, max) = minmax.unwrap_or((next, next));

Some((min.min(next), max.max(next)))
});
let bits_per_encoded = minmax
let bits_per_encoded = encoded
.iter()
.minmax()
.into_option()
// estimating bits per encoded value assuming frame-of-reference + bitpacking-without-patches
.and_then(|(min, max)| max.checked_sub(min))
.and_then(|range_size: <Self as ALPFloat>::ALPInt| range_size.to_u64())
@@ -168,11 +166,23 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
(exp, encoded_output, patch_indices, patch_values)
}

/// Encodes `value` with the given exponents, rounding the scaled result up.
///
/// The value is multiplied by `F10[exponents.e]` and `IF10[exponents.f]`, passed
/// through `ceil`, and converted with `as_int`. This is the same scaling as
/// `encode_single_unchecked`, which uses `fast_round` instead of `ceil`.
fn encode_above(value: Self, exponents: Exponents) -> Self::ALPInt {
    let scaled = value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize];
    let rounded_up = scaled.ceil();
    rounded_up.as_int()
}

/// Encodes `value` with the given exponents, rounding the scaled result down.
///
/// The value is multiplied by `F10[exponents.e]` and `IF10[exponents.f]`, passed
/// through `floor`, and converted with `as_int`. This is the same scaling as
/// `encode_single_unchecked`, which uses `fast_round` instead of `floor`.
fn encode_below(value: Self, exponents: Exponents) -> Self::ALPInt {
    let scaled = value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize];
    let rounded_down = scaled.floor();
    rounded_down.as_int()
}

#[inline]
fn encode_single(value: Self, exponents: Exponents) -> Result<Self::ALPInt, Self> {
let encoded = unsafe { Self::encode_single_unchecked(value, exponents) };
let encoded = Self::encode_single_unchecked(value, exponents);
let decoded = Self::decode_single(encoded, exponents);
if decoded == value {
if decoded.is_eq(value) {
return Ok(encoded);
}
Err(value)
@@ -183,11 +193,9 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
Self::from_int(encoded) * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize]
}

/// # Safety
///
/// The returned value may not decode back to the original value.
/// Encodes a single value; it might not round-trip back to its original value.
#[inline(always)]
unsafe fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
(value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
.fast_round()
.as_int()
@@ -209,10 +217,10 @@ fn encode_chunk_unchecked<T: ALPFloat>(

// encode the chunk, counting the number of patches
let mut chunk_patch_count = 0;
encoded_output.extend(chunk.iter().map(|v| {
let encoded = unsafe { T::encode_single_unchecked(*v, exp) };
encoded_output.extend(chunk.iter().map(|&v| {
let encoded = encode_single_unchecked(v, exp);
let decoded = T::decode_single(encoded, exp);
let neq = (decoded != *v) as usize;
let neq = !decoded.is_eq(v) as usize;
chunk_patch_count += neq;
encoded
}));
@@ -234,7 +242,7 @@ fn encode_chunk_unchecked<T: ALPFloat>(
// write() is only safe to call more than once because the values are primitive (i.e., Drop is a no-op)
patch_indices_mut[chunk_patch_index].write(i as u64);
patch_values_mut[chunk_patch_index].write(chunk[i - num_prev_encoded]);
chunk_patch_index += (decoded != chunk[i - num_prev_encoded]) as usize;
chunk_patch_index += !decoded.is_eq(chunk[i - num_prev_encoded]) as usize;
}
assert_eq!(chunk_patch_index, chunk_patch_count);
unsafe {
@@ -309,6 +317,10 @@ impl ALPFloat for f32 {
/// Converts the encoded integer back to the float type with a plain `as` cast.
fn from_int(n: Self::ALPInt) -> Self {
    n as Self
}

/// Bit-for-bit equality: two values match only when their `to_bits()`
/// representations are identical.
///
/// Unlike `==`, this tells `0.0` apart from `-0.0` and treats NaNs with the
/// same bit pattern as equal.
fn is_eq(self, other: Self) -> bool {
    let (lhs_bits, rhs_bits) = (self.to_bits(), other.to_bits());
    lhs_bits == rhs_bits
}
}

impl ALPFloat for f64 {
@@ -380,4 +392,22 @@ impl ALPFloat for f64 {
/// Converts the encoded integer back to the float type with a plain `as` cast.
fn from_int(n: Self::ALPInt) -> Self {
    n as Self
}

/// Bit-for-bit equality: two values match only when their `to_bits()`
/// representations are identical.
///
/// Unlike `==`, this tells `0.0` apart from `-0.0` and treats NaNs with the
/// same bit pattern as equal.
fn is_eq(self, other: Self) -> bool {
    let (lhs_bits, rhs_bits) = (self.to_bits(), other.to_bits());
    lhs_bits == rhs_bits
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes a slice containing signed zeros, NaN and both infinities, then checks
    /// which positions are reported as patches and what ends up in the encoded output.
    #[test]
    fn non_finite_numbers() {
        let input = vec![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY];
        let (_exponents, encoded_ints, patch_positions, _patch_values) = encode(&input, None);

        // Every value except the leading +0.0 is expected to be flagged as a patch.
        assert_eq!(patch_positions, vec![1, 2, 3, 4]);
        // All encoded slots, patched or not, are expected to hold 0.
        assert_eq!(encoded_ints, vec![0, 0, 0, 0, 0]);
    }
}