1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
//! Defines a stored database.

use core::borrow::Borrow;
use core::cell::{OnceCell, Ref, RefCell, RefMut};
use core::hash::Hash;
use core::num::NonZeroUsize;
use std::collections::hash_map::{Entry as HashMapEntry};
use uuid::Uuid;

use crate::error::Error;
use crate::io::{FileSystem, HashedFileIn};
use crate::kmeans::Scalar;
use crate::linalg::{dot, subtract};
use crate::nbest::{NBestByKey, TakeNBestByKey};
use crate::protos::database::{
    AttributesLog as ProtosAttributesLog,
    Database as ProtosDatabase,
    Partition as ProtosPartition,
    VectorSet as ProtosVectorSet,
};
use crate::protos::{Deserialize, read_message};
use crate::slice::AsSlice;
use crate::vector::BlockVectorSet;

use super::{AttributeTable, AttributeValue, Attributes};

/// Extension of a Protocol Buffers file (without the leading dot).
///
/// Appended to component IDs in this module to build the paths of the
/// partition, codebook, and attributes log files.
pub const PROTOBUF_EXTENSION: &str = "binpb";

/// Capability of loading a database.
///
/// Supposed to be specialized for a specific [`Database`].
pub trait LoadDatabase<T, FS> {
    /// Loads a database.
    ///
    /// `fs` is the file system that provides the database files, and `path`
    /// locates the database file within `fs`.
    fn load_database<P>(fs: FS, path: P) -> Result<Database<T, FS>, Error>
    where
        P: AsRef<str>;
}

/// Stored database.
///
/// Keeps the metadata of the database and lazily loads, through `fs`, the
/// partitions, partition centroids, codebooks, and attributes logs on demand.
/// Lazily loaded components live in `RefCell`s/`OnceCell`s so that they can
/// be populated through a shared reference.
pub struct Database<T, FS> {
    // File system which the components of the database are loaded from.
    fs: FS,
    // Number of elements in a vector.
    vector_size: usize,
    // Number of partitions.
    num_partitions: usize,
    // Number of subvector divisions; `vector_size` is a multiple of this.
    num_divisions: usize,
    // Number of codes in each codebook.
    num_codes: usize,
    // IDs of the partitions; used to build partition file paths.
    partition_ids: Vec<String>,
    // Lazily loaded partitions; an element is `None` until it is loaded.
    partitions: RefCell<Vec<Option<Partition<T>>>>,
    // ID of the partition centroids; used to build the file path.
    partition_centroids_id: String,
    // Lazily loaded partition centroids.
    partition_centroids: OnceCell<BlockVectorSet<T>>,
    // IDs of the codebooks; used to build codebook file paths.
    codebook_ids: Vec<String>,
    // Lazily loaded codebooks; `None` until all of them are loaded.
    codebooks: RefCell<Option<Vec<BlockVectorSet<T>>>>,
    // IDs of the attributes logs indexed by partition; used to build
    // attributes log file paths.
    attributes_log_ids: Vec<String>,
    // Whether the attributes log of each partition has been loaded.
    attributes_log_load_flags: RefCell<Vec<bool>>,
    // Attribute names referenced by name index from attributes log entries.
    attribute_names: Vec<String>,
    // Attributes of vectors; `None` until the first attributes log is loaded.
    attribute_table: RefCell<Option<AttributeTable>>,
}

impl<T, FS> Database<T, FS>
where
    FS: FileSystem,
{
    /// Returns the vector size.
    pub fn vector_size(&self) -> usize {
        self.vector_size
    }

    /// Returns the number of partitions.
    pub fn num_partitions(&self) -> usize {
        self.num_partitions
    }

    /// Returns the number of subvector divisions.
    pub fn num_divisions(&self) -> usize {
        self.num_divisions
    }

    /// Returns the number of codes in each codebook.
    pub fn num_codes(&self) -> usize {
        self.num_codes
    }

    /// Returns the subvector size.
    ///
    /// This is the vector size divided by the number of divisions.
    pub fn subvector_size(&self) -> usize {
        self.vector_size / self.num_divisions
    }

    /// Returns the ID of a partition.
    ///
    /// `None` if `index` ≥ `num_partitions`.
    pub fn get_partition_id(&self, index: usize) -> Option<&String> {
        self.partition_ids.get(index)
    }

    /// Returns the ID of the codebook for a specified division.
    ///
    /// `None` if `index` ≥ `num_divisions`.
    pub fn get_codebook_id(&self, index: usize) -> Option<&String> {
        self.codebook_ids.get(index)
    }
}

impl<T, FS> Database<T, FS>
where
    FS: FileSystem,
    Self: LoadPartition<T>,
{
    /// Returns an attribute value of a given vector.
    ///
    /// The first call to this function will take longer because it loads all
    /// the attributes.
    /// If you want to get attributes of your query results, please use
    /// [`QueryResult::get_attribute`] instead.
    ///
    /// `None` if the vector exists but no value is associated with `key`.
    ///
    /// Fails if:
    /// - no vector is associated with `vector_id`
    /// - any attributes log (or the partition it belongs to) cannot be loaded
    ///
    /// # Panics
    ///
    /// May panic if attributes still have to be loaded while an
    /// [`AttributeValueRef`] obtained earlier is alive, because loading needs
    /// to mutably borrow the attribute table.
    pub fn get_attribute<K>(
        &self,
        vector_id: &Uuid,
        key: &K,
    ) -> Result<Option<AttributeValueRef>, Error>
    where
        String: Borrow<K>,
        K: Hash + Eq + ?Sized,
    {
        // The attribute table may already exist while covering only the
        // partitions touched through query results, so its presence does not
        // imply that every attributes log has been loaded.
        // `load_attribute_table` skips the partitions whose logs are already
        // loaded, so calling it unconditionally only costs a flag check per
        // partition.
        self.load_attribute_table()?;
        self.get_attribute_internal(vector_id, key)
    }

    // Looks up an attribute value of a vector that belongs to the partition
    // at `partition_index`.
    //
    // Makes sure that the attributes log of that partition is loaded, then
    // reads the value from the attribute table.
    fn get_attribute_in_partition<K>(
        &self,
        partition_index: usize,
        vector_id: &Uuid,
        key: &K,
    ) -> Result<Option<AttributeValueRef>, Error>
    where
        String: Borrow<K>,
        K: Hash + Eq + ?Sized,
    {
        self.load_attributes_log(partition_index)
            .and_then(|()| self.get_attribute_internal(vector_id, key))
    }

    fn get_attribute_internal<K>(
        &self,
        vector_id: &Uuid,
        key: &K,
    ) -> Result<Option<AttributeValueRef>, Error>
    where
        String: Borrow<K>,
        K: Hash + Eq + ?Sized,
    {
        let attribute_table = Ref::filter_map(
            self.attribute_table.borrow(),
            |tbl| tbl.as_ref(),
        ).expect("attribute table must be loaded");
        let attributes = Ref::filter_map(
            attribute_table,
            |tbl| tbl.get(vector_id),
        ).or(Err(Error::InvalidArgs(
            format!("no such vector ID: {}", vector_id),
        )))?;
        match Ref::filter_map(attributes, |attrs| attrs.get(key)) {
            Ok(value) => Ok(Some(value)),
            Err(_) => Ok(None),
        }
    }

    // Loads the attributes logs of all the partitions.
    //
    // Stops at and returns the first error.
    fn load_attribute_table(&self) -> Result<(), Error> {
        (0..self.num_partitions())
            .try_for_each(|pi| self.load_attributes_log(pi))
    }

    // Loads the attributes log of a specified partition if it is not loaded
    // yet.
    //
    // This function also loads the partition to list all the vector IDs in
    // the partition.
    fn load_attributes_log(&self, partition_index: usize) -> Result<(), Error> {
        if self.attributes_log_load_flags.borrow()[partition_index] {
            return Ok(());
        }
        let partition = self.get_partition(partition_index)?;
        let mut f = self.fs.open_compressed_hashed_file(format!(
            "attributes/{}.{}",
            self.attributes_log_ids[partition_index],
            PROTOBUF_EXTENSION,
        ))?;
        let attributes_log: ProtosAttributesLog = read_message(&mut f)?;
        if attributes_log.partition_id != self.partition_ids[partition_index] {
            return Err(Error::InvalidData(format!(
                "inconsistent partition IDs: {} vs {}",
                attributes_log.partition_id,
                self.partition_ids[partition_index],
            )));
        }
        if self.attribute_table.borrow().is_none() {
            self.attribute_table.replace(Some(AttributeTable::new()));
        }
        let mut attribute_table = RefMut::filter_map(
            self.attribute_table.borrow_mut(),
            |tbl| tbl.as_mut(),
        ).expect("attribute table must exist");
        for (i, entry) in attributes_log.entries.into_iter().enumerate() {
            let attribute_name = self.attribute_names
                .get(entry.name_index as usize)
                .ok_or(Error::InvalidData(format!(
                    "attribute name index out of bounds: {}",
                    entry.name_index,
                )))?;
            let vector_id = entry.vector_id
                .into_option()
                .ok_or(Error::InvalidData(format!(
                    "attributes log[{}, {}]: missing vector ID",
                    partition_index,
                    i,
                )))?
                .deserialize()?;
            let value = entry.value
                .into_option()
                .ok_or(Error::InvalidData(format!(
                    "attributes log[{}, {}]: missing value",
                    partition_index,
                    i,
                )))?
                .deserialize()?;
            match attribute_table.entry(vector_id) {
                HashMapEntry::Occupied(slot) => {
                    match slot.into_mut().entry(attribute_name.clone()) {
                        HashMapEntry::Occupied(slot) => {
                            *slot.into_mut() = value;
                        },
                        HashMapEntry::Vacant(slot) => {
                            slot.insert(value);
                        },
                    };
                },
                HashMapEntry::Vacant(slot) => {
                    slot.insert(Attributes::from([
                        (attribute_name.clone(), value),
                    ]));
                },
            };
        }
        // defaults to empty attributes so that
        // get_attribute won't fail for an existing vector without attributes.
        for vector_id in partition.vector_ids.iter() {
            attribute_table
                .entry(vector_id.clone())
                .or_insert_with(Attributes::new);
        }
        self.attributes_log_load_flags.borrow_mut()[partition_index] = true;
        Ok(())
    }

    // Obtains a specified partition.
    //
    // Lazily loads the partition if it is not loaded yet.
    //
    // Fails if:
    // - `index` exceeds the number of partitions
    // - there is any problem on the partition data
    fn get_partition(
        &self,
        index: usize,
    ) -> Result<PartitionRef<'_, T>, Error> {
        if index >= self.num_partitions() {
            return Err(Error::InvalidArgs(format!(
                "partition index out of bounds: {}",
                index,
            )));
        }
        if self.partitions.borrow()[index].is_none() {
            self.partitions.borrow_mut()[index] =
                Some(self.load_partition(index)?);
        }
        let partition =
            Ref:: filter_map(
                self.partitions.borrow(),
                |partitions| partitions[index].as_ref(),
            )
            .or(Err(Error::InvalidData(
                "partition must be loaded".to_string(),
            )))
            .unwrap();
        Ok(partition)
    }
}

// Reference type of a partition.
//
// Borrows the partition storage of the database (a `RefCell`) for as long as
// it is alive.
type PartitionRef<'a, T> = Ref<'a, Partition<T>>;

/// Reference type of an attribute value.
///
/// Borrows the attribute table of the database for as long as it is alive.
/// You should drop this as soon as possible to avoid panics caused by
/// conflicting borrows; loading further attributes needs to mutably borrow
/// the attribute table.
pub type AttributeValueRef<'a> = Ref<'a, AttributeValue>;

impl<T, FS> Database<T, FS>
where
    T: Scalar,
    FS: FileSystem,
    Self: LoadPartition<T> + LoadCodebook<T> + LoadPartitionCentroids<T>,
{
    /// Queries k-nearest neighbors (k-NN) of a given vector.
    ///
    /// The first call to this function will take longer because it lazily
    /// loads partition centroids, and codebooks.
    pub fn query<'a, V>(
        &'a self,
        v: &V,
        k: NonZeroUsize,
        nprobe: NonZeroUsize,
    ) -> Result<Vec<QueryResult<'a, T, FS>>, Error>
    where
        V: AsSlice<T> + ?Sized,
    {
        self.query_with_events(v, k, nprobe, |_| {})
    }

    /// Queries k-nearest neighbors (k-NN) of a given vector.
    ///
    /// The first call to this function will take longer because it lazily
    /// loads partition centroids, and codebooks.
    pub fn query_with_events<'a, V, EventHandler>(
        &'a self,
        v: &V,
        k: NonZeroUsize,
        nprobe: NonZeroUsize,
        mut event: EventHandler,
    ) -> Result<Vec<QueryResult<'a, T, FS>>, Error>
    where
        V: AsSlice<T> + ?Sized,
        EventHandler: FnMut(QueryEvent) -> (),
    {
        event(QueryEvent::StartingQueryInitialization);
        if self.partition_centroids.get().is_none() {
            // lazily loads partition centroids
            self.partition_centroids
                .set(self.load_partition_centroids()?)
                .unwrap();
        }
        if self.codebooks.borrow().is_none() {
            // loads codebooks if not loaded yet.
            let mut codebooks: Vec<BlockVectorSet<T>> =
                Vec::with_capacity(self.num_divisions());
            for di in 0..self.num_divisions() {
                codebooks.push(self.load_codebook(di)?);
            }
            self.codebooks.replace(Some(codebooks));
        }
        event(QueryEvent::FinishedQueryInitialization);
        event(QueryEvent::StartingPartitionSelection);
        let v = v.as_slice();
        let queries = self.query_partitions(v, k, nprobe)?;
        event(QueryEvent::FinishedPartitionSelection);
        let all_results: Vec<Vec<QueryResult<'a, T, FS>>> = queries
            .into_iter()
            .map(|query| {
                event(QueryEvent::StartingPartitionQuery(
                    query.partition_index,
                ));
                let results = query.execute();
                if results.is_ok() {
                    event(QueryEvent::FinishedPartitionQuery(
                        query.partition_index,
                    ));
                }
                results
            })
            .collect::<Result<Vec<_>, Error>>()?;
        event(QueryEvent::StartingResultSelection);
        let mut all_results: Vec<QueryResult<'a, T, FS>> = all_results
            .into_iter()
            .flatten()
            .n_best_by_key(k.get(), |r| r.squared_distance)
            .into();
        all_results.sort_by(|lhs, rhs| {
            lhs.squared_distance.partial_cmp(&rhs.squared_distance).unwrap()
        });
        event(QueryEvent::FinishedResultSelection);
        Ok(all_results)
    }

    // Queries partitions closest to a given vector.
    //
    // Panics if the partition centroids are not loaded.
    fn query_partitions<'a>(
        &'a self,
        v: &[T],
        k: NonZeroUsize,
        nprobe: NonZeroUsize,
    ) -> Result<Vec<PartitionQuery<'a, T, FS>>, Error> {
        let nprobe = nprobe.get();
        let k = k.get();
        let num_partitions = self.num_partitions();
        if nprobe > num_partitions {
            return Err(Error::InvalidArgs(format!(
                "nprobe {} exceeds the number of partitions {}",
                nprobe,
                num_partitions,
            )));
        }
        let partition_centroids = self.partition_centroids.get()
            .expect("partition centroids must be loaded");
        // localizes vectors and calculates distances
        let mut distances: NBestByKey<(usize, Vec<T>, T), T, _> =
            NBestByKey::new(nprobe, |(_, _, distance)| *distance);
        for pi in 0..num_partitions {
            let mut localized: Vec<T> = Vec::with_capacity(self.vector_size());
            unsafe {
                localized.set_len(self.vector_size());
            }
            let centroid = partition_centroids.get(pi);
            subtract(v, &centroid, &mut localized[..]);
            let distance = dot(&localized[..], &localized[..]);
            distances.push((pi, localized, distance));
        }
        // chooses `nprobes` shortest distances.
        distances.sort_by(|lhs, rhs| lhs.2.partial_cmp(&rhs.2).unwrap());
        // makes queries.
        let queries = distances
            .into_iter()
            .map(|(pi, localized, _)| PartitionQuery {
                db: self,
                codebooks: Ref::map(
                    self.codebooks.borrow(),
                    |cb| cb.as_ref().unwrap(),
                ),
                partition_index: pi,
                localized,
                k,
            })
            .collect();
        Ok(queries)
    }
}

/// Partition.
///
/// Bears the centroid element type `T`, but the centroid is not retained
/// because the database manages centroids.
#[derive(Clone)]
pub struct Partition<T> {
    // Marker for the centroid element type; no `T` value is stored.
    _t: std::marker::PhantomData<T>,
    // Encoded vectors; each element of a vector is used as a code index
    // into the codebook of the corresponding division.
    encoded_vectors: BlockVectorSet<u32>,
    // IDs of the vectors; the i-th ID corresponds to the i-th encoded vector.
    vector_ids: Vec<Uuid>,
}

impl<T> Partition<T> {
    /// Returns the number of vectors in the partition.
    pub fn num_vectors(&self) -> usize {
        self.encoded_vectors.len()
    }

    /// Returns a specified encoded vector.
    ///
    /// `None` if `index` ≥ `num_vectors`.
    pub fn get_encoded_vector(&self, index: usize) -> Option<&[u32]> {
        let in_bounds = index < self.encoded_vectors.len();
        in_bounds.then(|| self.encoded_vectors.get(index))
    }

    /// Returns the ID of a specified vector.
    ///
    /// `None` if `index` ≥ `num_vectors`.
    pub fn get_vector_id(&self, index: usize) -> Option<&Uuid> {
        let ids = &self.vector_ids;
        ids.get(index)
    }
}

/// Capability of loading a partition.
///
/// Supposed to be specialized for a specific [`Database`].
pub trait LoadPartition<T> {
    /// Loads a partition at a given index.
    ///
    /// Fails if `index` is out of the bounds, or the partition cannot be
    /// loaded.
    fn load_partition(&self, index: usize) -> Result<Partition<T>, Error>;
}

/// Capability of loading a codebook.
///
/// Supposed to be specialized for a specific [`Database`].
pub trait LoadCodebook<T> {
    /// Loads a codebook at a given index.
    ///
    /// `index` is the index of the subvector division that the codebook
    /// belongs to.
    ///
    /// Fails if `index` is out of the bounds.
    fn load_codebook(&self, index: usize) -> Result<BlockVectorSet<T>, Error>;
}

/// Capability of loading partition centroids.
///
/// Supposed to be specialized for a specific [`Database`].
pub trait LoadPartitionCentroids<T> {
    /// Loads partition centroids.
    ///
    /// The i-th vector in the returned set is the centroid of the i-th
    /// partition.
    ///
    /// Fails if:
    /// - vector size does not match
    /// - number of partitions does not match
    fn load_partition_centroids(&self) -> Result<BlockVectorSet<T>, Error>;
}

/// Events emitted while querying.
///
/// Passed to the event handler of [`Database::query_with_events`] in the
/// order in which the variants are declared; the pair of partition query
/// events is repeated for each selected partition.
#[derive(Debug)]
pub enum QueryEvent {
    /// Starting to initialize a query.
    StartingQueryInitialization,
    /// Finished initializing a query.
    FinishedQueryInitialization,
    /// Starting to select partitions to query.
    StartingPartitionSelection,
    /// Finished selecting partitions to query.
    FinishedPartitionSelection,
    /// Starting to run a query on a specific partition.
    ///
    /// Carries the index of the partition.
    StartingPartitionQuery(usize),
    /// Finished running a query on a specific partition.
    ///
    /// Carries the index of the partition.
    /// Not emitted if the query on the partition fails.
    FinishedPartitionQuery(usize),
    /// Starting to select k-nearest neighbors.
    StartingResultSelection,
    /// Finished selecting k-nearest neighbors.
    FinishedResultSelection,
}

/// Query in a specific partition.
///
/// Holds a borrow of the codebooks of the database for as long as it is
/// alive.
struct PartitionQuery<'a, T, FS> {
    // Database to query.
    db: &'a Database<T, FS>,
    // Codebooks of the database; one per subvector division.
    codebooks: Ref<'a, Vec<BlockVectorSet<T>>>,
    // Index of the partition to query.
    partition_index: usize,
    localized: Vec<T>, // query vector - partition centroid
    // Maximum number of results to return.
    k: usize,
}

impl<'a, T, FS> PartitionQuery<'a, T, FS>
where
    T: Scalar,
    FS: FileSystem,
    Database<T, FS>: LoadPartition<T> + LoadCodebook<T>,
{
    // Runs the query in the partition.
    //
    // Builds a table of squared distances between each subvector of the
    // localized query vector and every code vector in the corresponding
    // codebook. Then approximates the squared distance to each encoded
    // vector in the partition by summing up the table entries selected by
    // its codes, and keeps the `k` best results.
    //
    // Lazily loads the partition.
    //
    // Fails if the partition cannot be loaded.
    //
    // Panics if an encoded vector has fewer elements than the number of
    // divisions, or a code addresses an entry beyond the distance table.
    fn execute(&self) -> Result<Vec<QueryResult<'a, T, FS>>, Error> {
        let num_divisions = self.db.num_divisions();
        let num_codes = self.db.num_codes();
        let subvector_size = self.db.subvector_size();
        // loads the partition
        let partition = self.db.get_partition(self.partition_index)?;
        // calculates the distance table
        // layout: distance_table[di * num_codes + ci]
        let mut distance_table: Vec<T> =
            Vec::with_capacity(num_divisions * num_codes);
        // scratch buffer reused for every code vector.
        // zero-filled instead of left uninitialized: handing out a slice of
        // uninitialized elements is undefined behavior.
        let mut vector_buf: Vec<T> = vec![T::zero(); subvector_size];
        for di in 0..num_divisions {
            let from = di * subvector_size;
            let to = from + subvector_size;
            let subv = &self.localized[from..to];
            let codebook = &self.codebooks[di];
            for ci in 0..num_codes {
                let code_vector = codebook.get(ci);
                let d = &mut vector_buf[..];
                subtract(subv, code_vector, d);
                distance_table.push(dot(d, d));
            }
        }
        // approximates the squared distances to vectors in the partition
        let num_vectors = partition.num_vectors();
        let mut results: NBestByKey<QueryResult<'a, T, FS>, T, _> =
            NBestByKey::new(
                self.k,
                |i: &QueryResult<'a, T, FS>| i.squared_distance,
            );
        for vi in 0..num_vectors {
            let encoded_vector = partition.get_encoded_vector(vi).unwrap();
            let mut distance = T::zero();
            for di in 0..num_divisions {
                let ci = encoded_vector[di] as usize;
                distance += distance_table[di * num_codes + ci];
            }
            results.push(QueryResult {
                db: self.db,
                partition_index: self.partition_index,
                vector_id: partition.get_vector_id(vi).unwrap().clone(),
                vector_index: vi,
                squared_distance: distance,
            });
        }
        Ok(results.into())
    }
}

/// Query result.
///
/// Represents a vector found by a query, and keeps a reference to the
/// database so that attributes of the vector can be looked up with
/// [`QueryResult::get_attribute`].
#[derive(Clone)]
pub struct QueryResult<'a, T, FS> {
    // Database which the result came from.
    db: &'a Database<T, FS>,
    /// Partition index.
    pub partition_index: usize,
    /// Vector ID. Must be unique across the entire database.
    pub vector_id: Uuid,
    /// Vector index. Local index in the partition.
    pub vector_index: usize,
    /// Approximate squared distance.
    pub squared_distance: T,
}

impl<'a, T, FS> QueryResult<'a, T, FS>
where
    T: Scalar,
    FS: FileSystem,
    Database<T, FS>:
        LoadPartition<T> + LoadCodebook<T> + LoadPartitionCentroids<T>,
{
    /// Returns an attribute value of the vector corresponding to the result.
    ///
    /// `None` if no value is associated with `key`.
    ///
    /// The first call of this function on a result belonging to a partition
    /// will take longer because it will load the attributes of the partition.
    pub fn get_attribute<K>(
        &self,
        key: &K,
    ) -> Result<Option<AttributeValueRef>, Error>
    where
        String: Borrow<K>,
        K: Hash + Eq + ?Sized,
    {
        // only the attributes log of the partition of the result is needed.
        let Self { db, partition_index, vector_id, .. } = self;
        db.get_attribute_in_partition(*partition_index, vector_id, key)
    }
}

mod f32impl {
    use super::*;

    impl<FS> LoadDatabase<f32, FS> for Database<f32, FS>
    where
        FS: FileSystem,
    {
        /// Loads a database.
        ///
        /// Fails if:
        /// - `vector_size` is zero
        /// - `num_divisions` is zero
        /// - `num_partitions` is zero
        /// - `num_codes` is zero
        /// - `vector_size` is not a multiple of `num_divisions`
        /// - `num_partitions` and `partitions_refs.len()` do not match
        /// - `vector_size` and centroid size do not match
        /// - `num_divisions` and `codebook_refs.len()` do not match
        fn load_database<P>(fs: FS, path: P) -> Result<Database<f32, FS>, Error>
        where
            P: AsRef<str>,
        {
            let mut f = fs.open_compressed_hashed_file(path)?;
            let db: ProtosDatabase = read_message(&mut f)?;
            f.verify()?;
            let vector_size = db.vector_size as usize;
            let num_partitions = db.num_partitions as usize;
            let num_divisions = db.num_divisions as usize;
            let num_codes = db.num_codes as usize;
            if vector_size == 0 {
                return Err(Error::InvalidData(format!("vector_size is zero")));
            }
            if num_divisions == 0 {
                return Err(Error::InvalidData(format!("num_divisions is zero")));
            }
            if num_partitions == 0 {
                return Err(Error::InvalidData(format!("num_partitions is zero")));
            }
            if num_codes == 0 {
                return Err(Error::InvalidData(format!("num_codes is zero")));
            }
            if vector_size % num_divisions != 0 {
                return Err(Error::InvalidData(format!(
                    "vector_size {} is not multiple of num_divisions {}",
                    vector_size,
                    num_divisions,
                )));
            }
            if num_partitions != db.partition_ids.len() {
                return Err(Error::InvalidData(format!(
                    "num_partitions {} and partition_ids.len() {} do not match",
                    db.num_partitions,
                    db.partition_ids.len(),
                )));
            }
            if num_divisions != db.codebook_ids.len() {
                return Err(Error::InvalidData(format!(
                    "num_divisions {} and codebook_ids.len() {} do not match",
                    db.num_divisions,
                    db.codebook_ids.len(),
                )));
            }
            let db = Database {
                fs,
                vector_size,
                num_partitions,
                num_divisions,
                num_codes,
                partition_ids: db.partition_ids,
                partitions: RefCell::new(vec![None; num_partitions]),
                partition_centroids_id: db.partition_centroids_id,
                partition_centroids: OnceCell::new(),
                codebook_ids: db.codebook_ids,
                codebooks: RefCell::new(None),
                attributes_log_ids: db.attributes_log_ids,
                attributes_log_load_flags:
                    RefCell::new(vec![false; num_partitions]),
                attribute_names: db.attribute_names,
                attribute_table: RefCell::new(None),
            };
            Ok(db)
        }
    }

    impl<FS> LoadPartitionCentroids<f32> for Database<f32, FS>
    where
        FS: FileSystem,
    {
        /// Loads partition centroids.
        ///
        /// Fails if:
        /// - partition centroids file cannot be opened, decoded, or verified.
        /// - vector size does not match that of the database.
        /// - number of centroids does not match the number of partitions.
        fn load_partition_centroids(
            &self,
        ) -> Result<BlockVectorSet<f32>, Error> {
            let mut f = self.fs.open_hashed_file(format!(
                "partitions/{}.{}",
                self.partition_centroids_id,
                PROTOBUF_EXTENSION,
            ))?;
            let partition_centroids: ProtosVectorSet = read_message(&mut f)?;
            // verifies the file like the other loaders in this module do.
            f.verify()?;
            let partition_centroids: BlockVectorSet<f32> =
                partition_centroids.deserialize()?;
            if partition_centroids.vector_size() != self.vector_size() {
                return Err(Error::InvalidData(format!(
                    "partition centroids vector size mismatch: expected {}, got {}",
                    self.vector_size(),
                    partition_centroids.vector_size(),
                )));
            }
            if partition_centroids.len() != self.num_partitions() {
                return Err(Error::InvalidData(format!(
                    "partition centroids data length mismatch: expected {}, got {}",
                    self.num_partitions(),
                    partition_centroids.len(),
                )));
            }
            Ok(partition_centroids)
        }
    }

    impl<FS> LoadCodebook<f32> for Database<f32, FS>
    where
        FS: FileSystem,
    {
        /// Loads the codebook of a specified division.
        ///
        /// Fails if:
        /// - `index` exceeds the number of codebooks.
        /// - codebook file cannot be opened, decoded, or verified.
        /// - vector size does not match the subvector size of the database.
        /// - number of vectors does not match the number of codes of the
        ///   database.
        fn load_codebook(
            &self,
            index: usize,
        ) -> Result<BlockVectorSet<f32>, Error>
        where
            FS: FileSystem,
        {
            let num_codebooks = self.num_divisions();
            if index >= num_codebooks {
                return Err(Error::InvalidArgs(format!(
                    "index {} exceeds the number of codebooks {}",
                    index,
                    num_codebooks,
                )));
            }
            // reads and verifies the codebook file.
            let path = format!(
                "codebooks/{}.{}",
                self.get_codebook_id(index).unwrap(),
                PROTOBUF_EXTENSION,
            );
            let mut file = self.fs.open_hashed_file(path)?;
            let message: ProtosVectorSet = read_message(&mut file)?;
            file.verify()?;
            let codebook: BlockVectorSet<f32> = message.deserialize()?;
            // checks the shape of the codebook against the database.
            let expected_vector_size = self.subvector_size();
            if codebook.vector_size() != expected_vector_size {
                return Err(Error::InvalidData(format!(
                    "vector_size is inconsistent: expected {} but got {}",
                    expected_vector_size,
                    codebook.vector_size(),
                )));
            }
            let expected_num_codes = self.num_codes();
            if codebook.len() != expected_num_codes {
                return Err(Error::InvalidData(format!(
                    "number of codes is inconsistent: expected {} but got {}",
                    expected_num_codes,
                    codebook.len(),
                )));
            }
            Ok(codebook)
        }
    }

    impl<FS> LoadPartition<f32> for Database<f32, FS>
    where
        FS: FileSystem,
    {
        /// Loads a partition.
        ///
        /// Loads a Protocol Buffers message (`p`) from the file system.
        ///
        /// Fails if:
        /// - `index` exceeds the number of partitions.
        /// - `self.vector_size` and `p.vector_size` do not match
        /// - `self.num_divisions` and `p.num_divisions` do not match
        /// - `p.num_vectors` and `p.encoded_vectors.len()` do not match
        /// - `p.num_vectors` and `p.vector_ids.len()` do not match
        /// - `p.num_divisions` and encoded vector length do not match
        fn load_partition(
            &self,
            index: usize,
        ) -> Result<Partition<f32>, Error> {
            if index >= self.num_partitions {
                return Err(Error::InvalidArgs(format!(
                    "index {} exceeds the number of partitions {}",
                    index,
                    self.num_partitions,
                )));
            }
            let mut f = self.fs.open_compressed_hashed_file(format!(
                "partitions/{}.{}",
                self.get_partition_id(index).unwrap(),
                PROTOBUF_EXTENSION,
            ))?;
            let partition: ProtosPartition = read_message(&mut f)?;
            f.verify()?;
            let vector_size = partition.vector_size as usize;
            let num_divisions = partition.num_divisions as usize;
            let encoded_vectors: BlockVectorSet<u32> = partition.encoded_vectors
                .into_option()
                .ok_or(Error::InvalidData(
                    "missing encoded vectors".to_string(),
                ))?
                .deserialize()?;
            if vector_size != self.vector_size() {
                return Err(Error::InvalidData(format!(
                    "vector_size {} and partition.vector_size {} do not match",
                    self.vector_size(),
                    vector_size,
                )));
            }
            if num_divisions != self.num_divisions() {
                return Err(Error::InvalidData(format!(
                    "num_divisions {} and partition.num_divisions {} do not match",
                    self.num_divisions(),
                    num_divisions,
                )));
            }
            if encoded_vectors.len() != partition.vector_ids.len() {
                return Err(Error::InvalidData(format!(
                    "number of vector IDs is inconsistent: exptected {} but got {}",
                    encoded_vectors.len(),
                    partition.vector_ids.len(),
                )));
            }
            let vector_ids: Vec<Uuid> = partition.vector_ids
                .into_iter()
                .map(|id| id.deserialize().unwrap())
                .collect();
            Ok(Partition {
                _t: std::marker::PhantomData,
                encoded_vectors,
                vector_ids,
            })
        }
    }
}