1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
//! IO utilities.

use base64::{
    Engine,
    engine::general_purpose::{URL_SAFE_NO_PAD as base64_engine},
};
use flate2::Compression;
use flate2::read::ZlibDecoder;
use flate2::write::ZlibEncoder;
use std::ffi::OsStr;
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use tempfile::NamedTempFile;

use crate::error::Error;

/// Abstracts a file system.
pub trait FileSystem {
    /// File that calculates the hash of its contents.
    type HashedFileOut: HashedFileOut;
    /// File whose contents can be verified with the hash.
    type HashedFileIn: HashedFileIn;

    /// Creates a file that calculates the hash of its contents.
    fn create_hashed_file(&self) -> Result<Self::HashedFileOut, Error>;

    /// Creates a hashed file in a given directory.
    fn create_hashed_file_in(
        &self,
        path: impl AsRef<str>,
    ) -> Result<Self::HashedFileOut, Error>;

    /// Opens a file whose contents can be verified with a hash.
    fn open_hashed_file(
        &self,
        path: impl AsRef<str>,
    ) -> Result<Self::HashedFileIn, Error>;

    /// Creates a compressed file that calculates the hash of its contents.
    fn create_compressed_hashed_file(
        &self,
    ) -> Result<CompressedHashedFileOut<Self::HashedFileOut>, Error> {
        let file = self.create_hashed_file()?;
        Ok(CompressedHashedFileOut::new(file))
    }

    /// Creates a compressed hashed file in a given directory.
    fn create_compressed_hashed_file_in(
        &self,
        path: impl AsRef<str>,
    ) -> Result<CompressedHashedFileOut<Self::HashedFileOut>, Error> {
        let file = self.create_hashed_file_in(path)?;
        Ok(CompressedHashedFileOut::new(file))
    }

    /// Opens a compressed file whose contents can be verified with a hash.
    fn open_compressed_hashed_file(
        &self,
        path: impl AsRef<str>,
    ) -> Result<CompressedHashedFileIn<Self::HashedFileIn>, Error> {
        let file = self.open_hashed_file(path)?;
        Ok(CompressedHashedFileIn::new(file))
    }
}

/// File whose name will be the hash of its contents.
///
/// Contents are supplied through the [`Write`] supertrait.
pub trait HashedFileOut: Write {
    /// Persists the file.
    ///
    /// Finishes the calculation of the hash and persists the file.
    /// You should flush the stream before calling this function.
    ///
    /// `extension` is the extension to use for the persisted file.
    ///
    /// Returns the encoded hash value that is supposed to be a URL-safe Base64
    /// encoded SHA-256 digest.
    fn persist(self, extension: impl AsRef<str>) -> Result<String, Error>;
}

/// File whose name is the hash of its contents.
///
/// Contents are consumed through the [`Read`] supertrait.
pub trait HashedFileIn: Read {
    /// Verifies the file.
    ///
    /// Finishes the calculation of the hash and verifies the file.
    /// You should call this function after the entire file has been read.
    ///
    /// The file name is supposed to be a URL-safe Base64 encoded SHA-256
    /// digest.
    ///
    /// Returns `Ok(())` when verification succeeds and an [`Error`]
    /// otherwise.
    fn verify(self) -> Result<(), Error>;
}

/// Compressed file that calculates the hash of its contents.
pub struct CompressedHashedFileOut<W>
where
    W: std::io::Write,
{
    encoder: ZlibEncoder<W>,
}

impl<W> CompressedHashedFileOut<W>
where
    W: std::io::Write,
{
    /// Writes compressed data to a given [`Write`].
    pub fn new(w: W) -> Self {
        Self {
            encoder: ZlibEncoder::new(w, Compression::default()),
        }
    }
}

// Both methods delegate straight to the zlib encoder, which in turn writes
// the compressed output to the inner writer.
impl<W: Write> Write for CompressedHashedFileOut<W> {
    /// Feeds `buf` to the encoder and reports how many input bytes it
    /// accepted.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        Write::write(&mut self.encoder, buf)
    }

    /// Flushes the encoder.
    fn flush(&mut self) -> std::io::Result<()> {
        Write::flush(&mut self.encoder)
    }
}

impl<W: HashedFileOut> HashedFileOut for CompressedHashedFileOut<W> {
    /// Finishes the zlib stream, then persists the inner hashed file.
    ///
    /// Fails if the encoder cannot finish the stream or if the inner
    /// `persist` fails; otherwise returns whatever the inner `persist`
    /// returns.
    fn persist(self, extension: impl AsRef<str>) -> Result<String, Error> {
        // `finish` completes the compressed stream and hands back the inner
        // writer.
        let inner = self.encoder.finish()?;
        inner.persist(extension)
    }
}

/// Compressed file whose contents can be verified with a hash.
pub struct CompressedHashedFileIn<R>
where
    R: std::io::Read,
{
    decoder: ZlibDecoder<R>,
}

impl<R> CompressedHashedFileIn<R>
where
    R: std::io::Read,
{
    /// Reads compressed data from a given [`Read`].
    pub fn new(r: R) -> Self {
        Self {
            decoder: ZlibDecoder::new(r),
        }
    }
}

// Reading delegates straight to the zlib decoder, which pulls compressed
// bytes from the inner reader as needed.
impl<R: Read> Read for CompressedHashedFileIn<R> {
    /// Fills `buf` with decompressed bytes and returns how many were
    /// produced.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        Read::read(&mut self.decoder, buf)
    }
}

impl<R: HashedFileIn> HashedFileIn for CompressedHashedFileIn<R> {
    /// Unwraps the decoder and verifies the inner hashed file.
    ///
    /// The result is exactly what the inner `verify` returns.
    fn verify(self) -> Result<(), Error> {
        let inner = self.decoder.into_inner();
        inner.verify()
    }
}

/// [`FileSystem`] backed by the local file system.
pub struct LocalFileSystem {
    // Directory that every path handed to this file system is joined onto.
    base_path: PathBuf,
}

impl LocalFileSystem {
    /// Creates a local file system rooted at `base_path`.
    ///
    /// The path is only copied and stored here; nothing on disk is touched.
    pub fn new(base_path: impl AsRef<Path>) -> Self {
        let root: &Path = base_path.as_ref();
        Self {
            base_path: root.to_owned(),
        }
    }
}

impl FileSystem for LocalFileSystem {
    type HashedFileOut = LocalHashedFileOut;
    type HashedFileIn = LocalHashedFileIn;

    /// Creates a hashed output file that will be persisted directly under
    /// the base path.
    fn create_hashed_file(&self) -> Result<Self::HashedFileOut, Error> {
        let target_dir = self.base_path.to_path_buf();
        LocalHashedFileOut::create(target_dir)
    }

    /// Creates a hashed output file that will be persisted under the base
    /// path joined with `path`.
    ///
    /// `path` is joined as given, with no normalization.
    fn create_hashed_file_in(
        &self,
        path: impl AsRef<str>,
    ) -> Result<Self::HashedFileOut, Error> {
        let target_dir = self.base_path.join(path.as_ref());
        LocalHashedFileOut::create(target_dir)
    }

    /// Opens the file at the base path joined with `path` for reading.
    ///
    /// `path` is joined as given, with no normalization.
    fn open_hashed_file(
        &self,
        path: impl AsRef<str>,
    ) -> Result<Self::HashedFileIn, Error> {
        let file_path = self.base_path.join(path.as_ref());
        LocalHashedFileIn::open(file_path)
    }
}

/// Writable file in the local file system.
///
/// Created as a temporary file and renamed to the hash of its contents.
pub struct LocalHashedFileOut {
    // Temporary file that receives the contents until it is persisted.
    tempfile: NamedTempFile,
    // Directory the file is persisted into.
    base_path: PathBuf,
    // Context to calculate an SHA-256 digest.
    context: ring::digest::Context,
}

impl LocalHashedFileOut {
    /// Creates a temporary file to be persisted under a given path.
    ///
    /// The temporary file is created inside `base_path` itself, which is
    /// created first if it does not exist. Persisting a named temporary file
    /// is a rename, and a rename cannot cross file systems; creating the
    /// file in the system temporary directory would make every later
    /// persist fail whenever that directory lives on a different file
    /// system than `base_path`.
    ///
    /// If the returned value is dropped without being persisted, the
    /// temporary file is removed but the directory is left in place.
    ///
    /// # Errors
    ///
    /// Fails if the directory or the temporary file cannot be created.
    fn create(base_path: PathBuf) -> Result<Self, Error> {
        std::fs::create_dir_all(&base_path)?;
        let tempfile = NamedTempFile::new_in(&base_path)?;
        Ok(LocalHashedFileOut {
            tempfile,
            base_path,
            context: ring::digest::Context::new(&ring::digest::SHA256),
        })
    }
}

impl Write for LocalHashedFileOut {
    /// Writes `buf` to the temporary file and feeds the written bytes to
    /// the digest.
    ///
    /// A single `write` may accept fewer bytes than offered, or fail and be
    /// retried by the caller (as `write_all` does). Only the bytes the file
    /// actually accepted are hashed, so the digest always matches the file
    /// contents.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let written = self.tempfile.write(buf)?;
        self.context.update(&buf[..written]);
        Ok(written)
    }

    /// Flushes the temporary file.
    fn flush(&mut self) -> std::io::Result<()> {
        self.tempfile.flush()
    }
}

impl HashedFileOut for LocalHashedFileOut {
    /// Flushes the temporary file, finishes the SHA-256 digest, and moves
    /// the temporary file to `<base_path>/<hash>` with `extension` applied.
    ///
    /// The base directory is created first when it does not exist yet.
    /// Returns the Base64-encoded digest, which is also the stem of the
    /// persisted file name.
    fn persist(mut self, extension: impl AsRef<str>) -> Result<String, Error> {
        self.flush()?;

        let dir_missing = !self.base_path.exists();
        if dir_missing {
            std::fs::create_dir_all(&self.base_path)?;
        }

        let digest = self.context.finish();
        let encoded = base64_engine.encode(&digest);

        // Build the destination: directory, then hash, then extension.
        let mut destination = self.base_path.join(&encoded);
        destination.set_extension(extension.as_ref());

        self.tempfile.persist(destination)?;
        Ok(encoded)
    }
}

/// Readable file in the local file system whose contents are hashed as they
/// are read.
pub struct LocalHashedFileIn {
    // Open handle the contents are read from.
    file: std::fs::File,
    // Path the file was opened from; its name carries the expected hash.
    path: PathBuf,
    // Running SHA-256 digest of the bytes read so far.
    context: ring::digest::Context,
}

impl LocalHashedFileIn {
    /// Opens the file at `path` and starts a fresh SHA-256 digest for it.
    ///
    /// Fails if the file cannot be opened.
    fn open(path: PathBuf) -> Result<Self, Error> {
        let file = std::fs::File::open(&path)?;
        let context = ring::digest::Context::new(&ring::digest::SHA256);
        Ok(Self {
            file,
            path,
            context,
        })
    }
}

impl Read for LocalHashedFileIn {
    /// Reads from the underlying file and feeds exactly the bytes that were
    /// read into the running digest.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.file.read(buf).map(|count| {
            // Only the filled prefix of `buf` holds file data.
            self.context.update(&buf[..count]);
            count
        })
    }
}

impl HashedFileIn for LocalHashedFileIn {
    /// Finishes the SHA-256 digest of the bytes read so far and compares its
    /// URL-safe Base64 encoding with the hash embedded in the file name.
    ///
    /// The expected hash is the part of the file name before the first `.`.
    /// The URL-safe Base64 alphabet contains no `.`, so this recovers the
    /// hash even when the file carries a multi-part extension such as
    /// `tar.gz`, which `Path::file_stem` would only strip partially.
    ///
    /// # Errors
    ///
    /// Returns [`Error::VerificationFailure`] when the two values differ,
    /// including when the path has no file name.
    fn verify(self) -> Result<(), Error> {
        let digest = self.context.finish();
        let actual = base64_engine.encode(&digest);
        // A lossy conversion is enough: any byte that is not valid UTF-8
        // becomes U+FFFD, which can never match a Base64 string.
        let file_name = self
            .path
            .file_name()
            .map(OsStr::to_string_lossy)
            .unwrap_or_default();
        let expected = file_name.split('.').next().unwrap_or_default();
        if actual == expected {
            Ok(())
        } else {
            Err(Error::VerificationFailure(format!(
                "Expected hash {:?}, but got {}",
                expected,
                actual,
            )))
        }
    }
}