diff --git a/rust/nardl/Cargo.lock b/rust/nardl/Cargo.lock index c1d642f..2fabfed 100644 --- a/rust/nardl/Cargo.lock +++ b/rust/nardl/Cargo.lock @@ -26,6 +26,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "anstream" version = "0.6.14" @@ -81,14 +96,26 @@ version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd066d0b4ef8ecb03a55319dc13aa6910616d0f44008a045bb1835af830abff5" dependencies = [ + "brotli", + "bzip2", "futures-core", "memchr", "pin-project-lite", "tokio", + "xz2", "zstd", "zstd-safe", ] +[[package]] +name = "async-tempfile" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acb90d9834a8015109afc79f1f548223a0614edcbab62fb35b62d4b707e975e7" +dependencies = [ + "tokio", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -143,6 +170,27 @@ dependencies = [ "generic-array", ] +[[package]] +name = "brotli" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "4.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bumpalo" version = "3.16.0" @@ -155,6 +203,27 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "camino" version = "1.1.7" @@ -448,6 +517,23 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.69", +] + [[package]] name = "futures-sink" version = "0.3.30" @@ -467,9 +553,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -739,6 +830,17 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "memchr" version = "2.7.4" @@ -775,6 +877,8 @@ dependencies = [ name = "nardl" version = "0.1.0" dependencies = [ + "async-compression", + "async-tempfile", "base64", "color-eyre", "ed25519-dalek", @@ -783,7 +887,10 @@ dependencies = [ "narinfo", "nix-nar", "reqwest", + "sha2", "tokio", + "tokio-stream", + "tokio-util", ] [[package]] @@ -1094,6 +1201,7 @@ dependencies = [ "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "webpki-roots", "winreg", @@ -1448,6 +1556,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.11" @@ -1690,6 +1809,19 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +[[package]] +name = "wasm-streams" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "web-sys" version = "0.3.69" @@ -1880,6 +2012,15 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "zeroize" version = "1.8.1" diff --git a/rust/nardl/Cargo.toml b/rust/nardl/Cargo.toml index 6b6616d..925ea57 100644 --- a/rust/nardl/Cargo.toml +++ b/rust/nardl/Cargo.toml @@ -4,6 +4,8 @@ version = "0.1.0" edition = "2021" [dependencies] +async-compression = { version = "0.4.11", features = [ "tokio", "xz", "bzip2", "zstd", "brotli" ] } +async-tempfile = "0.6.0" base64 = "0.22.1" color-eyre = "0.6" ed25519-dalek = "2.1.1" @@ -11,5 +13,8 @@ env_logger = "0.11" log = "0.4" narinfo = "1.0.1" nix-nar = "0.3.0" -reqwest = { version = "0.12.5", features = ["http2", "rustls-tls", "zstd"], default-features = false } +reqwest = { version = "0.12.5", features = ["http2", "stream", "rustls-tls", "zstd"], default-features = false } +sha2 = "0.10.8" tokio = { version = "1.38.0", features = ["full"] } +tokio-util = { version = "0.7.11", features = ["io-util"] } +tokio-stream = "0.1.15" diff --git a/rust/nardl/src/main.rs b/rust/nardl/src/main.rs index 64b9bea..cf45724 100644 --- a/rust/nardl/src/main.rs +++ b/rust/nardl/src/main.rs @@ -5,16 +5,17 @@ use base64::{ }, Engine, }; -use color_eyre::{ - eyre::{self, Context, OptionExt}, - owo_colors::styles::ReversedDisplay, -}; +use color_eyre::eyre::{self, Context, OptionExt}; use ed25519_dalek::{Signature, VerifyingKey}; -use reqwest::ResponseBuilderExt; +use sha2::Digest; use std::{ collections::{HashMap, HashSet}, path::Path, + pin::Pin, }; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeekExt}; +use tokio_stream::StreamExt; +use tokio_util::io::StreamReader; // oh no i'm too lazy to add arguments for now const SUBSTITUERS: [&str; 2] = ["https://cache.nixos.org", "https://cache.lix.systems"]; @@ -26,6 +27,8 @@ const KEYS: [&str; 2] = [ const OUTPUT: &str = "/nix/store/n50jk09x9hshwx1lh6k3qaiygc7yxbv9-lix-2.90.0-rc1"; +const TEST_PREFIX: &str = "/tmp/nardl"; + #[tokio::main] async fn main() -> eyre::Result<()> { env_logger::init(); @@ -65,8 +68,19 @@ async fn main() -> eyre::Result<()> { }) .collect::>>()?; + let temp_dir = async_tempfile::TempDir::new().await?; + let client = reqwest::Client::new(); + // We need a no-compression client so that we don't try to decompress + // twice if someone sends a Content-Encoding header from their cache + let client_no_compression = reqwest::Client::builder() + .no_gzip() + .no_brotli() + .no_zstd() + .no_deflate() + .build()?; + // TODO: Handle priority here for cache in cache_base_urls.iter() { let info_str = client @@ -89,6 +103,7 @@ async fn main() -> eyre::Result<()> { let mut outputs_done = HashSet::new(); loop { + // Basic variables let Some(output) = outputs_remaining.pop() else { break; }; @@ -100,9 +115,11 @@ async fn main() -> eyre::Result<()> { .split_once("-") .ok_or_else(|| eyre::eyre!("Invalid output name {}", output))?; - let narinfo_text = get_narinfo(client.clone(), cache_base_urls.as_slice(), &fingerprint) - .await - .wrap_err_with(|| format!("While processing {}", output))?; + // Parse, verify, and handle narinfo + let (narinfo_text, cache_base_url) = + get_narinfo(client.clone(), cache_base_urls.as_slice(), &fingerprint) + .await + .wrap_err_with(|| format!("While processing {}", output))?; let narinfo_parsed = narinfo::NarInfo::parse(&narinfo_text).unwrap(); verify_signature(&narinfo_parsed, &trusted_keys, store_dir, &output) @@ -114,17 +131,70 @@ async fn main() -> eyre::Result<()> { } outputs_remaining.push(reference.to_string()); } + + // Download nar + let nar_url = cache_base_url.join(narinfo_parsed.url)?; + log::trace!("Found nar url {}", nar_url); + + let response = client_no_compression + .get(nar_url) + .send() + .await? + .error_for_status()?; + + let reader = StreamReader::new(response.bytes_stream().map(|result| { + result.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err)) + })); + + let mut decompressed_stream: Pin> = + if let Some(compression) = narinfo_parsed.compression { + match compression.as_ref() { + "zstd" => Box::pin(async_compression::tokio::bufread::ZstdDecoder::new(reader)), + "xz" => Box::pin(async_compression::tokio::bufread::XzDecoder::new(reader)), + "bzip2" => Box::pin(async_compression::tokio::bufread::BzDecoder::new(reader)), + "br" => Box::pin(async_compression::tokio::bufread::BrotliDecoder::new( + reader, + )), + unknown => eyre::bail!("Unknown compression {} in {}", unknown, output), + } + } else { + Box::pin(reader) + }; + + let mut out_file = tokio::fs::OpenOptions::new() + .read(true) + .write(true) + .truncate(true) + .create(true) + .open(temp_dir.as_ref().join("temp.nar")) + .await?; + tokio::io::copy(&mut decompressed_stream, &mut out_file).await?; + + out_file.seek(std::io::SeekFrom::Start(0)).await?; + + let mut buf = [0u8; 1024]; + let mut hasher = sha2::Sha256::new(); + loop { + let num_read = out_file.read(&mut buf).await?; + if num_read == 0 { + break; + } + hasher.update(&buf[0..num_read]); + } + + let found_hash = hasher.finalize(); + log::trace!("Got hash {:?}", found_hash); } Ok(()) } #[must_use] -async fn get_narinfo( +async fn get_narinfo<'a>( client: reqwest::Client, - cache_base_urls: &[reqwest::Url], + cache_base_urls: &'a [reqwest::Url], fingerprint: &str, -) -> eyre::Result { +) -> eyre::Result<(String, &'a reqwest::Url)> { for cache in cache_base_urls.iter() { let response = client .get(cache.join(&format!("{}.narinfo", fingerprint))?) @@ -134,7 +204,13 @@ async fn get_narinfo( continue; } - return response.text().await.wrap_err("Could not download narinfo"); + return Ok(( + response + .text() + .await + .wrap_err("Could not download narinfo")?, + cache, + )); } eyre::bail!("No cache has fingerprint {}", fingerprint); @@ -159,18 +235,26 @@ fn verify_signature( .wrap_err("Invalid signature length")?, ); + let expected_out_path = store_dir + .join(output_name) + .to_str() + .ok_or_eyre("Path not valid UTF-8")? + .to_owned(); + + if expected_out_path != info.store_path { + eyre::bail!("narinfo describes path we weren't expecting"); + } + // no one documents it, but this is all that's actually signed: // https://git.lix.systems/lix-project/lix/src/commit/d461cc1d7b2f489c3886f147166ba5b5e0e37541/src/libstore/path-info.cc#L25 let fingerprint = format!( "1;{};{};{};{}", - store_dir - .join(output_name) - .to_str() - .ok_or_eyre("Path not valid UTF-8")?, + expected_out_path, info.nar_hash, info.nar_size, info.references .iter() + // our narinfo parser sucks and returns empty strings. i should write a new one .filter(|reference| !reference.is_empty()) .map(|reference| store_dir .join(reference.as_ref())