Skip to content

Commit

Permalink
Merge pull request #181 from dandi/gh-160
Browse files Browse the repository at this point in the history
Wrap URLs in a newtype that enforces HTTP-ness
  • Loading branch information
jwodder committed Aug 8, 2024
2 parents 668252f + f86c9f3 commit 5872641
Show file tree
Hide file tree
Showing 12 changed files with 284 additions and 219 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ In Development
- Reduced the sizes of a number of streams & futures
- Added doc comments to much of the code
- Return 502 status when a backend returns an invalid response
- Require `--api-url` (and other URLs retrieved from APIs) to be HTTP(S)

v0.4.0 (2024-07-09)
-------------------
Expand Down
33 changes: 16 additions & 17 deletions src/dandi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub(crate) use self::types::*;
pub(crate) use self::version_id::*;
use crate::consts::S3CLIENT_CACHE_SIZE;
use crate::dav::ErrorClass;
use crate::httputil::{urljoin_slashed, BuildClientError, Client, HttpError};
use crate::httputil::{BuildClientError, Client, HttpError, HttpUrl};
use crate::paths::{ParsePureDirPathError, PureDirPath, PurePath};
use crate::s3::{
BucketSpec, GetBucketRegionError, PrefixedS3Client, S3Client, S3Error, S3Location,
Expand All @@ -20,7 +20,6 @@ use serde::de::DeserializeOwned;
use smartstring::alias::CompactString;
use std::sync::Arc;
use thiserror::Error;
use url::Url;

/// A client for fetching data about Dandisets, their versions, and their
/// assets from a DANDI Archive instance
Expand All @@ -30,7 +29,7 @@ pub(crate) struct DandiClient {
inner: Client,

/// The base API URL of the Archive instance
api_url: Url,
api_url: HttpUrl,

/// A cache of [`S3Client`] instances that are used for listing Zarr
/// entries on the Archive's S3 bucket.
Expand All @@ -51,7 +50,7 @@ impl DandiClient {
/// # Errors
///
/// Returns an error if construction of the inner `reqwest::Client` fails
pub(crate) fn new(api_url: Url) -> Result<Self, BuildClientError> {
pub(crate) fn new(api_url: HttpUrl) -> Result<Self, BuildClientError> {
let inner = Client::new()?;
let s3clients = CacheBuilder::new(S3CLIENT_CACHE_SIZE)
.name("s3clients")
Expand All @@ -65,24 +64,26 @@ impl DandiClient {

/// Return the URL formed by appending the given path segments and a
/// trailing slash to the path of the API base URL
fn get_url<I>(&self, segments: I) -> Url
fn get_url<I>(&self, segments: I) -> HttpUrl
where
I: IntoIterator,
I::Item: AsRef<str>,
{
urljoin_slashed(&self.api_url, segments)
let mut url = self.api_url.clone();
url.extend(segments).ensure_dirpath();
url
}

/// Perform a `GET` request to the given URL and return the deserialized
/// JSON response body
async fn get<T: DeserializeOwned>(&self, url: Url) -> Result<T, DandiError> {
async fn get<T: DeserializeOwned>(&self, url: HttpUrl) -> Result<T, DandiError> {
self.inner.get_json(url).await.map_err(Into::into)
}

/// Return a [`futures_util::Stream`] that makes paginated `GET` requests
/// to the given URL and its subsequent pages and yields a `Result<T,
/// DandiError>` value for each item deserialized from the responses
fn paginate<T: DeserializeOwned + 'static>(&self, url: Url) -> Paginate<T> {
fn paginate<T: DeserializeOwned + 'static>(&self, url: HttpUrl) -> Paginate<T> {
Paginate::new(self, url)
}

Expand Down Expand Up @@ -158,7 +159,7 @@ impl DandiClient {

/// Return the URL for the metadata for the given version of the given
/// Dandiset
fn version_metadata_url(&self, dandiset_id: &DandisetId, version_id: &VersionId) -> Url {
fn version_metadata_url(&self, dandiset_id: &DandisetId, version_id: &VersionId) -> HttpUrl {
self.get_url([
"dandisets",
dandiset_id.as_ref(),
Expand Down Expand Up @@ -396,7 +397,7 @@ impl<'a> VersionEndpoint<'a> {
}

/// Return the URL for the version's metadata
fn metadata_url(&self) -> Url {
fn metadata_url(&self) -> HttpUrl {
self.client
.version_metadata_url(&self.dandiset_id, &self.version_id)
}
Expand All @@ -421,7 +422,7 @@ impl<'a> VersionEndpoint<'a> {

/// Return the URL for the metadata of the asset in this version with the
/// given asset ID
fn asset_metadata_url(&self, asset_id: &str) -> Url {
fn asset_metadata_url(&self, asset_id: &str) -> HttpUrl {
self.client.get_url([
"dandisets",
self.dandiset_id.as_ref(),
Expand All @@ -447,10 +448,9 @@ impl<'a> VersionEndpoint<'a> {
self.version_id.as_ref(),
"assets",
]);
url.query_pairs_mut()
.append_pair("path", path.as_ref())
.append_pair("metadata", "1")
.append_pair("order", "path");
url.append_query_param("path", path.as_ref());
url.append_query_param("metadata", "1");
url.append_query_param("order", "path");
let dirpath = path.to_dir_path();
let mut stream = self.client.paginate::<RawAsset>(url.clone());
while let Some(asset) = stream.try_next().await? {
Expand Down Expand Up @@ -480,8 +480,7 @@ impl<'a> VersionEndpoint<'a> {
"paths",
]);
if let Some(path) = path {
url.query_pairs_mut()
.append_pair("path_prefix", path.as_ref());
url.append_query_param("path_prefix", path.as_ref());
}
self.client.paginate(url)
}
Expand Down
16 changes: 10 additions & 6 deletions src/dandi/streams.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
use super::types::Page;
use super::{DandiClient, DandiError};
use crate::httputil::{Client, HttpError};
use crate::httputil::{Client, HttpError, HttpUrl};
use futures_util::{future::BoxFuture, FutureExt, Stream};
use pin_project::pin_project;
use serde::de::DeserializeOwned;
use serde::{de::DeserializeOwned, Deserialize};
use std::pin::Pin;
use std::task::{ready, Context, Poll};
use url::Url;

// Implementing paginate() as a manually-implemented Stream instead of via
// async_stream lets us save about 4700 bytes on dandidav's top-level Futures.
Expand All @@ -21,13 +19,13 @@ enum PaginateState<T> {
Requesting(BoxFuture<'static, Result<Page<T>, HttpError>>),
Yielding {
results: std::vec::IntoIter<T>,
next: Option<Url>,
next: Option<HttpUrl>,
},
Done,
}

impl<T> Paginate<T> {
pub(super) fn new(client: &DandiClient, url: Url) -> Self {
pub(super) fn new(client: &DandiClient, url: HttpUrl) -> Self {
Paginate {
client: client.inner.clone(),
state: PaginateState::Yielding {
Expand Down Expand Up @@ -78,3 +76,9 @@ where
}
}
}

/// One page of a paginated response from the DANDI Archive API
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct Page<T> {
// URL of the next page of results, or `None` on the last page
next: Option<HttpUrl>,
// The items contained in this page
results: Vec<T>,
}
30 changes: 12 additions & 18 deletions src/dandi/types.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
use super::{DandisetId, VersionId};
use crate::httputil::HttpUrl;
use crate::paths::{PureDirPath, PurePath};
use crate::s3::{PrefixedS3Client, S3Entry, S3Folder, S3Location, S3Object};
use serde::Deserialize;
use thiserror::Error;
use time::OffsetDateTime;
use url::Url;

/// One page of a paginated response from the DANDI Archive API
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
pub(super) struct Page<T> {
// URL of the next page of results, or `None` on the last page
pub(super) next: Option<Url>,
// The items contained in this page
pub(super) results: Vec<T>,
}

#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
pub(super) struct RawDandiset {
Expand Down Expand Up @@ -67,7 +61,7 @@ pub(super) struct RawDandisetVersion {
}

impl RawDandisetVersion {
pub(super) fn with_metadata_url(self, metadata_url: Url) -> DandisetVersion {
pub(super) fn with_metadata_url(self, metadata_url: HttpUrl) -> DandisetVersion {
DandisetVersion {
version: self.version,
size: self.size,
Expand All @@ -84,7 +78,7 @@ pub(crate) struct DandisetVersion {
pub(crate) size: i64,
pub(crate) created: OffsetDateTime,
pub(crate) modified: OffsetDateTime,
pub(crate) metadata_url: Url,
pub(crate) metadata_url: HttpUrl,
}

#[derive(Clone, Debug, Eq, PartialEq)]
Expand Down Expand Up @@ -166,7 +160,7 @@ pub(crate) struct BlobAsset {
pub(crate) created: OffsetDateTime,
pub(crate) modified: OffsetDateTime,
pub(crate) metadata: AssetMetadata,
pub(crate) metadata_url: Url,
pub(crate) metadata_url: HttpUrl,
}

impl BlobAsset {
Expand All @@ -178,18 +172,18 @@ impl BlobAsset {
self.metadata.digest.dandi_etag.as_deref()
}

pub(crate) fn archive_url(&self) -> Option<&Url> {
pub(crate) fn archive_url(&self) -> Option<&HttpUrl> {
self.metadata
.content_url
.iter()
.find(|url| S3Location::parse_url(url).is_err())
.find(|url| S3Location::parse_url(url.as_url()).is_err())
}

pub(crate) fn s3_url(&self) -> Option<&Url> {
pub(crate) fn s3_url(&self) -> Option<&HttpUrl> {
self.metadata
.content_url
.iter()
.find(|url| S3Location::parse_url(url).is_ok())
.find(|url| S3Location::parse_url(url.as_url()).is_ok())
}
}

Expand All @@ -202,15 +196,15 @@ pub(crate) struct ZarrAsset {
pub(crate) created: OffsetDateTime,
pub(crate) modified: OffsetDateTime,
pub(crate) metadata: AssetMetadata,
pub(crate) metadata_url: Url,
pub(crate) metadata_url: HttpUrl,
}

impl ZarrAsset {
pub(crate) fn s3location(&self) -> Option<S3Location> {
self.metadata
.content_url
.iter()
.find_map(|url| S3Location::parse_url(url).ok())
.find_map(|url| S3Location::parse_url(url.as_url()).ok())
}

pub(crate) fn make_resource(&self, value: S3Entry) -> DandiResource {
Expand Down Expand Up @@ -246,7 +240,7 @@ impl ZarrAsset {
#[serde(rename_all = "camelCase")]
pub(crate) struct AssetMetadata {
encoding_format: Option<String>,
content_url: Vec<Url>,
content_url: Vec<HttpUrl>,
digest: AssetDigests,
}

Expand Down Expand Up @@ -374,7 +368,7 @@ pub(crate) struct ZarrEntry {
pub(crate) size: i64,
pub(crate) modified: OffsetDateTime,
pub(crate) etag: String,
pub(crate) url: Url,
pub(crate) url: HttpUrl,
}

#[derive(Clone, Debug)]
Expand Down
12 changes: 6 additions & 6 deletions src/dav/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ use super::xml::{PropValue, Property};
use super::VersionSpec;
use crate::consts::{DEFAULT_CONTENT_TYPE, YAML_CONTENT_TYPE};
use crate::dandi::*;
use crate::httputil::HttpUrl;
use crate::paths::{PureDirPath, PurePath};
use crate::zarrman::*;
use enum_dispatch::enum_dispatch;
use serde::{ser::Serializer, Serialize};
use time::OffsetDateTime;
use url::Url;

/// Trait for querying the values of WebDAV properties from WebDAV resources
///
Expand Down Expand Up @@ -282,7 +282,7 @@ pub(super) struct DavCollection {

/// A URL for retrieving the resource's associated metadata (if any) from
/// the Archive instance
pub(super) metadata_url: Option<Url>,
pub(super) metadata_url: Option<HttpUrl>,
}

impl DavCollection {
Expand Down Expand Up @@ -552,7 +552,7 @@ pub(super) struct DavItem {

/// A URL for retrieving the resource's associated metadata (if any) from
/// the Archive instance
pub(super) metadata_url: Option<Url>,
pub(super) metadata_url: Option<HttpUrl>,
}

impl DavItem {
Expand Down Expand Up @@ -727,19 +727,19 @@ pub(super) enum DavContent {
#[derive(Clone, Debug, Eq, PartialEq)]
pub(super) enum Redirect {
/// A single URL to always redirect to
Direct(Url),
Direct(HttpUrl),

/// An S3 URL and an Archive instance URL, to be selected between based on
/// whether `--prefer-s3-redirects` was supplied at program invocation
Alt { s3: Url, archive: Url },
Alt { s3: HttpUrl, archive: HttpUrl },
}

impl Redirect {
/// Resolve to a single URL.
///
/// If `prefer_s3` is `true`, `Alt` variants resolve to their `s3` field;
/// otherwise, they resolve to their `archive` field.
pub(super) fn get_url(&self, prefer_s3: bool) -> &Url {
pub(super) fn get_url(&self, prefer_s3: bool) -> &HttpUrl {
match self {
Redirect::Direct(u) => u,
Redirect::Alt { s3, archive } => {
Expand Down
11 changes: 6 additions & 5 deletions src/dav/util.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::VersionSpec;
use crate::consts::DAV_XML_CONTENT_TYPE;
use crate::dandi::DandisetId;
use crate::httputil::HttpUrl;
use crate::paths::PureDirPath;
use axum::{
async_trait,
Expand Down Expand Up @@ -129,14 +130,14 @@ impl AsRef<str> for Href {
}
}

impl From<url::Url> for Href {
fn from(value: url::Url) -> Href {
Href(value.into())
impl From<HttpUrl> for Href {
fn from(value: HttpUrl) -> Href {
Href(value.to_string())
}
}

impl From<&url::Url> for Href {
fn from(value: &url::Url) -> Href {
impl From<&HttpUrl> for Href {
fn from(value: &HttpUrl) -> Href {
Href(value.to_string())
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/dav/xml/multistatus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ pub(crate) enum ToXmlError {
#[cfg(test)]
mod tests {
use super::*;
use crate::httputil::HttpUrl;
use indoc::indoc;
use pretty_assertions::assert_eq;

Expand Down Expand Up @@ -222,7 +223,8 @@ mod tests {
status: "HTTP/1.1 307 TEMPORARY REDIRECT".into(),
}],
location: Some(
url::Url::parse("https://www.example.com/data/quux.dat")
"https://www.example.com/data/quux.dat"
.parse::<HttpUrl>()
.unwrap()
.into(),
),
Expand Down
Loading

0 comments on commit 5872641

Please sign in to comment.