This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs-object-store.git
The following commit(s) were added to refs/heads/main by this push:
new fa40170 `Path` improvements (#546)
fa40170 is described below
commit fa4017018105167c0caebbd9333d5965c5b8a81d
Author: Ruslan Fadeev <[email protected]>
AuthorDate: Thu Dec 18 21:12:11 2025 +0300
`Path` improvements (#546)
* crate::path::Path improvements
* fn prefix -> fn parent
* test: check exact part count
* fix: impl Extend must not add / before the first part
* impl IntoIterator for &Path; add second test for impl Extend
* test: specifically cover adding one segment
* fix: broken intra-doc links
* fix: Path::parent returning None for single segment
---
src/path/mod.rs | 208 +++++++++++++++++++++++++++++++++++++++++++++++++-----
src/path/parts.rs | 36 +++++++++-
2 files changed, 225 insertions(+), 19 deletions(-)
diff --git a/src/path/mod.rs b/src/path/mod.rs
index f8affe8..e8618db 100644
--- a/src/path/mod.rs
+++ b/src/path/mod.rs
@@ -17,7 +17,6 @@
//! Path abstraction for Object Storage
-use itertools::Itertools;
use percent_encoding::percent_decode;
use std::fmt::Formatter;
#[cfg(not(target_arch = "wasm32"))]
@@ -29,9 +28,12 @@ pub const DELIMITER: &str = "/";
/// The path delimiter as a single byte
pub const DELIMITER_BYTE: u8 = DELIMITER.as_bytes()[0];
+/// The path delimiter as a single char
+pub const DELIMITER_CHAR: char = DELIMITER_BYTE as char;
+
mod parts;
-pub use parts::{InvalidPart, PathPart};
+pub use parts::{InvalidPart, PathPart, PathParts};
/// Error returned by [`Path::parse`]
#[derive(Debug, thiserror::Error)]
@@ -157,6 +159,18 @@ pub struct Path {
}
impl Path {
+ /// An empty [`Path`] that points to the root of the store, equivalent to `Path::from("/")`.
+ ///
+ /// See also [`Path::is_root`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use object_store::path::Path;
+ /// assert_eq!(Path::ROOT, Path::from("/"));
+ /// ```
+ pub const ROOT: Self = Self { raw: String::new() };
+
/// Parse a string as a [`Path`], returning a [`Error`] if invalid,
/// as defined on the docstring for [`Path`]
///
@@ -255,14 +269,60 @@ impl Path {
Self::parse(decoded)
}
- /// Returns the [`PathPart`] of this [`Path`]
- pub fn parts(&self) -> impl Iterator<Item = PathPart<'_>> {
- self.raw
- .split_terminator(DELIMITER)
- .map(|s| PathPart { raw: s.into() })
+ /// Returns the number of [`PathPart`]s in this [`Path`]
+ ///
+ /// This is equivalent to calling `.parts().count()` manually.
+ ///
+ /// # Performance
+ ///
+ /// This operation is `O(n)`.
+ #[doc(alias = "len")]
+ pub fn parts_count(&self) -> usize {
+ self.raw.split_terminator(DELIMITER).count()
+ }
+
+ /// True if this [`Path`] points to the root of the store, equivalent to `Path::from("/")`.
+ ///
+ /// See also [`Path::ROOT`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use object_store::path::Path;
+ /// assert!(Path::from("/").is_root());
+ /// assert!(Path::parse("").unwrap().is_root());
+ /// ```
+ pub fn is_root(&self) -> bool {
+ self.raw.is_empty()
+ }
+
+ /// Returns the [`PathPart`]s of this [`Path`]
+ ///
+ /// Equivalent to calling `.into_iter()` on a `&Path`.
+ pub fn parts(&self) -> PathParts<'_> {
+ PathParts::new(&self.raw)
+ }
+
+ /// Returns a copy of this [`Path`] with the last path segment removed
+ ///
+ /// Returns `None` if this path has zero segments.
+ pub fn parent(&self) -> Option<Self> {
+ if self.raw.is_empty() {
+ return None;
+ }
+
+ let Some((prefix, _filename)) = self.raw.rsplit_once(DELIMITER) else {
+ return Some(Self::ROOT);
+ };
+
+ Some(Self {
+ raw: prefix.to_string(),
+ })
}
/// Returns the last path segment containing the filename stored in this [`Path`]
+ ///
+ /// Returns `None` only if this path is the root path.
pub fn filename(&self) -> Option<&str> {
match self.raw.is_empty() {
true => None,
@@ -285,16 +345,13 @@ impl Path {
/// Returns an iterator of the [`PathPart`] of this [`Path`] after `prefix`
///
- /// Returns `None` if the prefix does not match
+ /// Returns `None` if the prefix does not match.
pub fn prefix_match(&self, prefix: &Self) -> Option<impl Iterator<Item = PathPart<'_>> + '_> {
let mut stripped = self.raw.strip_prefix(&prefix.raw)?;
if !stripped.is_empty() && !prefix.raw.is_empty() {
stripped = stripped.strip_prefix(DELIMITER)?;
}
- let iter = stripped
- .split_terminator(DELIMITER)
- .map(|x| PathPart { raw: x.into() });
- Some(iter)
+ Some(PathParts::new(stripped))
}
/// Returns true if this [`Path`] starts with `prefix`
@@ -348,13 +405,52 @@ where
I: Into<PathPart<'a>>,
{
fn from_iter<T: IntoIterator<Item = I>>(iter: T) -> Self {
- let raw = T::into_iter(iter)
- .map(|s| s.into())
- .filter(|s| !s.raw.is_empty())
- .map(|s| s.raw)
- .join(DELIMITER);
+ let mut this = Self::ROOT;
+ this.extend(iter);
+ this
+ }
+}
- Self { raw }
+/// See also [`Path::parts`]
+impl<'a> IntoIterator for &'a Path {
+ type Item = PathPart<'a>;
+ type IntoIter = PathParts<'a>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ PathParts::new(&self.raw)
+ }
+}
+
+/// [`Path`] supports appending [`PathPart`]s of one `Path` to another `Path`.
+///
+/// # Examples
+///
+/// Suppose Alice is copying Bob's file to her own user directory.
+/// We could choose the full path of the new file by taking the original
+/// absolute path, making it relative to Bob's home
+///
+/// ```rust
+/// # use object_store::path::Path;
+/// let alice_home = Path::from("Users/alice");
+/// let bob_home = Path::from("Users/bob");
+/// let bob_file = Path::from("Users/bob/documents/file.txt");
+///
+/// let mut alice_file = alice_home;
+/// alice_file.extend(bob_file.prefix_match(&bob_home).unwrap());
+///
+/// assert_eq!(alice_file, Path::from("Users/alice/documents/file.txt"));
+/// ```
+impl<'a, I: Into<PathPart<'a>>> Extend<I> for Path {
+ fn extend<T: IntoIterator<Item = I>>(&mut self, iter: T) {
+ for s in iter {
+ let s = s.into();
+ if !s.raw.is_empty() {
+ if !self.raw.is_empty() {
+ self.raw.push(DELIMITER_CHAR);
+ }
+ self.raw.push_str(&s.raw);
+ }
+ }
}
}
@@ -370,6 +466,11 @@ pub(crate) fn absolute_path_to_url(path: impl AsRef<std::path::Path>) -> Result<
mod tests {
use super::*;
+ #[test]
+ fn delimiter_char_is_forward_slash() {
+ assert_eq!(DELIMITER_CHAR, '/');
+ }
+
#[test]
fn cloud_prefix_with_trailing_delimiter() {
// Use case: files exist in object storage named `foo/bar.json` and
@@ -469,6 +570,24 @@ mod tests {
assert_eq!(Path::default().parts().count(), 0);
}
+ #[test]
+ fn parts_count() {
+ assert_eq!(Path::ROOT.parts().count(), Path::ROOT.parts_count());
+
+ let path = path("foo/bar/baz");
+ assert_eq!(path.parts_count(), 3);
+ assert_eq!(path.parts_count(), path.parts().count());
+ }
+
+ #[test]
+ fn prefix_matches_raw_content() {
+ assert_eq!(Path::ROOT.parent(), None, "empty path must have no prefix");
+
+ assert_eq!(path("foo").parent().unwrap(), Path::ROOT);
+ assert_eq!(path("foo/bar").parent().unwrap(), path("foo"));
+ assert_eq!(path("foo/bar/baz").parent().unwrap(), path("foo/bar"));
+ }
+
#[test]
fn prefix_matches() {
let haystack = Path::from_iter(["foo/bar", "baz%2Ftest", "something"]);
@@ -611,4 +730,57 @@ mod tests {
assert_eq!(c.extension(), None);
assert_eq!(d.extension(), Some("qux"));
}
+
+ #[test]
+ fn root_is_root() {
+ assert!(Path::ROOT.is_root());
+ assert!(Path::ROOT.parts().next().is_none());
+ }
+
+ /// Main test for `impl Extend for Path`, covers most cases.
+ #[test]
+ fn impl_extend() {
+ let mut p = Path::ROOT;
+
+ p.extend(&Path::ROOT);
+ assert_eq!(p, Path::ROOT);
+
+ p.extend(&path("foo"));
+ assert_eq!(p, path("foo"));
+
+ p.extend(&path("bar/baz"));
+ assert_eq!(p, path("foo/bar/baz"));
+
+ p.extend(&path("a/b/c"));
+ assert_eq!(p, path("foo/bar/baz/a/b/c"));
+ }
+
+ /// Test for `impl Extend for Path`, specifically covers addition of a single segment.
+ #[test]
+ fn impl_extend_for_one_segment() {
+ let mut p = Path::ROOT;
+
+ p.extend(&path("foo"));
+ assert_eq!(p, path("foo"));
+
+ p.extend(&path("bar"));
+ assert_eq!(p, path("foo/bar"));
+
+ p.extend(&path("baz"));
+ assert_eq!(p, path("foo/bar/baz"));
+ }
+
+ #[test]
+ fn parent() {
+ assert_eq!(Path::ROOT.parent(), None);
+ assert_eq!(path("foo").parent(), Some(Path::ROOT));
+ assert_eq!(path("foo/bar").parent(), Some(path("foo")));
+ assert_eq!(path("foo/bar/baz").parent(), Some(path("foo/bar")));
+ }
+
+ /// Construct a [`Path`] from a raw `&str`, or panic trying.
+ #[track_caller]
+ fn path(raw: &str) -> Path {
+ Path::parse(raw).unwrap()
+ }
}
diff --git a/src/path/parts.rs b/src/path/parts.rs
index 2170510..5628d3f 100644
--- a/src/path/parts.rs
+++ b/src/path/parts.rs
@@ -16,7 +16,11 @@
// under the License.
use percent_encoding::{AsciiSet, CONTROLS, percent_encode};
-use std::borrow::Cow;
+use std::{
+ borrow::Cow,
+ iter::{self, FusedIterator},
+ str::SplitTerminator,
+};
use crate::path::DELIMITER_BYTE;
@@ -131,6 +135,36 @@ impl AsRef<str> for PathPart<'_> {
}
}
+/// See [`Path::parts`](super::Path::parts)
+#[derive(Debug, Clone)]
+pub struct PathParts<'a>(iter::Map<SplitTerminator<'a, char>, fn(&str) -> PathPart<'_>>);
+
+impl<'a> PathParts<'a> {
+ /// Create an iterator over the parts of the provided raw [`Path`](super::Path).
+ pub(super) fn new(raw: &'a str) -> Self {
+ Self(
+ raw.split_terminator(super::DELIMITER_CHAR)
+ .map(|s| PathPart { raw: s.into() }),
+ )
+ }
+}
+
+impl<'a> Iterator for PathParts<'a> {
+ type Item = PathPart<'a>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.0.next()
+ }
+}
+
+impl<'a> FusedIterator for PathParts<'a> {}
+
+impl<'a> DoubleEndedIterator for PathParts<'a> {
+ fn next_back(&mut self) -> Option<Self::Item> {
+ self.0.next_back()
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;