From b7807aaf76212b7a648c24d0465002c8a5b9c9a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl?= Date: Sun, 4 Feb 2024 21:56:14 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=8C=20Vendor=20patch-rs=200.7.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.toml | 2 +- modda-cli/Cargo.toml | 2 +- patch-rs/.gitignore | 2 + patch-rs/CHANGELOG.md | 16 + patch-rs/Cargo.toml | 18 + patch-rs/LICENSE | 21 + patch-rs/README.md | 17 + patch-rs/examples/apply.rs | 65 +++ patch-rs/shell.nix | 8 + patch-rs/src/ast.rs | 316 +++++++++++++++ patch-rs/src/lib.rs | 52 +++ patch-rs/src/parser.rs | 589 ++++++++++++++++++++++++++++ patch-rs/tests/parse_patch.rs | 287 ++++++++++++++ patch-rs/tests/parse_samples.rs | 33 ++ patch-rs/tests/regressions.rs | 74 ++++ patch-rs/tests/samples/README.md | 5 + patch-rs/tests/samples/bzr.diff | 34 ++ patch-rs/tests/samples/crlf.diff | 10 + patch-rs/tests/samples/git.diff | 36 ++ patch-rs/tests/samples/hg.diff | 30 ++ patch-rs/tests/samples/sample0.diff | 63 +++ patch-rs/tests/samples/sample1.diff | 39 ++ patch-rs/tests/samples/sample2.diff | 53 +++ patch-rs/tests/samples/sample3.diff | 35 ++ patch-rs/tests/samples/sample4.diff | 33 ++ patch-rs/tests/samples/sample5.diff | 29 ++ patch-rs/tests/samples/sample6.diff | 38 ++ patch-rs/tests/samples/sample7.diff | 29 ++ patch-rs/tests/samples/svn.diff | 33 ++ 29 files changed, 1967 insertions(+), 2 deletions(-) create mode 100644 patch-rs/.gitignore create mode 100644 patch-rs/CHANGELOG.md create mode 100644 patch-rs/Cargo.toml create mode 100644 patch-rs/LICENSE create mode 100644 patch-rs/README.md create mode 100644 patch-rs/examples/apply.rs create mode 100644 patch-rs/shell.nix create mode 100644 patch-rs/src/ast.rs create mode 100644 patch-rs/src/lib.rs create mode 100644 patch-rs/src/parser.rs create mode 100644 patch-rs/tests/parse_patch.rs create mode 100644 patch-rs/tests/parse_samples.rs create mode 100644 patch-rs/tests/regressions.rs create mode 100644 patch-rs/tests/samples/README.md create mode 100644 patch-rs/tests/samples/bzr.diff create mode 100644 patch-rs/tests/samples/crlf.diff create mode 100644 patch-rs/tests/samples/git.diff create mode 100644 patch-rs/tests/samples/hg.diff create mode 100644 patch-rs/tests/samples/sample0.diff create mode 100644 patch-rs/tests/samples/sample1.diff create mode 100644 patch-rs/tests/samples/sample2.diff create mode 100644 patch-rs/tests/samples/sample3.diff create mode 100644 patch-rs/tests/samples/sample4.diff create mode 100644 patch-rs/tests/samples/sample5.diff create mode 100644 patch-rs/tests/samples/sample6.diff create mode 100644 patch-rs/tests/samples/sample7.diff create mode 100644 patch-rs/tests/samples/svn.diff diff --git a/Cargo.toml b/Cargo.toml index c289208..018a35e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,2 @@ [workspace] -members = ["modda-cli"] +members = ["modda-cli", "patch-rs"] diff --git a/modda-cli/Cargo.toml b/modda-cli/Cargo.toml index 4153cc7..2573d6b 100644 --- a/modda-cli/Cargo.toml +++ b/modda-cli/Cargo.toml @@ -31,7 +31,7 @@ indoc ="2.0.4" itertools = "0.12.0" lazy_static = "1.4.0" log = "0.4.20" -patch = "0.7.0" +patch = { path = "../patch-rs" } path-clean = "1.0.1" path-absolutize = "3.1.1" percent-encoding = "2.3.1" diff --git a/patch-rs/.gitignore b/patch-rs/.gitignore new file mode 100644 index 0000000..a9d37c5 --- /dev/null +++ b/patch-rs/.gitignore @@ -0,0 +1,2 @@ +target +Cargo.lock diff --git a/patch-rs/CHANGELOG.md b/patch-rs/CHANGELOG.md new file mode 100644 index 0000000..537c263 --- /dev/null +++ b/patch-rs/CHANGELOG.md @@ -0,0 +1,16 @@ +# CHANGELOG + +## [Unreleased] +### Changed + +## [v0.7] +### Breaking +- Filename parsing now expects (and renders) a tab character after the filename instead of a space character, before any metadata. Seems like all diff programs actually follow this convention, and git will even render unquoted filenames with spaces, so the previous parsing would produce incorrect results. Thanks [@keith](https://github.com/keith) for reporting. + +### Fixed +- Don't break (hopefully) on diffs with windows-style CRLF line endings. I don't have samples to verify with, but it Seems Like Maybe It Will Do The Right Thing? (it will still ony render diffs with line endings as `\n`. Please open a feature request if you want this.) Thanks [@jacobtread](https://github.com/jacobtread) for reporting. +- Parse (and save) hunk hints after range info instead of (incorrectly) treating them like Context lines. Thanks [@keith](https://github.com/keith) and [@wfraser](https://github.com/wfraser). + +## [v0.6] +### Changed +- Upgrade nom to 0.7! from [@compiler-errors](https://github.com/compiler-errors) diff --git a/patch-rs/Cargo.toml b/patch-rs/Cargo.toml new file mode 100644 index 0000000..0d7f8f4 --- /dev/null +++ b/patch-rs/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "patch" +version = "0.7.0" +authors = ["phil "] +description = "Parse patches in the unified diff format" +repository = "https://github.com/uniphil/patch-rs" +readme = "README.md" +keywords = ["patch", "diff", "parse", "nom"] +license = "MIT" +edition = "2018" + +[dependencies] +nom = "7.1.0" +nom_locate = "4.0.0" +chrono = "0.4.19" + +[dev-dependencies] +pretty_assertions = "1.0.0" diff --git a/patch-rs/LICENSE b/patch-rs/LICENSE new file mode 100644 index 0000000..9a5991e --- /dev/null +++ b/patch-rs/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 uniphil + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/patch-rs/README.md b/patch-rs/README.md new file mode 100644 index 0000000..906aae8 --- /dev/null +++ b/patch-rs/README.md @@ -0,0 +1,17 @@ +# Patch + +[![Checks](https://github.com/uniphil/patch-rs/actions/workflows/checks.yml/badge.svg)](https://github.com/uniphil/patch-rs/actions/workflows/checks.yml) +[![Crates.io Badge](https://img.shields.io/crates/v/patch.svg)](https://crates.io/crates/patch) +[![docs.rs](https://docs.rs/patch/badge.svg)](https://docs.rs/patch) +[![Lines of Code](https://tokei.rs/b1/github/uniphil/patch-rs)](https://github.com/uniphil/patch-rs) + +Rust crate for parsing and producing patch files in the [Unified Format]. + +The parser attempts to be forgiving enough to be compatible with diffs produced +by programs like git. It accomplishes this by ignoring the additional code +context and information provided in the diff by those programs. + +See the **[Documentation]** for more information and for examples. + +[Unified Format]: https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html +[Documentation]: https://docs.rs/patch diff --git a/patch-rs/examples/apply.rs b/patch-rs/examples/apply.rs new file mode 100644 index 0000000..3c744ab --- /dev/null +++ b/patch-rs/examples/apply.rs @@ -0,0 +1,65 @@ +//! Demonstrates how to apply a parsed diff to a file + +use patch::{Line, Patch}; + +fn apply(diff: Patch, old: &str) -> String { + let old_lines = old.lines().collect::>(); + let mut out: Vec<&str> = vec![]; + let mut old_line = 0; + for hunk in diff.hunks { + while old_line < hunk.old_range.start - 1 { + out.push(old_lines[old_line as usize]); + old_line += 1; + } + old_line += hunk.old_range.count; + for line in hunk.lines { + match line { + Line::Add(s) | Line::Context(s) => out.push(s), + Line::Remove(_) => {} + } + } + } + out.join("\n") +} + +static LAO: &str = "\ +The Way that can be told of is not the eternal Way; +The name that can be named is not the eternal name. +The Nameless is the origin of Heaven and Earth; +The Named is the mother of all things. +Therefore let there always be non-being, + so we may see their subtlety, +And let there always be being, + so we may see their outcome. +The two are the same, +But after they are produced, + they have different names. +"; + +static RAW_DIFF: &str = "\ +--- lao 2002-02-21 23:30:39.942229878 -0800 ++++ tzu 2002-02-21 23:30:50.442260588 -0800 +@@ -1,7 +1,6 @@ +-The Way that can be told of is not the eternal Way; +-The name that can be named is not the eternal name. + The Nameless is the origin of Heaven and Earth; +-The Named is the mother of all things. ++The named is the mother of all things. ++ + Therefore let there always be non-being, + so we may see their subtlety, + And let there always be being, +@@ -9,3 +8,6 @@ + The two are the same, + But after they are produced, + they have different names. ++They both may be called deep and profound. ++Deeper and more profound, ++The door of all subtleties! +"; + +fn main() { + let diff = Patch::from_single(RAW_DIFF).unwrap(); + let new = apply(diff, LAO); + println!("should be tzu:\n\n{}", new); +} diff --git a/patch-rs/shell.nix b/patch-rs/shell.nix new file mode 100644 index 0000000..0108d44 --- /dev/null +++ b/patch-rs/shell.nix @@ -0,0 +1,8 @@ +with import {}; + +stdenv.mkDerivation { + name = "rust"; + buildInputs = [ + cargo + ]; +} diff --git a/patch-rs/src/ast.rs b/patch-rs/src/ast.rs new file mode 100644 index 0000000..32af882 --- /dev/null +++ b/patch-rs/src/ast.rs @@ -0,0 +1,316 @@ +use std::borrow::Cow; +use std::fmt; + +use chrono::{DateTime, FixedOffset}; + +use crate::parser::{parse_multiple_patches, parse_single_patch, ParseError}; + +/// A complete patch summarizing the differences between two files +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct Patch<'a> { + /// The file information of the `-` side of the diff, line prefix: `---` + pub old: File<'a>, + /// The file information of the `+` side of the diff, line prefix: `+++` + pub new: File<'a>, + /// hunks of differences; each hunk shows one area where the files differ + pub hunks: Vec>, + /// true if the last line of the file ends in a newline character + /// + /// This will only be false if at the end of the patch we encounter the text: + /// `\ No newline at end of file` + pub end_newline: bool, +} + +impl<'a> fmt::Display for Patch<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + // Display implementations typically hold up the invariant that there is no trailing + // newline. This isn't enforced, but it allows them to work well with `println!` + + write!(f, "--- {}", self.old)?; + write!(f, "\n+++ {}", self.new)?; + for hunk in &self.hunks { + write!(f, "\n{}", hunk)?; + } + if !self.end_newline { + write!(f, "\n\\ No newline at end of file")?; + } + Ok(()) + } +} + +impl<'a> Patch<'a> { + #[allow(clippy::tabs_in_doc_comments)] + /// Attempt to parse a patch from the given string. + /// + /// # Example + /// + /// ``` + /// # fn main() -> Result<(), patch::ParseError<'static>> { + /// # use patch::Patch; + /// let sample = "\ + /// --- lao 2002-02-21 23:30:39.942229878 -0800 + /// +++ tzu 2002-02-21 23:30:50.442260588 -0800 + /// @@ -1,7 +1,6 @@ + /// -The Way that can be told of is not the eternal Way; + /// -The name that can be named is not the eternal name. + /// The Nameless is the origin of Heaven and Earth; + /// -The Named is the mother of all things. + /// +The named is the mother of all things. + /// + + /// Therefore let there always be non-being, + /// so we may see their subtlety, + /// And let there always be being, + /// @@ -9,3 +8,6 @@ + /// The two are the same, + /// But after they are produced, + /// they have different names. + /// +They both may be called deep and profound. + /// +Deeper and more profound, + /// +The door of all subtleties! + /// \\ No newline at end of file\n"; + /// + /// let patch = Patch::from_single(sample)?; + /// assert_eq!(&patch.old.path, "lao"); + /// assert_eq!(&patch.new.path, "tzu"); + /// assert_eq!(patch.end_newline, false); + /// # Ok(()) + /// # } + /// ``` + pub fn from_single(s: &'a str) -> Result> { + parse_single_patch(s) + } + + /// Attempt to parse as many patches as possible from the given string. This is useful for when + /// you have a complete diff of many files. String must contain at least one patch. + /// + /// # Example + /// + /// ``` + /// # fn main() -> Result<(), patch::ParseError<'static>> { + /// # use patch::Patch; + /// let sample = "\ + /// diff --git a/src/generator/place_items.rs b/src/generator/place_items.rs + /// index 508f4e9..31a167e 100644 + /// --- a/src/generator/place_items.rs + /// +++ b/src/generator/place_items.rs + /// @@ -233,7 +233,7 @@ impl<'a> GameGenerator<'a> { + /// // oooooooo + /// // + /// // x would pass all of the previous checks but get caught by this one + /// - if grid.adjacent_positions(inner_room_tile).find(|&pt| grid.is_room_entrance(pt)).is_some() { + /// + if grid.adjacent_positions(inner_room_tile).any(|&pt| grid.is_room_entrance(pt)) { + /// return None; + /// } + /// + /// diff --git a/src/ui/level_screen.rs b/src/ui/level_screen.rs + /// index 81fe540..166bb2b 100644 + /// --- a/src/ui/level_screen.rs + /// +++ b/src/ui/level_screen.rs + /// @@ -48,7 +48,7 @@ impl<'a, 'b> LevelScreen<'a, 'b> { + /// // Find the empty position adjacent to this staircase. There should only be one. + /// let map = self.world.read_resource::(); + /// let tile_pos = map.world_to_tile_pos(pos); + /// - let empty = map.grid().adjacent_positions(tile_pos).find(|&p| !map.grid().get(p).is_wall()) + /// + let empty = map.grid().adjacents(tile_pos).find(|t| !t.is_wall()) + /// .expect(\"bug: should be one empty position adjacent to a staircase\"); + /// empty.center(map.tile_size() as i32) + /// } + /// @@ -64,7 +64,7 @@ impl<'a, 'b> LevelScreen<'a, 'b> { + /// // Find the empty position adjacent to this staircase. There should only be one. + /// let map = self.world.read_resource::(); + /// let tile_pos = map.world_to_tile_pos(pos); + /// - let empty = map.grid().adjacent_positions(tile_pos).find(|&p| !map.grid().get(p).is_wall()) + /// + let empty = map.grid().adjacents(tile_pos).find(|t| !t.is_wall()) + /// .expect(\"bug: should be one empty position adjacent to a staircase\"); + /// empty.center(map.tile_size() as i32) + /// }\n"; + /// + /// let patches = Patch::from_multiple(sample)?; + /// assert_eq!(patches.len(), 2); + /// # Ok(()) + /// # } + /// ``` + pub fn from_multiple(s: &'a str) -> Result, ParseError<'a>> { + parse_multiple_patches(s) + } +} + +/// Check if a string needs to be quoted, and format it accordingly +fn maybe_escape_quote(f: &mut fmt::Formatter, s: &str) -> fmt::Result { + let quote = s + .chars() + .any(|ch| matches!(ch, ' ' | '\t' | '\r' | '\n' | '\"' | '\0' | '\\')); + + if quote { + write!(f, "\"")?; + for ch in s.chars() { + match ch { + '\0' => write!(f, r"\0")?, + '\n' => write!(f, r"\n")?, + '\r' => write!(f, r"\r")?, + '\t' => write!(f, r"\t")?, + '"' => write!(f, r#"\""#)?, + '\\' => write!(f, r"\\")?, + _ => write!(f, "{}", ch)?, + } + } + write!(f, "\"") + } else { + write!(f, "{}", s) + } +} + +/// The file path and any additional info of either the old file or the new file +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct File<'a> { + /// The parsed path or file name of the file + /// + /// Avoids allocation if at all possible. Only allocates if the file path is a quoted string + /// literal. String literals are necessary in some cases, for example if the file path has + /// spaces in it. These literals can contain escaped characters which are initially seen as + /// groups of two characters by the parser (e.g. '\\' + 'n'). A newly allocated string is + /// used to unescape those characters (e.g. "\\n" -> '\n'). + /// + /// **Note:** While this string is typically a file path, this library makes no attempt to + /// verify the format of that path. That means that **this field can potentially be any + /// string**. You should verify it before doing anything that may be security-critical. + pub path: Cow<'a, str>, + /// Any additional information provided with the file path + pub meta: Option>, +} + +impl<'a> fmt::Display for File<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + maybe_escape_quote(f, &self.path)?; + if let Some(meta) = &self.meta { + write!(f, "\t{}", meta)?; + } + Ok(()) + } +} + +/// Additional metadata provided with the file path +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum FileMetadata<'a> { + /// A complete datetime, e.g. `2002-02-21 23:30:39.942229878 -0800` + DateTime(DateTime), + /// Any other string provided after the file path, e.g. git hash, unrecognized timestamp, etc. + Other(Cow<'a, str>), +} + +impl<'a> fmt::Display for FileMetadata<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FileMetadata::DateTime(datetime) => { + write!(f, "{}", datetime.format("%F %T%.f %z")) + } + FileMetadata::Other(data) => maybe_escape_quote(f, data), + } + } +} + +/// One area where the files differ +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct Hunk<'a> { + /// The range of lines in the old file that this hunk represents + pub old_range: Range, + /// The range of lines in the new file that this hunk represents + pub new_range: Range, + /// Any trailing text after the hunk's range information + pub range_hint: &'a str, + /// Each line of text in the hunk, prefixed with the type of change it represents + pub lines: Vec>, +} + +impl<'a> Hunk<'a> { + /// A nicer way to access the optional hint + pub fn hint(&self) -> Option<&str> { + let h = self.range_hint.trim_start(); + if h.is_empty() { + None + } else { + Some(h) + } + } +} + +impl<'a> fmt::Display for Hunk<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "@@ -{} +{} @@{}", + self.old_range, self.new_range, self.range_hint + )?; + + for line in &self.lines { + write!(f, "\n{}", line)?; + } + + Ok(()) + } +} + +/// A range of lines in a given file +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct Range { + /// The start line of the chunk in the old or new file + pub start: u64, + /// The chunk size (number of lines) in the old or new file + pub count: u64, +} + +impl fmt::Display for Range { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{},{}", self.start, self.count) + } +} + +/// A line of the old file, new file, or both +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum Line<'a> { + /// A line added to the old file in the new file + Add(&'a str), + /// A line removed from the old file in the new file + Remove(&'a str), + /// A line provided for context in the diff (unchanged); from both the old and the new file + Context(&'a str), +} + +impl<'a> fmt::Display for Line<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Line::Add(line) => write!(f, "+{}", line), + Line::Remove(line) => write!(f, "-{}", line), + Line::Context(line) => write!(f, " {}", line), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn test_hint_helper() { + let mut h = Hunk { + old_range: Range { start: 0, count: 0 }, + new_range: Range { start: 0, count: 0 }, + range_hint: "", + lines: vec![], + }; + for (input, expected) in vec![ + ("", None), + (" ", None), + (" ", None), + ("x", Some("x")), + (" x", Some("x")), + ("x ", Some("x ")), + (" x ", Some("x ")), + (" abc def ", Some("abc def ")), + ] { + h.range_hint = input; + assert_eq!(h.hint(), expected); + } + } +} diff --git a/patch-rs/src/lib.rs b/patch-rs/src/lib.rs new file mode 100644 index 0000000..75c03ba --- /dev/null +++ b/patch-rs/src/lib.rs @@ -0,0 +1,52 @@ +//! Parse and produce patch files (diffs) in the [Unified Format]. +//! +//! The format is not fully specified, but people like Guido van Rossum [have done the work][spec] +//! to figure out the details. +//! +//! The parser attempts to be forgiving enough to be compatible with diffs produced by programs +//! like git. It accomplishes this by ignoring the additional code context and information provided +//! in the diff by those programs. +//! +//! ## Example +//! +//! ``` +//! # fn main() -> Result<(), patch::ParseError<'static>> { +//! // Make sure you add the `patch` crate to the `[dependencies]` key of your Cargo.toml file. +//! use patch::Patch; +//! +//! let sample = "\ +//! --- before.py +//! +++ path/to/after.py +//! @@ -1,4 +1,4 @@ +//! -bacon +//! -eggs +//! -ham +//! +python +//! +eggy +//! +hamster +//! guido\n"; +//! +//! let patch = Patch::from_single(sample)?; +//! assert_eq!(&patch.old.path, "before.py"); +//! assert_eq!(&patch.new.path, "path/to/after.py"); +//! +//! // Print out the parsed patch file in its Rust representation +//! println!("{:#?}", patch); +//! +//! // Print out the parsed patch file in the Unified Format. For input that was originally in the +//! // Unified Format, this will produce output identical to that original input. +//! println!("{}", patch); // use format!("{}\n", patch) to get this as a String +//! # Ok(()) +//! # } +//! ``` +//! +//! [Unified Format]: https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html +//! [spec]: http://www.artima.com/weblogs/viewpost.jsp?thread=164293 + +#![deny(unused_must_use)] + +mod ast; +mod parser; + +pub use ast::*; +pub use parser::ParseError; diff --git a/patch-rs/src/parser.rs b/patch-rs/src/parser.rs new file mode 100644 index 0000000..ebe1679 --- /dev/null +++ b/patch-rs/src/parser.rs @@ -0,0 +1,589 @@ +use std::borrow::Cow; +use std::error::Error; + +use chrono::DateTime; +use nom::*; +use nom::{ + branch::alt, + bytes::complete::{is_not, tag, take_until}, + character::complete::{char, digit1, line_ending, none_of, not_line_ending, one_of}, + combinator::{map, not, opt}, + multi::{many0, many1}, + sequence::{delimited, preceded, terminated, tuple}, +}; + +use crate::ast::*; + +type Input<'a> = nom_locate::LocatedSpan<&'a str>; + +/// Type returned when an error occurs while parsing a patch +#[derive(Debug, Clone)] +pub struct ParseError<'a> { + /// The line where the parsing error occurred + pub line: u32, + /// The offset within the input where the parsing error occurred + pub offset: usize, + /// The failed input + pub fragment: &'a str, + /// The actual parsing error + pub kind: nom::error::ErrorKind, +} + +#[doc(hidden)] +impl<'a> From>>> for ParseError<'a> { + fn from(err: nom::Err>>) -> Self { + match err { + nom::Err::Incomplete(_) => unreachable!("bug: parser should not return incomplete"), + // Unify both error types because at this point the error is not recoverable + nom::Err::Error(error) | nom::Err::Failure(error) => Self { + line: error.input.location_line(), + offset: error.input.location_offset(), + fragment: error.input.fragment(), + kind: error.code, + }, + } + } +} + +impl<'a> std::fmt::Display for ParseError<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "Line {}: Error while parsing: {}", + self.line, self.fragment + ) + } +} + +impl<'a> Error for ParseError<'a> { + fn description(&self) -> &str { + self.kind.description() + } +} + +fn consume_content_line(input: Input<'_>) -> IResult, &str> { + let (input, raw) = terminated(not_line_ending, line_ending)(input)?; + Ok((input, raw.fragment())) +} + +pub(crate) fn parse_single_patch(s: &str) -> Result> { + let (remaining_input, patch) = patch(Input::new(s))?; + // Parser should return an error instead of producing remaining input + assert!( + remaining_input.fragment().is_empty(), + "bug: failed to parse entire input. \ + Remaining: '{}'", + remaining_input.fragment() + ); + Ok(patch) +} + +pub(crate) fn parse_multiple_patches(s: &str) -> Result, ParseError<'_>> { + let (remaining_input, patches) = multiple_patches(Input::new(s))?; + // Parser should return an error instead of producing remaining input + assert!( + remaining_input.fragment().is_empty(), + "bug: failed to parse entire input. \ + Remaining: '{}'", + remaining_input.fragment() + ); + Ok(patches) +} + +fn multiple_patches(input: Input<'_>) -> IResult, Vec> { + many1(patch)(input) +} + +fn patch(input: Input<'_>) -> IResult, Patch> { + let (input, files) = headers(input)?; + let (input, hunks) = chunks(input)?; + let (input, no_newline_indicator) = no_newline_indicator(input)?; + // Ignore trailing empty lines produced by some diff programs + let (input, _) = many0(line_ending)(input)?; + + let (old, new) = files; + Ok(( + input, + Patch { + old, + new, + hunks, + end_newline: !no_newline_indicator, + }, + )) +} + +// Header lines +fn headers(input: Input<'_>) -> IResult, (File, File)> { + // Ignore any preamble lines in produced diffs + let (input, _) = take_until("---")(input)?; + let (input, _) = tag("--- ")(input)?; + let (input, oldfile) = header_line_content(input)?; + let (input, _) = line_ending(input)?; + let (input, _) = tag("+++ ")(input)?; + let (input, newfile) = header_line_content(input)?; + let (input, _) = line_ending(input)?; + Ok((input, (oldfile, newfile))) +} + +fn header_line_content(input: Input<'_>) -> IResult, File> { + let (input, filename) = filename(input)?; + let (input, after) = opt(preceded(char('\t'), file_metadata))(input)?; + + Ok(( + input, + File { + path: filename, + meta: after.and_then(|after| match after { + Cow::Borrowed("") => None, + Cow::Borrowed("\t") => None, + _ => Some( + DateTime::parse_from_str(after.as_ref(), "%F %T%.f %z") + .or_else(|_| DateTime::parse_from_str(after.as_ref(), "%F %T %z")) + .ok() + .map_or_else(|| FileMetadata::Other(after), FileMetadata::DateTime), + ), + }), + }, + )) +} + +// Hunks of the file differences +fn chunks(input: Input<'_>) -> IResult, Vec> { + many1(chunk)(input) +} + +fn chunk(input: Input<'_>) -> IResult, Hunk> { + let (input, ranges) = chunk_header(input)?; + let (input, lines) = many1(chunk_line)(input)?; + + let (old_range, new_range, range_hint) = ranges; + Ok(( + input, + Hunk { + old_range, + new_range, + range_hint, + lines, + }, + )) +} + +fn chunk_header(input: Input<'_>) -> IResult, (Range, Range, &'_ str)> { + let (input, _) = tag("@@ -")(input)?; + let (input, old_range) = range(input)?; + let (input, _) = tag(" +")(input)?; + let (input, new_range) = range(input)?; + let (input, _) = tag(" @@")(input)?; + + // Save hint provided after @@ (git sometimes adds this) + let (input, range_hint) = not_line_ending(input)?; + let (input, _) = line_ending(input)?; + Ok((input, (old_range, new_range, &range_hint))) +} + +fn range(input: Input<'_>) -> IResult, Range> { + let (input, start) = u64_digit(input)?; + let (input, count) = opt(preceded(char(','), u64_digit))(input)?; + let count = count.unwrap_or(1); + Ok((input, Range { start, count })) +} + +fn u64_digit(input: Input<'_>) -> IResult, u64> { + let (input, digits) = digit1(input)?; + let num = digits.fragment().parse::().unwrap(); + Ok((input, num)) +} + +// Looks for lines starting with + or - or space, but not +++ or ---. Not a foolproof check. +// +// For example, if someone deletes a line that was using the pre-decrement (--) operator or adds a +// line that was using the pre-increment (++) operator, this will fail. +// +// Example where this doesn't work: +// +// --- main.c +// +++ main.c +// @@ -1,4 +1,7 @@ +// +#include +// + +// int main() { +// double a; +// --- a; +// +++ a; +// +printf("%d\n", a); +// } +// +// We will fail to parse this entire diff. +// +// By checking for `+++ ` instead of just `+++`, we add at least a little more robustness because +// we know that people typically write `++a`, not `++ a`. That being said, this is still not enough +// to guarantee correctness in all cases. +// +//FIXME: Use the ranges in the chunk header to figure out how many chunk lines to parse. Will need +// to figure out how to count in nom more robustly than many1!(). Maybe using switch!()? +//FIXME: The test_parse_triple_plus_minus_hack test will no longer panic when this is fixed. +fn chunk_line(input: Input<'_>) -> IResult, Line> { + alt(( + map( + preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line), + Line::Add, + ), + map( + preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line), + Line::Remove, + ), + map(preceded(char(' '), consume_content_line), Line::Context), + ))(input) +} + +// Trailing newline indicator +fn no_newline_indicator(input: Input<'_>) -> IResult, bool> { + map( + opt(terminated( + tag("\\ No newline at end of file"), + opt(line_ending), + )), + |matched| matched.is_some(), + )(input) +} + +fn filename(input: Input<'_>) -> IResult, Cow> { + alt((quoted, bare))(input) +} + +fn file_metadata(input: Input<'_>) -> IResult, Cow> { + alt(( + quoted, + map(not_line_ending, |data: Input<'_>| { + Cow::Borrowed(*data.fragment()) + }), + ))(input) +} + +fn quoted(input: Input<'_>) -> IResult, Cow> { + delimited(char('\"'), unescaped_str, char('\"'))(input) +} + +fn bare(input: Input<'_>) -> IResult, Cow> { + map(is_not("\t\r\n"), |data: Input<'_>| { + Cow::Borrowed(*data.fragment()) + })(input) +} + +fn unescaped_str(input: Input<'_>) -> IResult, Cow> { + let (input, raw) = many1(alt((unescaped_char, escaped_char)))(input)?; + Ok((input, raw.into_iter().collect::>())) +} + +// Parses an unescaped character +fn unescaped_char(input: Input<'_>) -> IResult, char> { + none_of("\0\n\r\t\\\"")(input) +} + +// Parses an escaped character and returns its unescaped equivalent +fn escaped_char(input: Input<'_>) -> IResult, char> { + map(preceded(char('\\'), one_of(r#"0nrt"\"#)), |ch| match ch { + '0' => '\0', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '"' => '"', + '\\' => '\\', + _ => unreachable!(), + })(input) +} + +#[cfg(test)] +mod tests { + use super::*; + + use pretty_assertions::assert_eq; + + type ParseResult<'a, T> = Result>>>; + + // Using a macro instead of a function so that error messages cite the most helpful line number + macro_rules! test_parser { + ($parser:ident($input:expr) -> @($expected_remaining_input:expr, $expected:expr $(,)*)) => { + let (remaining_input, result) = $parser(Input::new($input))?; + assert_eq!(*remaining_input.fragment(), $expected_remaining_input, + "unexpected remaining input after parse"); + assert_eq!(result, $expected); + }; + ($parser:ident($input:expr) -> $expected:expr) => { + test_parser!($parser($input) -> @("", $expected)); + }; + } + + #[test] + fn test_unescape() -> ParseResult<'static, ()> { + test_parser!(unescaped_str("file \\\"name\\\"") -> "file \"name\"".to_string()); + Ok(()) + } + + #[test] + fn test_quoted() -> ParseResult<'static, ()> { + test_parser!(quoted("\"file name\"") -> "file name".to_string()); + Ok(()) + } + + #[test] + fn test_bare() -> ParseResult<'static, ()> { + test_parser!(bare("file-name ") -> @("", "file-name ".to_string())); + test_parser!(bare("file-name\t") -> @("\t", "file-name".to_string())); + test_parser!(bare("file-name\n") -> @("\n", "file-name".to_string())); + Ok(()) + } + + #[test] + fn test_filename() -> ParseResult<'static, ()> { + // bare + test_parser!(filename("asdf\t") -> @("\t", "asdf".to_string())); + + // quoted + test_parser!(filename(r#""a/My Project/src/foo.rs" "#) -> @(" ", "a/My Project/src/foo.rs".to_string())); + test_parser!(filename(r#""\"asdf\" fdsh \\\t\r" "#) -> @(" ", "\"asdf\" fdsh \\\t\r".to_string())); + test_parser!(filename(r#""a s\"\nd\0f" "#) -> @(" ", "a s\"\nd\0f".to_string())); + Ok(()) + } + + #[test] + fn test_header_line_contents() -> ParseResult<'static, ()> { + test_parser!(header_line_content("lao\n") -> @("\n", File { + path: "lao".into(), + meta: None, + })); + + test_parser!(header_line_content("lao\t2002-02-21 23:30:39.942229878 -0800\n") -> @( + "\n", + File { + path: "lao".into(), + meta: Some(FileMetadata::DateTime( + DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap() + )), + }, + )); + + test_parser!(header_line_content("lao\t2002-02-21 23:30:39 -0800\n") -> @( + "\n", + File { + path: "lao".into(), + meta: Some(FileMetadata::DateTime( + DateTime::parse_from_rfc3339("2002-02-21T23:30:39-08:00").unwrap() + )), + }, + )); + + test_parser!(header_line_content("lao\t08f78e0addd5bf7b7aa8887e406493e75e8d2b55\n") -> @( + "\n", + File { + path: "lao".into(), + meta: Some(FileMetadata::Other("08f78e0addd5bf7b7aa8887e406493e75e8d2b55".into())) + }, + )); + Ok(()) + } + + #[test] + fn test_headers() -> ParseResult<'static, ()> { + let sample = "\ +--- lao 2002-02-21 23:30:39.942229878 -0800 ++++ tzu 2002-02-21 23:30:50.442260588 -0800\n"; + test_parser!(headers(sample) -> ( + File { + path: "lao".into(), + meta: Some(FileMetadata::DateTime( + DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap() + )), + }, + File { + path: "tzu".into(), + meta: Some(FileMetadata::DateTime( + DateTime::parse_from_rfc3339("2002-02-21T23:30:50.442260588-08:00").unwrap() + )), + }, + )); + + let sample2 = "\ +--- lao ++++ tzu\n"; + test_parser!(headers(sample2) -> ( + File {path: "lao".into(), meta: None}, + File {path: "tzu".into(), meta: None}, + )); + + let sample2b = "\ +--- lao ++++ tzu \n"; + test_parser!(headers(sample2b) -> ( + File {path: "lao".into(), meta: None}, + File {path: "tzu".into(), meta: None}, + )); + + let sample3 = "\ +--- lao 08f78e0addd5bf7b7aa8887e406493e75e8d2b55 ++++ tzu e044048282ce75186ecc7a214fd3d9ba478a2816\n"; + test_parser!(headers(sample3) -> ( + File { + path: "lao".into(), + meta: Some(FileMetadata::Other("08f78e0addd5bf7b7aa8887e406493e75e8d2b55".into())), + }, + File { + path: "tzu".into(), + meta: Some(FileMetadata::Other("e044048282ce75186ecc7a214fd3d9ba478a2816".into())), + }, + )); + Ok(()) + } + + #[test] + fn test_headers_crlf() -> ParseResult<'static, ()> { + let sample = "\ +--- lao 2002-02-21 23:30:39.942229878 -0800\r ++++ tzu 2002-02-21 23:30:50.442260588 -0800\r\n"; + test_parser!(headers(sample) -> ( + File { + path: "lao".into(), + meta: Some(FileMetadata::DateTime( + DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap() + )), + }, + File { + path: "tzu".into(), + meta: Some(FileMetadata::DateTime( + DateTime::parse_from_rfc3339("2002-02-21T23:30:50.442260588-08:00").unwrap() + )), + }, + )); + Ok(()) + } + + #[test] + fn test_range() -> ParseResult<'static, ()> { + test_parser!(range("1,7") -> Range { start: 1, count: 7 }); + + test_parser!(range("2") -> Range { start: 2, count: 1 }); + Ok(()) + } + + #[test] + fn test_chunk_header() -> ParseResult<'static, ()> { + test_parser!(chunk_header("@@ -1,7 +1,6 @@ foo bar\n") -> ( + Range { start: 1, count: 7 }, + Range { start: 1, count: 6 }, + " foo bar", + )); + Ok(()) + } + + #[test] + fn test_chunk() -> ParseResult<'static, ()> { + let sample = "\ +@@ -1,7 +1,6 @@ +-The Way that can be told of is not the eternal Way; +-The name that can be named is not the eternal name. + The Nameless is the origin of Heaven and Earth; +-The Named is the mother of all things. ++The named is the mother of all things. ++ + Therefore let there always be non-being, + so we may see their subtlety, + And let there always be being,\n"; + let expected = Hunk { + old_range: Range { start: 1, count: 7 }, + new_range: Range { start: 1, count: 6 }, + range_hint: "", + lines: vec![ + Line::Remove("The Way that can be told of is not the eternal Way;"), + Line::Remove("The name that can be named is not the eternal name."), + Line::Context("The Nameless is the origin of Heaven and Earth;"), + Line::Remove("The Named is the mother of all things."), + Line::Add("The named is the mother of all things."), + Line::Add(""), + Line::Context("Therefore let there always be non-being,"), + Line::Context(" so we may see their subtlety,"), + Line::Context("And let there always be being,"), + ], + }; + test_parser!(chunk(sample) -> expected); + Ok(()) + } + + #[test] + fn test_patch() -> ParseResult<'static, ()> { + // https://www.gnu.org/software/diffutils/manual/html_node/Example-Unified.html + let sample = "\ +--- lao 2002-02-21 23:30:39.942229878 -0800 ++++ tzu 2002-02-21 23:30:50.442260588 -0800 +@@ -1,7 +1,6 @@ +-The Way that can be told of is not the eternal Way; +-The name that can be named is not the eternal name. + The Nameless is the origin of Heaven and Earth; +-The Named is the mother of all things. ++The named is the mother of all things. ++ + Therefore let there always be non-being, + so we may see their subtlety, + And let there always be being, +@@ -9,3 +8,6 @@ + The two are the same, + But after they are produced, + they have different names. ++They both may be called deep and profound. ++Deeper and more profound, ++The door of all subtleties!\n"; + + let expected = Patch { + old: File { + path: "lao".into(), + meta: Some(FileMetadata::DateTime( + DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap(), + )), + }, + new: File { + path: "tzu".into(), + meta: Some(FileMetadata::DateTime( + DateTime::parse_from_rfc3339("2002-02-21T23:30:50.442260588-08:00").unwrap(), + )), + }, + hunks: vec![ + Hunk { + old_range: Range { start: 1, count: 7 }, + new_range: Range { start: 1, count: 6 }, + range_hint: "", + lines: vec![ + Line::Remove("The Way that can be told of is not the eternal Way;"), + Line::Remove("The name that can be named is not the eternal name."), + Line::Context("The Nameless is the origin of Heaven and Earth;"), + Line::Remove("The Named is the mother of all things."), + Line::Add("The named is the mother of all things."), + Line::Add(""), + Line::Context("Therefore let there always be non-being,"), + Line::Context(" so we may see their subtlety,"), + Line::Context("And let there always be being,"), + ], + }, + Hunk { + old_range: Range { start: 9, count: 3 }, + new_range: Range { start: 8, count: 6 }, + range_hint: "", + lines: vec![ + Line::Context("The two are the same,"), + Line::Context("But after they are produced,"), + Line::Context(" they have different names."), + Line::Add("They both may be called deep and profound."), + Line::Add("Deeper and more profound,"), + Line::Add("The door of all subtleties!"), + ], + }, + ], + end_newline: true, + }; + + test_parser!(patch(sample) -> expected); + + assert_eq!(format!("{}\n", expected), sample); + + Ok(()) + } +} diff --git a/patch-rs/tests/parse_patch.rs b/patch-rs/tests/parse_patch.rs new file mode 100644 index 0000000..80a29bb --- /dev/null +++ b/patch-rs/tests/parse_patch.rs @@ -0,0 +1,287 @@ +use chrono::DateTime; +use patch::{File, FileMetadata, ParseError, Patch}; + +use pretty_assertions::assert_eq; + +#[test] +fn test_parse() -> Result<(), ParseError<'static>> { + let sample = "\ +--- before.py ++++ after.py +@@ -1,4 +1,4 @@ +-bacon +-eggs +-ham ++python ++eggy ++hamster + guido\n"; + let patch = Patch::from_single(sample)?; + assert_eq!( + patch.old, + File { + path: "before.py".into(), + meta: None + } + ); + assert_eq!( + patch.new, + File { + path: "after.py".into(), + meta: None + } + ); + assert!(patch.end_newline); + + assert_eq!(format!("{}\n", patch), sample); + + Ok(()) +} + +#[test] +fn test_parse_no_newline_indicator() -> Result<(), ParseError<'static>> { + let sample = "\ +--- before.py ++++ after.py +@@ -1,4 +1,4 @@ +-bacon +-eggs +-ham ++python ++eggy ++hamster + guido +\\ No newline at end of file\n"; + let patch = Patch::from_single(sample)?; + assert_eq!( + patch.old, + File { + path: "before.py".into(), + meta: None + } + ); + assert_eq!( + patch.new, + File { + path: "after.py".into(), + meta: None + } + ); + assert!(!patch.end_newline); + + assert_eq!(format!("{}\n", patch), sample); + + Ok(()) +} + +#[test] +fn test_parse_timestamps() -> Result<(), ParseError<'static>> { + let sample = "\ +--- before.py\t2002-02-21 23:30:39.942229878 -0800 ++++ after.py\t2002-02-21 23:30:50 -0800 +@@ -1,4 +1,4 @@ +-bacon +-eggs +-ham ++python ++eggy ++hamster + guido +\\ No newline at end of file"; + let patch = Patch::from_single(sample)?; + assert_eq!( + patch.old, + File { + path: "before.py".into(), + meta: Some(FileMetadata::DateTime( + DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap() + )), + } + ); + assert_eq!( + patch.new, + File { + path: "after.py".into(), + meta: Some(FileMetadata::DateTime( + DateTime::parse_from_rfc3339("2002-02-21T23:30:50-08:00").unwrap() + )), + } + ); + assert!(!patch.end_newline); + + // to_string() uses Display but adds no trailing newline + assert_eq!(patch.to_string(), sample); + + Ok(()) +} + +#[test] +fn test_parse_other() -> Result<(), ParseError<'static>> { + let sample = "\ +--- before.py\t08f78e0addd5bf7b7aa8887e406493e75e8d2b55 ++++ after.py\te044048282ce75186ecc7a214fd3d9ba478a2816 +@@ -1,4 +1,4 @@ +-bacon +-eggs +-ham ++python ++eggy ++hamster + guido\n"; + let patch = Patch::from_single(sample)?; + assert_eq!( + patch.old, + File { + path: "before.py".into(), + meta: Some(FileMetadata::Other( + "08f78e0addd5bf7b7aa8887e406493e75e8d2b55".into() + )), + } + ); + assert_eq!( + patch.new, + File { + path: "after.py".into(), + meta: Some(FileMetadata::Other( + "e044048282ce75186ecc7a214fd3d9ba478a2816".into() + )), + } + ); + assert!(patch.end_newline); + + assert_eq!(format!("{}\n", patch), sample); + + Ok(()) +} + +#[test] +fn test_parse_escaped() -> Result<(), ParseError<'static>> { + let sample = "\ +--- before.py\t\"asdf \\\\ \\n \\t \\0 \\r \\\" \" ++++ \"My Work/after.py\"\t\"My project is cool! Wow!!; SELECT * FROM USERS;\" +@@ -1,4 +1,4 @@ +-bacon +-eggs +-ham ++python ++eggy ++hamster + guido\n"; + let patch = Patch::from_single(sample)?; + assert_eq!( + patch.old, + File { + path: "before.py".into(), + meta: Some(FileMetadata::Other("asdf \\ \n \t \0 \r \" ".into())), + } + ); + assert_eq!( + patch.new, + File { + path: "My Work/after.py".into(), + meta: Some(FileMetadata::Other( + "My project is cool! Wow!!; SELECT * FROM USERS;".into() + )), + } + ); + assert!(patch.end_newline); + + assert_eq!(format!("{}\n", patch), sample); + + Ok(()) +} + +#[test] +fn test_parse_triple_plus_minus() -> Result<(), ParseError<'static>> { + // Our parser has some hacky rules to make sure that lines starting with +++ or --- aren't + // interpreted as regular addition/removal lines that could be part of a hunk. This test checks + // to make sure that code is working. + let sample = r#"--- main.c ++++ main.c +@@ -1,4 +1,7 @@ ++#include ++ + int main() { + double a; +---a; ++++a; ++printf("%d\n", a); + } +"#; + let patches = Patch::from_multiple(sample).unwrap(); + assert_eq!(patches.len(), 1); + + let patch = &patches[0]; + assert_eq!( + patch.old, + File { + path: "main.c".into(), + meta: None + } + ); + assert_eq!( + patch.new, + File { + path: "main.c".into(), + meta: None + } + ); + assert!(patch.end_newline); + + assert_eq!(patch.hunks.len(), 1); + assert_eq!(patch.hunks[0].lines.len(), 8); + + assert_eq!(format!("{}\n", patch), sample); + + Ok(()) +} + +//FIXME: This test should NOT panic. When we have more sophisticated chunk line parsing that +// actually takes the hunk ranges into account, the #[should_panic] annotation should be removed. +// See the FIXME comment on top of the chunk_line parser. +#[test] +#[should_panic] +fn test_parse_triple_plus_minus_hack() { + // Our parser has some hacky rules to make sure that lines starting with +++ or --- aren't + // interpreted as regular addition/removal lines that could be part of a hunk. This test + // demonstrates that these rules are not foolproof. The only differences between this test and + // test_parse_triple_plus_minus are `--- a` and `+++ a` vs `---a` and `+++a`. In either case, + // we should be able to determine that those lines do not start a new patch based on the ranges + // provided for the hunk. + let sample = r#"--- main.c ++++ main.c +@@ -1,4 +1,7 @@ ++#include ++ + int main() { + double a; +--- a; ++++ a; ++printf("%d\n", a); + } +"#; + let patches = Patch::from_multiple(sample).unwrap(); + assert_eq!(patches.len(), 1); + + let patch = &patches[0]; + assert_eq!( + patch.old, + File { + path: "main.c".into(), + meta: None + } + ); + assert_eq!( + patch.new, + File { + path: "main.c".into(), + meta: None + } + ); + assert!(patch.end_newline); + + assert_eq!(patch.hunks.len(), 1); + assert_eq!(patch.hunks[0].lines.len(), 8); + + assert_eq!(format!("{}\n", patch), sample); +} diff --git a/patch-rs/tests/parse_samples.rs b/patch-rs/tests/parse_samples.rs new file mode 100644 index 0000000..e35acd5 --- /dev/null +++ b/patch-rs/tests/parse_samples.rs @@ -0,0 +1,33 @@ +use std::fs; +use std::path::PathBuf; + +use pretty_assertions::assert_eq; + +use patch::Patch; + +#[test] +fn parse_samples() { + let samples_path = PathBuf::from(file!()).parent().unwrap().join("samples"); + for file in fs::read_dir(samples_path.strip_prefix("patch-rs").unwrap()).unwrap() { + let path = file.unwrap().path(); + if path.extension().unwrap_or_default() != "diff" { + continue; + } + + let data = fs::read_to_string(dbg!(&path)).unwrap(); + let patches = Patch::from_multiple(&data) + .unwrap_or_else(|err| panic!("failed to parse {:?}, error: {}", path, err)); + + // Make sure that the patch file we produce parses to the same information as the original + // patch file. + let patch_file: String = patches.iter().map(|patch| format!("{}\n", patch)).collect(); + println!("{}", patch_file); + let patches2 = Patch::from_multiple(&patch_file).unwrap_or_else(|err| { + panic!( + "failed to re-parse {:?} after formatting, error: {}", + path, err + ) + }); + assert_eq!(patches, patches2); + } +} diff --git a/patch-rs/tests/regressions.rs b/patch-rs/tests/regressions.rs new file mode 100644 index 0000000..8a2338f --- /dev/null +++ b/patch-rs/tests/regressions.rs @@ -0,0 +1,74 @@ +use patch::{File, FileMetadata, Hunk, Line, ParseError, Patch, Range}; + +use pretty_assertions::assert_eq; + +#[test] +fn hunk_header_context_is_not_a_line_15() -> Result<(), ParseError<'static>> { + let sample = "\ +--- old.txt ++++ new.txt +@@ -0,0 +0,0 @@ spoopadoop + x +"; + let patch = Patch::from_single(sample)?; + assert_eq!(patch.hunks[0].lines, [Line::Context("x")]); + Ok(()) +} + +#[test] +fn crlf_breaks_stuff_17() -> Result<(), ParseError<'static>> { + let sample = "\ +--- old.txt\r ++++ new.txt\r +@@ -0,0 +0,0 @@\r + x\r +"; + let patch = Patch::from_single(sample)?; + assert_eq!( + patch, + Patch { + old: File { + path: "old.txt".into(), + meta: None + }, + new: File { + path: "new.txt".into(), + meta: None + }, + hunks: vec![Hunk { + old_range: Range { start: 0, count: 0 }, + new_range: Range { start: 0, count: 0 }, + range_hint: "", + lines: vec![Line::Context("x")], + }], + end_newline: true, + } + ); + Ok(()) +} + +#[test] +fn unquoted_filenames_with_spaces_11() -> Result<(), ParseError<'static>> { + let sample = "\ +--- unquoted no space\t ++++ unquoted no space\twith metadata +@@ -0,0 +0,0 @@ + x +"; + let patch = Patch::from_single(sample)?; + assert_eq!( + patch.old, + File { + path: "unquoted no space".into(), + meta: None, + } + ); + assert_eq!( + patch.new, + File { + path: "unquoted no space".into(), + meta: Some(FileMetadata::Other("with metadata".into())), + } + ); + Ok(()) +} diff --git a/patch-rs/tests/samples/README.md b/patch-rs/tests/samples/README.md new file mode 100644 index 0000000..c9e7ed7 --- /dev/null +++ b/patch-rs/tests/samples/README.md @@ -0,0 +1,5 @@ +# Sample Diffs + +Many of these diffs are copied from the following projects: + +* [python-unidiff](https://github.com/matiasb/python-unidiff) diff --git a/patch-rs/tests/samples/bzr.diff b/patch-rs/tests/samples/bzr.diff new file mode 100644 index 0000000..607bcf8 --- /dev/null +++ b/patch-rs/tests/samples/bzr.diff @@ -0,0 +1,34 @@ +=== added file 'added_file' +--- added_file 1970-01-01 00:00:00 +0000 ++++ added_file 2013-10-13 23:44:04 +0000 +@@ -0,0 +1,4 @@ ++This was missing! ++Adding it now. ++ ++Only for testing purposes. +\ No newline at end of file + +=== modified file 'modified_file' +--- modified_file 2013-10-13 23:53:13 +0000 ++++ modified_file 2013-10-13 23:53:26 +0000 +@@ -1,5 +1,7 @@ + This is the original content. + +-This should be updated. ++This is now updated. ++ ++This is a new line. + + This will stay. +\ No newline at end of file + +=== removed file 'removed_file' +--- removed_file 2013-10-13 23:53:13 +0000 ++++ removed_file 1970-01-01 00:00:00 +0000 +@@ -1,3 +0,0 @@ +-This content shouldn't be here. +- +-This file will be removed. +\ No newline at end of file + + diff --git a/patch-rs/tests/samples/crlf.diff b/patch-rs/tests/samples/crlf.diff new file mode 100644 index 0000000..4d4ae51 --- /dev/null +++ b/patch-rs/tests/samples/crlf.diff @@ -0,0 +1,10 @@ +--- before.py ++++ after.py +@@ -1,4 +1,4 @@ +-bacon +-eggs +-ham ++python ++eggy ++hamster + guido diff --git a/patch-rs/tests/samples/git.diff b/patch-rs/tests/samples/git.diff new file mode 100644 index 0000000..3cfa303 --- /dev/null +++ b/patch-rs/tests/samples/git.diff @@ -0,0 +1,36 @@ +diff --git a/added_file b/added_file +new file mode 100644 +index 0000000..9b710f3 +--- /dev/null ++++ b/added_file +@@ -0,0 +1,4 @@ ++This was missing! ++Adding it now. ++ ++Only for testing purposes. +\ No newline at end of file +diff --git a/modified_file b/modified_file +index c7921f5..8946660 100644 +--- a/modified_file ++++ b/modified_file +@@ -1,5 +1,7 @@ + This is the original content. + +-This should be updated. ++This is now updated. ++ ++This is a new line. + + This will stay. +\ No newline at end of file +diff --git a/removed_file b/removed_file +deleted file mode 100644 +index 1f38447..0000000 +--- a/removed_file ++++ /dev/null +@@ -1,3 +0,0 @@ +-This content shouldn't be here. +- +-This file will be removed. +\ No newline at end of file + diff --git a/patch-rs/tests/samples/hg.diff b/patch-rs/tests/samples/hg.diff new file mode 100644 index 0000000..2b05e20 --- /dev/null +++ b/patch-rs/tests/samples/hg.diff @@ -0,0 +1,30 @@ +diff -r 44299fd3d1a8 added_file +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/added_file Sun Oct 13 20:51:40 2013 -0300 +@@ -0,0 +1,4 @@ ++This was missing! ++Adding it now. ++ ++Only for testing purposes. +\ No newline at end of file +diff -r 44299fd3d1a8 modified_file +--- a/modified_file Sun Oct 13 20:51:07 2013 -0300 ++++ b/modified_file Sun Oct 13 20:51:40 2013 -0300 +@@ -1,5 +1,7 @@ + This is the original content. + +-This should be updated. ++This is now updated. ++ ++This is a new line. + + This will stay. +\ No newline at end of file +diff -r 44299fd3d1a8 removed_file +--- a/removed_file Sun Oct 13 20:51:07 2013 -0300 ++++ /dev/null Thu Jan 01 00:00:00 1970 +0000 +@@ -1,3 +0,0 @@ +-This content shouldn't be here. +- +-This file will be removed. +\ No newline at end of file diff --git a/patch-rs/tests/samples/sample0.diff b/patch-rs/tests/samples/sample0.diff new file mode 100644 index 0000000..a038f37 --- /dev/null +++ b/patch-rs/tests/samples/sample0.diff @@ -0,0 +1,63 @@ +--- /path/to/original ''timestamp'' ++++ /path/to/new ''timestamp'' +@@ -1,3 +1,9 @@ Section Header ++This is an important ++notice! It should ++therefore be located at ++the beginning of this ++document! ++ + This part of the + document has stayed the + same from version to +@@ -5,16 +11,10 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress the size of the +-changes. +- +-This paragraph contains +-text that is outdated. +-It will be deleted in the +-near future. ++compress anything. + + It is important to spell +-check this dokument. On ++check this document. On + the other hand, a + misspelled word isn't + the end of the world. +@@ -22,3 +22,7 @@ + this paragraph needs to + be changed. Things can + be added after it. ++ ++This paragraph contains ++important new additions ++to this document. +--- /dev/null ++++ /path/to/another_new +@@ -0,0 +1,9 @@ ++This is an important ++notice! It should ++therefore be located at ++the beginning of this ++document! ++ ++This part of the ++document has stayed the ++same from version to +--- /path/to/existing ++++ /dev/null +@@ -1,9 +0,0 @@ +-This is an important +-notice! It should +-therefore be located at +-the beginning of this +-document! +- +-This part of the +-document has stayed the +-same from version to diff --git a/patch-rs/tests/samples/sample1.diff b/patch-rs/tests/samples/sample1.diff new file mode 100644 index 0000000..e00903a --- /dev/null +++ b/patch-rs/tests/samples/sample1.diff @@ -0,0 +1,39 @@ +--- /path/to/original ''timestamp'' ++++ /path/to/new ''timestamp'' +@@ -1,3 +1,9 @@ ++This is an important ++notice! It should ++therefore be located at ++the beginning of this ++document! ++ + This part of the + document has stayed the + same from version to +@@ -5,16 +11,13 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress the size of the +-changes. +- +-This paragraph contains +-text that is outdated. +-It will be deleted in the +-near future. ++compress anything. + + It is important to spell +-check this dokument. On ++check this document. On + the other hand, a + misspelled word isn't + the end of the world. +@@ -22,3 +22,7 @@ + this paragraph needs to + be changed. Things can + be added after it. ++ ++This paragraph contains ++important new additions ++to this document. diff --git a/patch-rs/tests/samples/sample2.diff b/patch-rs/tests/samples/sample2.diff new file mode 100644 index 0000000..857997d --- /dev/null +++ b/patch-rs/tests/samples/sample2.diff @@ -0,0 +1,53 @@ +# HG changeset patch +# Parent 13ba6cbdb304cd251fbc22466cadb21019ee817f +# User Bill McCloskey + +diff --git a/content/base/src/nsContentUtils.cpp b/content/base/src/nsContentUtils.cpp +--- a/content/base/src/nsContentUtils.cpp ++++ b/content/base/src/nsContentUtils.cpp +@@ -6369,17 +6369,17 @@ public: + nsCycleCollectionParticipant* helper) + { + } + + NS_IMETHOD_(void) NoteNextEdgeName(const char* name) + { + } + +- NS_IMETHOD_(void) NoteWeakMapping(void* map, void* key, void* val) ++ NS_IMETHOD_(void) NoteWeakMapping(void* map, void* key, void* kdelegate, void* val) + { + } + + bool mFound; + + private: + void* mWrapper; + }; +diff --git a/js/src/jsfriendapi.cpp b/js/src/jsfriendapi.cpp +--- a/js/src/jsfriendapi.cpp ++++ b/js/src/jsfriendapi.cpp +@@ -527,16 +527,24 @@ js::VisitGrayWrapperTargets(JSCompartmen + { + for (WrapperMap::Enum e(comp->crossCompartmentWrappers); !e.empty(); e.popFront()) { + gc::Cell *thing = e.front().key.wrapped; + if (thing->isMarked(gc::GRAY)) + callback(closure, thing); + } + } + ++JS_FRIEND_API(JSObject *) ++js::GetWeakmapKeyDelegate(JSObject *key) ++{ ++ if (JSWeakmapKeyDelegateOp op = key->getClass()->ext.weakmapKeyDelegateOp) ++ return op(key); ++ return NULL; ++} ++ + JS_FRIEND_API(void) + JS_SetAccumulateTelemetryCallback(JSRuntime *rt, JSAccumulateTelemetryDataCallback callback) + { + rt->telemetryCallback = callback; + } + + JS_FRIEND_API(JSObject *) diff --git a/patch-rs/tests/samples/sample3.diff b/patch-rs/tests/samples/sample3.diff new file mode 100644 index 0000000..6e7c72c --- /dev/null +++ b/patch-rs/tests/samples/sample3.diff @@ -0,0 +1,35 @@ +=== added file 'added_file' +--- added_file 1970-01-01 00:00:00 +0000 ++++ added_file 2013-10-13 23:44:04 +0000 +@@ -0,0 +1,4 @@ ++This was missing! ++holá mundo! ++ ++Only for testing purposes. +\ No newline at end of file + +=== modified file 'modified_file' +--- modified_file 2013-10-13 23:53:13 +0000 ++++ modified_file 2013-10-13 23:53:26 +0000 +@@ -1,5 +1,7 @@ + This is the original content. + +-This should be updated. ++This is now updated. ++ ++This is a new line. + +-This will stay. +\ No newline at end of file ++This will stay. + +=== removed file 'removed_file' +--- removed_file 2013-10-13 23:53:13 +0000 ++++ removed_file 1970-01-01 00:00:00 +0000 +@@ -1,3 +0,0 @@ +-This content shouldn't be here. +- +-This file will be removed. +\ No newline at end of file + + diff --git a/patch-rs/tests/samples/sample4.diff b/patch-rs/tests/samples/sample4.diff new file mode 100644 index 0000000..0fc0ad2 --- /dev/null +++ b/patch-rs/tests/samples/sample4.diff @@ -0,0 +1,33 @@ +=== added file 'added_file' +--- added_file 1970-01-01 00:00:00 +0000 ++++ added_file 2013-10-13 23:44:04 +0000 +@@ -0,0 +1,4 @@ ++This was missing! ++holá mundo! ++ ++Only for testing purposes. +\ No newline at end of file + +=== modified file 'modified_file' +--- modified_file 2013-10-13 23:53:13 +0000 ++++ modified_file 2013-10-13 23:53:26 +0000 +@@ -1,5 +1,7 @@ + This is the original content. + +-This should be updated. ++This is now updated. ++ ++This is a new line. + + This will stay. +\ No newline at end of file + +=== removed file 'removed_file' +--- removed_file 2013-10-13 23:53:13 +0000 ++++ removed_file 1970-01-01 00:00:00 +0000 +@@ -1,3 +0,0 @@ +-This content shouldn't be here. +- +-This file will be removed. +\ No newline at end of file + diff --git a/patch-rs/tests/samples/sample5.diff b/patch-rs/tests/samples/sample5.diff new file mode 100644 index 0000000..ec0eeb8 --- /dev/null +++ b/patch-rs/tests/samples/sample5.diff @@ -0,0 +1,29 @@ +=== modified file 'modified_file1' +--- modified_file1 2013-10-13 23:53:13 +0000 ++++ modified_file1 2013-10-13 23:53:26 +0000 +@@ -1,5 +1,7 @@ + This is the original content. + +-This should be updated. ++This is now updated. ++ ++This is a new line. + + This will stay. +\ No newline at end of file + +=== modified file 'modified_file2' +--- modified_file2 2013-10-13 23:53:13 +0000 ++++ modified_file2 2013-10-13 23:53:26 +0000 +@@ -1,5 +1,7 @@ + This is the original content. + +-This should be updated. ++This is now updated. ++ ++This is a new line. + + This will stay. +\ No newline at end of file + + diff --git a/patch-rs/tests/samples/sample6.diff b/patch-rs/tests/samples/sample6.diff new file mode 100644 index 0000000..bb244f4 --- /dev/null +++ b/patch-rs/tests/samples/sample6.diff @@ -0,0 +1,38 @@ +--- /path/to/original ''timestamp'' ++++ /path/to/new ''timestamp'' +@@ -1,3 +1,9 @@ ++This is an important ++notice! It should ++therefore be located at ++the beginning of this ++document! ++ + This part of the + document has stayed the + same from version to +@@ -5,16 +11,13 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress the size of the +-changes. +- +-This paragraph contains +-text that is outdated. +-It will be deleted in the +-near future. ++compress anything. + + It is important to spell +-check this dokument. On ++check this document. On + the other hand, a + misspelled word isn't + the end of the world. + this paragraph needs to + be changed. Things can + be added after it. ++ ++This paragraph contains ++important new additions ++to this document. diff --git a/patch-rs/tests/samples/sample7.diff b/patch-rs/tests/samples/sample7.diff new file mode 100644 index 0000000..94f8340 --- /dev/null +++ b/patch-rs/tests/samples/sample7.diff @@ -0,0 +1,29 @@ +--- /path/to/original ''timestamp'' ++++ /path/to/new ''timestamp'' +@@ -1,3 +1,9 @@ ++This is an important ++notice! It should ++therefore be located at ++the beginning of this ++document! ++ + This part of the + document has stayed the + same from version to +@@ -5,16 +11,13 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress the size of the +-changes. +- +-This paragraph contains +-text that is outdated. ++compress anything. + + It is important to spell +-check this dokument. On ++check this document. On + the other hand, a + misspelled word isn't + the end of the world. diff --git a/patch-rs/tests/samples/svn.diff b/patch-rs/tests/samples/svn.diff new file mode 100644 index 0000000..8803d7b --- /dev/null +++ b/patch-rs/tests/samples/svn.diff @@ -0,0 +1,33 @@ +Index: modified_file +=================================================================== +--- modified_file (revision 191) ++++ modified_file (working copy) +@@ -1,5 +1,7 @@ + This is the original content. + +-This should be updated. ++This is now updated. + ++This is a new line. ++ + This will stay. +\ No newline at end of file +Index: removed_file +=================================================================== +--- removed_file (revision 188) ++++ removed_file (working copy) +@@ -1,3 +0,0 @@ +-This content shouldn't be here. +- +-This file will be removed. +\ No newline at end of file +Index: added_file +=================================================================== +--- added_file (revision 0) ++++ added_file (revision 0) +@@ -0,0 +1,4 @@ ++This was missing! ++Adding it now. ++ ++Only for testing purposes. +\ No newline at end of file