Skip to content

Commit

Permalink
Merge #371
Browse files Browse the repository at this point in the history
371: ufmt: add support for hex printing and width specifiers r=jrvanwhy a=hudson-ayers

This change adds ufmt support for hex printing of numbers and for fixed-width printing up to widths of 18 using 0s or spaces
as padding. 0 padding is only supported for numbers. Custom fill characters and specified alignment is not supported. This change does decrease the size savings of ufmt relative to core::fmt, but brings them much closer to feature parity.

This change has not been submitted to the upstream ufmt repo, as it includes variations on several changes already submitted as PRs to that repo which have not been accepted: japaric/ufmt#25 and japaric/ufmt#26 .

A number of tests were added to verify that ufmt formatting is identical to core::fmt formatting for the supported additions. These can be run using `cargo test --features=std` from the ufmt directory.

In a closed-source codebase, switching apps to use ufmt instead of core::fmt reduced code size by a total of 7200 bytes across 4 apps, with the only loss in functionality being two locations that used fixed alignment printing.

In the same codebase switching to unmodified ufmt saved 12,500 bytes, at the expense of
- not being able to print any numbers in hexadecimal
- not being able to print fixed width fields

Notably, this support is *somewhat* pay-for-what-you-use: The savings were ~10kB before I switched back to hex/fixed width formatting where it had been used before. Accordingly, for users of ufmt who do not need/want any of this functionality, the overhead is about a 20% increase in code size. I suspect I can get that number somewhat lower, but wanted to submit this as-is for feedback before I spend too much time optimizing.

Co-authored-by: Hudson Ayers <[email protected]>
  • Loading branch information
bors[bot] and hudson-ayers committed Feb 14, 2022
2 parents 2b56743 + 70e839c commit 4218c9a
Show file tree
Hide file tree
Showing 8 changed files with 690 additions and 94 deletions.
4 changes: 3 additions & 1 deletion ufmt/macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ proc-macro = true
proc-macro-hack = "0.5.11"
proc-macro2 = "1"
quote = "1"
regex = "1.5.4"
lazy_static = "1.4.0"

[dependencies.syn]
features = ["full"]
version = "1"
version = "1"
164 changes: 156 additions & 8 deletions ufmt/macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use core::mem;
use proc_macro::TokenStream;
use std::borrow::Cow;

use lazy_static::lazy_static;
use proc_macro2::{Literal, Span};
use proc_macro_hack::proc_macro_hack;
use quote::quote;
Expand Down Expand Up @@ -229,8 +230,35 @@ fn write(input: TokenStream, newline: bool) -> TokenStream {
pats.push(quote!(#pat));

match piece {
Piece::Display => {
exprs.push(quote!(ufmt::uDisplay::fmt(#pat, f)?;));
Piece::Display {
pretty,
hex,
width,
pad,
} => {
exprs.push(if let Some(w) = width {
if let Some(case) = hex {
if case == Hex::Lower {
quote!(f.fixed_width(#pretty, Some(true), Some(#w), #pad, |f| ufmt::uDisplay::fmt(#pat, f))?;)
} else {
quote!(f.fixed_width(#pretty, Some(false), Some(#w), #pad, |f| ufmt::uDisplay::fmt(#pat, f))?;)
}
} else {
quote!(f.fixed_width(#pretty, None, Some(#w), #pad, |f| ufmt::uDisplay::fmt(#pat, f))?;)
}
} else if hex == Some(Hex::Lower) && pretty {
quote!(f.hex(true, true, |f| ufmt::uDisplay::fmt(#pat, f))?;)
} else if hex == Some(Hex::Upper) && pretty {
quote!(f.hex(false, true, |f| ufmt::uDisplay::fmt(#pat, f))?;)
} else if hex == Some(Hex::Lower) {
quote!(f.hex(true, false, |f| ufmt::uDisplay::fmt(#pat, f))?;)
} else if hex == Some(Hex::Upper) {
quote!(f.hex(false, false, |f| ufmt::uDisplay::fmt(#pat, f))?;)
} else if pretty {
quote!(f.pretty(|f| ufmt::uDisplay::fmt(#pat, f))?;)
} else {
quote!(ufmt::uDisplay::fmt(#pat, f)?;)
});
}

Piece::Debug { pretty } => {
Expand Down Expand Up @@ -293,10 +321,23 @@ impl Parse for Input {
}
}

#[derive(Debug, PartialEq)]
pub(crate) enum Hex {
Upper,
Lower,
}

#[derive(Debug, PartialEq)]
enum Piece<'a> {
Debug { pretty: bool },
Display,
Debug {
pretty: bool,
},
Display {
pretty: bool,
hex: Option<Hex>,
width: Option<u8>,
pad: char,
},
Str(Cow<'a, str>),
}

Expand Down Expand Up @@ -379,10 +420,17 @@ fn parse<'l>(mut literal: &'l str, span: Span) -> parse::Result<Vec<Piece<'l>>>
const DISPLAY: &str = "}";
const ESCAPED_BRACE: &str = "{";

use regex::Regex;
lazy_static! {
static ref WIDTH_REGEX: Regex =
Regex::new(r"^:(#?)(0?)([0-9]*)([x|X]?)}").unwrap();
}
let head = head.unwrap_or("");
if tail.starts_with(DEBUG)
|| tail.starts_with(DEBUG_PRETTY)
|| tail.starts_with(DISPLAY)
|| WIDTH_REGEX.is_match(tail)
// for width specifiers
{
if buf.is_empty() {
if !head.is_empty() {
Expand All @@ -405,8 +453,53 @@ fn parse<'l>(mut literal: &'l str, span: Span) -> parse::Result<Vec<Piece<'l>>>
pieces.push(Piece::Debug { pretty: true });

literal = &tail[DEBUG_PRETTY.len()..];
} else if let Some(cap) = WIDTH_REGEX.captures(tail) {
let leading_pound = cap[1].eq("#");
let leading_zero = cap[2].eq("0");
let width = if cap[3].len() > 0 {
Some(cap[3].parse::<u8>().map_err(|_| {
parse::Error::new(
span,
"invalid width specifier: expected valid u8",
)
})?)
} else {
None
};
// 18 is the smallest buffer used by any numeric uDebug impl
// By increasing that buffer size, we could increase this maximum value,
// at the cost of cycles and a small amount of size
if let Some(w) = width {
if w > 18 {
return Err(parse::Error::new(
span,
"invalid width specifier: maximum allowed specifier is 18",
));
}
}
let hex = if cap[4].eq("x") {
Some(Hex::Lower)
} else if cap[4].eq("X") {
Some(Hex::Upper)
} else {
None
};
pieces.push(Piece::Display {
pretty: leading_pound,
hex,
width,
pad: if leading_zero { '0' } else { ' ' },
});

// first capture is entire match
literal = &tail[cap[0].len()..];
} else {
pieces.push(Piece::Display);
pieces.push(Piece::Display {
pretty: false,
hex: None,
width: None,
pad: ' ',
});

literal = &tail[DISPLAY.len()..];
}
Expand All @@ -418,7 +511,7 @@ fn parse<'l>(mut literal: &'l str, span: Span) -> parse::Result<Vec<Piece<'l>>>
} else {
return Err(parse::Error::new(
span,
"invalid format string: expected `{{`, `{}`, `{:?}` or `{:#?}`",
"invalid format string: expected `{{`, `{}`, `{:?}`, `{:#?}`, `{:x}`, `{:#x}`, or a width specifier}",
));
}
}
Expand All @@ -434,6 +527,7 @@ mod tests {

use proc_macro2::Span;

use crate::Hex;
use crate::Piece;

#[test]
Expand All @@ -445,7 +539,12 @@ mod tests {
super::parse("The answer is {}", span).ok(),
Some(vec![
Piece::Str(Cow::Borrowed("The answer is ")),
Piece::Display
Piece::Display {
pretty: false,
hex: None,
width: None,
pad: ' '
}
]),
);

Expand All @@ -459,6 +558,56 @@ mod tests {
Some(vec![Piece::Debug { pretty: true }]),
);

assert_eq!(
super::parse("{:x}", span).ok(),
Some(vec![Piece::Display {
pretty: false,
hex: Some(Hex::Lower),
width: None,
pad: ' ',
}]),
);

assert_eq!(
super::parse("{:X}", span).ok(),
Some(vec![Piece::Display {
pretty: false,
hex: Some(Hex::Upper),
width: None,
pad: ' ',
}]),
);

assert_eq!(
super::parse("{:10x}", span).ok(),
Some(vec![Piece::Display {
pretty: false,
hex: Some(Hex::Lower),
width: Some(10),
pad: ' ',
}]),
);

assert_eq!(
super::parse("{:08x}", span).ok(),
Some(vec![Piece::Display {
pretty: false,
hex: Some(Hex::Lower),
width: Some(8),
pad: '0',
}]),
);

assert_eq!(
super::parse("{:#08x}", span).ok(),
Some(vec![Piece::Display {
pretty: true,
hex: Some(Hex::Lower),
width: Some(8),
pad: '0',
}]),
);

// escaped braces
assert_eq!(
super::parse("{{}} is not an argument", span).ok(),
Expand All @@ -470,7 +619,6 @@ mod tests {
assert!(super::parse(" {", span).is_err());
assert!(super::parse("{ ", span).is_err());
assert!(super::parse("{ {", span).is_err());
assert!(super::parse("{:x}", span).is_err());
}

#[test]
Expand Down
62 changes: 31 additions & 31 deletions ufmt/src/impls/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,38 +59,38 @@ where
}
}

// FIXME this (`escape_debug`) contains a panicking branch
// impl uDebug for str {
// fn fmt<W>(&self, f: &mut Formatter<'_, W>) -> Result<(), W::Error>
// where
// W: uWrite + ?Sized,
// {
// f.write_str("\"")?;

// let mut from = 0;
// for (i, c) in self.char_indices() {
// let esc = c.escape_debug();

// // If char needs escaping, flush backlog so far and write, else skip
// if esc.len() != 1 {
// f.write_str(
// self.get(from..i)
// .unwrap_or_else(|| unsafe { assume_unreachable!() }),
// )?;
// for c in esc {
// f.write_char(c)?;
// }
// from = i + c.len_utf8();
// }
// }
// Below borrowed from https://github.com/japaric/ufmt/pull/25
impl uDebug for str {
fn fmt<W>(&self, f: &mut Formatter<'_, W>) -> Result<(), W::Error>
where
W: uWrite + ?Sized,
{
f.write_str("\"")?;
let mut from = 0;
for (i, c) in self.char_indices() {
let esc = c.escape_debug();

// If char needs escaping, flush backlog so far and write, else skip
if esc.len() != 1 {
// SAFETY: `char_indices()` guarantees that `i` is always the index of utf-8 boundary of `c`.
// In the first iteration `from` is zero and therefore also the index of the bounardy of `c`.
// In the following iterations `from` either keeps its value or is set to `i + c.len_utf8()`
// (with last rounds `i` and `c`), which means `from` is again `i` (this round), the index
// of this rounds `c`. Notice that this also implies `from <= i`.
f.write_str(unsafe { self.get_unchecked(from..i) })?;
for c in esc {
f.write_char(c)?;
}
from = i + c.len_utf8();
}
}

// f.write_str(
// self.get(from..)
// .unwrap_or_else(|| unsafe { assume_unreachable!() }),
// )?;
// f.write_str("\"")
// }
// }
// SAFETY: As seen above, `from` is the index of an utf-8 boundary in `self`.
// This also means that from is in bounds of `self`: `from <= self.len()`.
f.write_str(unsafe { self.get_unchecked(from..) })?;
f.write_str("\"")
}
}

impl uDisplay for str {
#[inline(always)]
Expand Down
Loading

0 comments on commit 4218c9a

Please sign in to comment.