Merge #371

371: ufmt: add support for hex printing and width specifiers r=jrvanwhy a=hudson-ayers This change adds ufmt support for hex printing of numbers and for fixed-width printing up to widths of 18 using 0s or spaces as padding. 0 padding is only supported for numbers. Custom fill characters and specified alignment is not supported. This change does decrease the size savings of ufmt relative to core::fmt, but brings them much closer to feature parity. This change has not been submitted to the upstream ufmt repo, as it includes variations on several changes already submitted as PRs to that repo which have not been accepted: japaric/ufmt#25 and japaric/ufmt#26 . A number of tests were added to verify that ufmt formatting is identical to core::fmt formatting for the supported additions. These can be run using `cargo test --features=std` from the ufmt directory. In a closed-source codebase, switching apps to use ufmt instead of core::fmt reduced code size by a total of 7200 bytes across 4 apps, with the only loss in functionality being two locations that used fixed alignment printing. In the same codebase switching to unmodified ufmt saved 12,500 bytes, at the expense of - not being able to print any numbers in hexadecimal - not being able to print fixed width fields Notably, this support is *somewhat* pay-for-what-you-use: The savings were ~10kB before I switched back to hex/fixed width formatting where it had been used before. Accordingly, for users of ufmt who do not need/want any of this functionality, the overhead is about a 20% increase in code size. I suspect I can get that number somewhat lower, but wanted to submit this as-is for feedback before I spend too much time optimizing. Co-authored-by: Hudson Ayers <[email protected]>
tock · Feb 14, 2022 · 4218c9a · 4218c9a
2 parents 2b56743 + 70e839c
commit 4218c9a
Show file tree

Hide file tree

Showing 8 changed files with 690 additions and 94 deletions.
diff --git a/ufmt/macros/Cargo.toml b/ufmt/macros/Cargo.toml
@@ -16,7 +16,9 @@ proc-macro = true
 proc-macro-hack = "0.5.11"
 proc-macro2 = "1"
 quote = "1"
+regex = "1.5.4"
+lazy_static = "1.4.0"
 
 [dependencies.syn]
 features = ["full"]
-version = "1"
+version = "1"
diff --git a/ufmt/macros/src/lib.rs b/ufmt/macros/src/lib.rs
@@ -11,6 +11,7 @@ use core::mem;
 use proc_macro::TokenStream;
 use std::borrow::Cow;
 
+use lazy_static::lazy_static;
 use proc_macro2::{Literal, Span};
 use proc_macro_hack::proc_macro_hack;
 use quote::quote;
@@ -229,8 +230,35 @@ fn write(input: TokenStream, newline: bool) -> TokenStream {
             pats.push(quote!(#pat));
 
             match piece {
-                Piece::Display => {
-                    exprs.push(quote!(ufmt::uDisplay::fmt(#pat, f)?;));
+                Piece::Display {
+                    pretty,
+                    hex,
+                    width,
+                    pad,
+                } => {
+                    exprs.push(if let Some(w) = width {
+                        if let Some(case) = hex {
+                            if case == Hex::Lower {
+                                quote!(f.fixed_width(#pretty, Some(true), Some(#w), #pad, |f| ufmt::uDisplay::fmt(#pat, f))?;)
+                            } else {
+                                quote!(f.fixed_width(#pretty, Some(false), Some(#w), #pad, |f| ufmt::uDisplay::fmt(#pat, f))?;)
+                            }
+                        } else {
+                            quote!(f.fixed_width(#pretty, None, Some(#w), #pad, |f| ufmt::uDisplay::fmt(#pat, f))?;)
+                        }
+                    } else if hex == Some(Hex::Lower) && pretty {
+                        quote!(f.hex(true, true, |f| ufmt::uDisplay::fmt(#pat, f))?;)
+                    } else if hex == Some(Hex::Upper) && pretty {
+                        quote!(f.hex(false, true, |f| ufmt::uDisplay::fmt(#pat, f))?;)
+                    } else if hex == Some(Hex::Lower) {
+                        quote!(f.hex(true, false, |f| ufmt::uDisplay::fmt(#pat, f))?;)
+                    } else if hex == Some(Hex::Upper) {
+                        quote!(f.hex(false, false, |f| ufmt::uDisplay::fmt(#pat, f))?;)
+                    } else if pretty {
+                        quote!(f.pretty(|f| ufmt::uDisplay::fmt(#pat, f))?;)
+                    } else {
+                        quote!(ufmt::uDisplay::fmt(#pat, f)?;)
+                    });
                 }
 
                 Piece::Debug { pretty } => {
@@ -293,10 +321,23 @@ impl Parse for Input {
     }
 }
 
+#[derive(Debug, PartialEq)]
+pub(crate) enum Hex {
+    Upper,
+    Lower,
+}
+
 #[derive(Debug, PartialEq)]
 enum Piece<'a> {
-    Debug { pretty: bool },
-    Display,
+    Debug {
+        pretty: bool,
+    },
+    Display {
+        pretty: bool,
+        hex: Option<Hex>,
+        width: Option<u8>,
+        pad: char,
+    },
     Str(Cow<'a, str>),
 }
 
@@ -379,10 +420,17 @@ fn parse<'l>(mut literal: &'l str, span: Span) -> parse::Result<Vec<Piece<'l>>>
                 const DISPLAY: &str = "}";
                 const ESCAPED_BRACE: &str = "{";
 
+                use regex::Regex;
+                lazy_static! {
+                    static ref WIDTH_REGEX: Regex =
+                        Regex::new(r"^:(#?)(0?)([0-9]*)([x|X]?)}").unwrap();
+                }
                 let head = head.unwrap_or("");
                 if tail.starts_with(DEBUG)
                     || tail.starts_with(DEBUG_PRETTY)
                     || tail.starts_with(DISPLAY)
+                    || WIDTH_REGEX.is_match(tail)
+                // for width specifiers
                 {
                     if buf.is_empty() {
                         if !head.is_empty() {
@@ -405,8 +453,53 @@ fn parse<'l>(mut literal: &'l str, span: Span) -> parse::Result<Vec<Piece<'l>>>
                         pieces.push(Piece::Debug { pretty: true });
 
                         literal = &tail[DEBUG_PRETTY.len()..];
+                    } else if let Some(cap) = WIDTH_REGEX.captures(tail) {
+                        let leading_pound = cap[1].eq("#");
+                        let leading_zero = cap[2].eq("0");
+                        let width = if cap[3].len() > 0 {
+                            Some(cap[3].parse::<u8>().map_err(|_| {
+                                parse::Error::new(
+                                    span,
+                                    "invalid width specifier: expected valid u8",
+                                )
+                            })?)
+                        } else {
+                            None
+                        };
+                        // 18 is the smallest buffer used by any numeric uDebug impl
+                        // By increasing that buffer size, we could increase this maximum value,
+                        // at the cost of cycles and a small amount of size
+                        if let Some(w) = width {
+                            if w > 18 {
+                                return Err(parse::Error::new(
+                                    span,
+                                    "invalid width specifier: maximum allowed specifier is 18",
+                                ));
+                            }
+                        }
+                        let hex = if cap[4].eq("x") {
+                            Some(Hex::Lower)
+                        } else if cap[4].eq("X") {
+                            Some(Hex::Upper)
+                        } else {
+                            None
+                        };
+                        pieces.push(Piece::Display {
+                            pretty: leading_pound,
+                            hex,
+                            width,
+                            pad: if leading_zero { '0' } else { ' ' },
+                        });
+
+                        // first capture is entire match
+                        literal = &tail[cap[0].len()..];
                     } else {
-                        pieces.push(Piece::Display);
+                        pieces.push(Piece::Display {
+                            pretty: false,
+                            hex: None,
+                            width: None,
+                            pad: ' ',
+                        });
 
                         literal = &tail[DISPLAY.len()..];
                     }
@@ -418,7 +511,7 @@ fn parse<'l>(mut literal: &'l str, span: Span) -> parse::Result<Vec<Piece<'l>>>
                 } else {
                     return Err(parse::Error::new(
                         span,
-                        "invalid format string: expected `{{`, `{}`, `{:?}` or `{:#?}`",
+                        "invalid format string: expected `{{`, `{}`, `{:?}`, `{:#?}`, `{:x}`, `{:#x}`, or a width specifier}",
                     ));
                 }
             }
@@ -434,6 +527,7 @@ mod tests {
 
     use proc_macro2::Span;
 
+    use crate::Hex;
     use crate::Piece;
 
     #[test]
@@ -445,7 +539,12 @@ mod tests {
             super::parse("The answer is {}", span).ok(),
             Some(vec![
                 Piece::Str(Cow::Borrowed("The answer is ")),
-                Piece::Display
+                Piece::Display {
+                    pretty: false,
+                    hex: None,
+                    width: None,
+                    pad: ' '
+                }
             ]),
         );
 
@@ -459,6 +558,56 @@ mod tests {
             Some(vec![Piece::Debug { pretty: true }]),
         );
 
+        assert_eq!(
+            super::parse("{:x}", span).ok(),
+            Some(vec![Piece::Display {
+                pretty: false,
+                hex: Some(Hex::Lower),
+                width: None,
+                pad: ' ',
+            }]),
+        );
+
+        assert_eq!(
+            super::parse("{:X}", span).ok(),
+            Some(vec![Piece::Display {
+                pretty: false,
+                hex: Some(Hex::Upper),
+                width: None,
+                pad: ' ',
+            }]),
+        );
+
+        assert_eq!(
+            super::parse("{:10x}", span).ok(),
+            Some(vec![Piece::Display {
+                pretty: false,
+                hex: Some(Hex::Lower),
+                width: Some(10),
+                pad: ' ',
+            }]),
+        );
+
+        assert_eq!(
+            super::parse("{:08x}", span).ok(),
+            Some(vec![Piece::Display {
+                pretty: false,
+                hex: Some(Hex::Lower),
+                width: Some(8),
+                pad: '0',
+            }]),
+        );
+
+        assert_eq!(
+            super::parse("{:#08x}", span).ok(),
+            Some(vec![Piece::Display {
+                pretty: true,
+                hex: Some(Hex::Lower),
+                width: Some(8),
+                pad: '0',
+            }]),
+        );
+
         // escaped braces
         assert_eq!(
             super::parse("{{}} is not an argument", span).ok(),
@@ -470,7 +619,6 @@ mod tests {
         assert!(super::parse(" {", span).is_err());
         assert!(super::parse("{ ", span).is_err());
         assert!(super::parse("{ {", span).is_err());
-        assert!(super::parse("{:x}", span).is_err());
     }
 
     #[test]

diff --git a/ufmt/src/impls/core.rs b/ufmt/src/impls/core.rs
@@ -59,38 +59,38 @@ where
     }
 }
 
-// FIXME this (`escape_debug`) contains a panicking branch
-// impl uDebug for str {
-//     fn fmt<W>(&self, f: &mut Formatter<'_, W>) -> Result<(), W::Error>
-//     where
-//         W: uWrite + ?Sized,
-//     {
-//         f.write_str("\"")?;
-
-//         let mut from = 0;
-//         for (i, c) in self.char_indices() {
-//             let esc = c.escape_debug();
-
-//             // If char needs escaping, flush backlog so far and write, else skip
-//             if esc.len() != 1 {
-//                 f.write_str(
-//                     self.get(from..i)
-//                         .unwrap_or_else(|| unsafe { assume_unreachable!() }),
-//                 )?;
-//                 for c in esc {
-//                     f.write_char(c)?;
-//                 }
-//                 from = i + c.len_utf8();
-//             }
-//         }
+// Below borrowed from https://github.com/japaric/ufmt/pull/25
+impl uDebug for str {
+    fn fmt<W>(&self, f: &mut Formatter<'_, W>) -> Result<(), W::Error>
+    where
+        W: uWrite + ?Sized,
+    {
+        f.write_str("\"")?;
+        let mut from = 0;
+        for (i, c) in self.char_indices() {
+            let esc = c.escape_debug();
+
+            // If char needs escaping, flush backlog so far and write, else skip
+            if esc.len() != 1 {
+                // SAFETY: `char_indices()` guarantees that `i` is always the index of utf-8 boundary of `c`.
+                // In the first iteration `from` is zero and therefore also the index of the bounardy of `c`.
+                // In the following iterations `from` either keeps its value or is set to `i + c.len_utf8()`
+                // (with last rounds `i` and `c`), which means `from` is again `i` (this round), the index
+                // of this rounds `c`. Notice that this also implies `from <= i`.
+                f.write_str(unsafe { self.get_unchecked(from..i) })?;
+                for c in esc {
+                    f.write_char(c)?;
+                }
+                from = i + c.len_utf8();
+            }
+        }
 
-//         f.write_str(
-//             self.get(from..)
-//                 .unwrap_or_else(|| unsafe { assume_unreachable!() }),
-//         )?;
-//         f.write_str("\"")
-//     }
-// }
+        // SAFETY: As seen above, `from` is the index of an utf-8 boundary in `self`.
+        // This also means that from is in bounds of `self`: `from <= self.len()`.
+        f.write_str(unsafe { self.get_unchecked(from..) })?;
+        f.write_str("\"")
+    }
+}
 
 impl uDisplay for str {
     #[inline(always)]