fixes (#17)

* fix end-of-token information in lexer/parser for better empty line retention * move several postfix comment splitters up the AST for better fidelity with original source * fix several cases of spurious end-of-list commas when only a single list item was present * fix several cases of spurious over-indent * more accurate line length prediction, fixing several cases of spurious line breaks * command syntax now prefers additional arguments on a new line if it doesn't fit on a single line * exported fields are considered simple and don't use one-per-line formatting * fix import/except formatting
arnetheduck · Dec 18, 2023 · 387507f · 387507f
1 parent f57fcc1
commit 387507f
Show file tree

Hide file tree

Showing 29 changed files with 1,713 additions and 2,364 deletions.
diff --git a/README.md b/README.md
@@ -62,11 +62,13 @@ command line options are supported ;)
 * diffs are kept at a minimum
   * diff-inducing constructs such as vertical alignment are avoided, for more
     productive merges
+  * formatting the same code again results in no differences
 * it broadly follows the [Status Nim style guide](https://status-im.github.io/nim-style-guide/)
   and [NEP1](https://nim-lang.org/docs/nep1.html)
   * this is tool aimed at making collaboration easier, with others and your
     future self
-  * where NEP1 contradicts itself, see above
+  * where NEP1 contradicts itself or these priorities, these priorities have
+    precedence
 
 The formatting rules are loosely derived from other formatters that already have
 gone through the journey of debating what "pleasing to read" might mean while
@@ -75,14 +77,16 @@ making adaptations for both features and quirks of the Nim parser.
 If in doubt, formatting that works well for descriptive identifiers and avoids
 putting too much information in a single like will be preferred.
 
+If something breaks the above guidelines, it's _likely_ a bug.
+
 ## FAQ
 
 ### Why use a formatter?
 
 A formatter removes the tedium of manually adding structure to code to make it
 more readable - overlong lines, inconsistent indentation, lack of visual
 structure and other small distractions quickly nibble away at the mental budget
-available for writing code while a formatter solves this many many other things
+available for writing code while a formatter solves this and many other things
 at the press of a button.
 
 When you work with others, debates and nitpicking over style go away and
@@ -210,10 +214,7 @@ formatting diffs all the time.
 
 Because it is based on it, of course! As a starting point this is fine but the
 code would benefit greatly from being rewritten with a dedicated formatting
-AST.
-
-In particular, the comment handling is done in a hand-wavy manner with bits and
-pieces of the old code mixed with new heuristics resulting in quite the mess.
+AST - and here we are.
 
 ### Should it be upstreamed?
 
@@ -239,14 +240,38 @@ extra is needed here is an open question but 10% seems like a good start for a
 language like Nim which defaults to 2-space significant indent and a naive
 module system that encourages globally unique identifiers with longer names.
 
-### What about comments?
+Automated formatting keeps most code well below this limit but the extra 10%
+allows gives it some lenience - think of it as those cases where a prorgammer
+would use their judgement and common sense to override a style guide
+recommendation.
 
-`nph` currently touches comments as little as possible - specifically, they
-are not re-flowed or re-aligned and the aim is to make them sticky to where they
-were originally written.
+### What about comments?
 
-Improvements in this area are much welcome - the compiler AST was not really
-written with comment preservation in mind and `nph`'s handling is not great.
+Comments may appear in many different places that are not represented in the
+Nim AST. When `nph` reformats code, it may have to move comments around in order
+to maintain line lengths and introduce or remove indentation.
+
+`nph` uses heuristics to place comments into one of several categories which
+broadly play by similar rules that code does - in particular, indentation is
+used to determine "ownership" over the comment.
+
+The implementation currently tracks several comment categories:
+
+* comment statement nodes - comments that appear with regular indent in
+  statement list contexts (such as the body of a `proc`) as represented as such,
+  ie as statement nodes and get treated similar to how regular code would
+* node attachments - comments that are anchored to an AST node depending on
+  their location in the code relative to that node:
+  * prefix - anything leading up to a particular AST node - for example less
+    indented or otherwise appearing before the node
+  * mid - at midpoints in composite nodes - between the `:` and the body of an
+    `if` for example
+  * postfix - appearing after the node, meaning on the same line or more
+    indented than the node
+
+When rendering the code, `nph` will use these categories to guide where the
+comment text should go, maintaining comment output in such a way that parsing
+the file again results in equivalent comment placement.
 
 ### How are blank lines handled?
 

diff --git a/src/astyaml.nim b/src/astyaml.nim
@@ -35,12 +35,12 @@ proc flagsToStr[T](flags: set[T]): string =
   if flags == {}:
     result = "[]"
   else:
-    result = ""
+    result = "["
     for x in items(flags):
       if result != "":
         result.add(", ")
       result.addYamlString($x)
-    result = "[" & result & "]"
+    result.add "]"
 
 proc lineInfoToStr(conf: ConfigRef; info: TLineInfo): string =
   result.add "["
@@ -96,7 +96,7 @@ proc symToYamlAux(
     if card(n.flags) > 0:
       res.addf("\n$1flags: $2", [istr, flagsToStr(n.flags)])
     res.addf("\n$1magic: $2", [istr, makeYamlString($n.magic)])
-    res.addf("\n$1ast: ", [istr])
+    res.addf("\n$1ast:\n$1    ", [istr])
     res.treeToYamlAux(conf, n.ast, marker, indent + 1, maxRecDepth - 1)
     res.addf("\n$1options: $2", [istr, flagsToStr(n.options)])
     res.addf("\n$1position: $2", [istr, $n.position])
@@ -105,7 +105,7 @@ proc symToYamlAux(
     if card(n.loc.flags) > 0:
       res.addf("\n$1flags: $2", [istr, makeYamlString($n.loc.flags)])
     res.addf("\n$1r: $2", [istr, n.loc.r])
-    res.addf("\n$1lode: $2", [istr])
+    res.addf("\n$1lode:\n$1    ", [istr])
     res.treeToYamlAux(conf, n.loc.lode, marker, indent + 1, maxRecDepth - 1)
 
 proc typeToYamlAux(
@@ -123,9 +123,9 @@ proc typeToYamlAux(
   else:
     let istr = spaces(indent * 4)
     res.addf("kind: $2", [istr, makeYamlString($n.kind)])
-    res.addf("\n$1sym: ")
+    res.addf("\n$1sym:\n$1    ", [istr])
     res.symToYamlAux(conf, n.sym, marker, indent + 1, maxRecDepth - 1)
-    res.addf("\n$1n: ")
+    res.addf("\n$1n:\n$1    ", [istr])
     res.treeToYamlAux(conf, n.n, marker, indent + 1, maxRecDepth - 1)
     if card(n.flags) > 0:
       res.addf("\n$1flags: $2", [istr, flagsToStr(n.flags)])
@@ -135,7 +135,7 @@ proc typeToYamlAux(
     if n.len > 0:
       res.addf("\n$1sons:")
       for s in n.sons:
-        res.addf("\n  - ")
+        res.addf("\n$1  - ", [istr])
         res.typeToYamlAux(conf, s, marker, indent + 1, maxRecDepth - 1)
 
 proc treeToYamlAux(
@@ -158,7 +158,7 @@ proc treeToYamlAux(
       case n.kind
       of nkCharLit..nkUInt64Lit:
         res.addf("\n$1intVal: $2", [istr, $(n.intVal)])
-      of nkFloatLit, nkFloat32Lit, nkFloat64Lit:
+      of nkFloatLit..nkFloat128Lit:
         res.addf("\n$1floatVal: $2", [istr, n.floatVal.toStrMaxPrecision])
       of nkStrLit..nkTripleStrLit:
         res.addf("\n$1strVal: $2", [istr, makeYamlString(n.strVal)])
@@ -172,7 +172,7 @@ proc treeToYamlAux(
           res.addf("\n$1ident: null", [istr])
       else:
         if n.len > 0:
-          res.addf("\n$1sons: ", [istr])
+          res.addf("\n$1sons:", [istr])
           for i in 0..<n.len:
             res.addf("\n$1  - ", [istr])
             res.treeToYamlAux(conf, n[i], marker, indent + 1, maxRecDepth - 1)

diff --git a/src/nph.nim b/src/nph.nim
@@ -4,7 +4,7 @@
 
 import
   "."/[
-    astcmp, astyaml, phast, phastalgo, phmsgs, phlineinfos, phoptions, phparser,
+    astcmp, astyaml, phast, phastyaml, phmsgs, phlineinfos, phoptions, phparser,
     phrenderer
   ]
 

diff --git a/src/phast.nim b/src/phast.nim
@@ -1105,8 +1105,8 @@ type
     path*: PNode # can be a string literal!
 
   CompilesId* = int
-      ## id that is used for the caching logic within
-      ## ``system.compiles``. See the seminst module.
+    ## id that is used for the caching logic within
+    ## ``system.compiles``. See the seminst module.
   TInstantiation* = object
     sym*: PSym
     concreteTypes*: seq[PType]
@@ -1181,6 +1181,7 @@ type
       cname*: string
         # resolved C declaration name in importc decl, e.g.:
         # proc fun() {.importc: "$1aux".} => cname = funaux
+
     constraint*: PNode
       # additional constraints like 'lit|result'; also
       # misused for the codegenDecl and virtual pragmas in the hope
@@ -1648,9 +1649,9 @@ when false:
   var x: CountTable[string]
 
   addQuitProc proc () {.noconv.} =
-        for k, v in pairs(x):
-          echo k
-          echo v
+      for k, v in pairs(x):
+        echo k
+        echo v
 
 proc newSym*(
     symKind: TSymKind;