From ddf85d4a157226f0d11e4c673d8d6fabcafa4498 Mon Sep 17 00:00:00 2001 From: Ian Lopshire Date: Fri, 3 Apr 2020 15:02:39 -0400 Subject: [PATCH 1/4] Add alignment and padding character to struct tags --- format.go | 32 ++++++++++++++++++++++++++++++ tags.go | 56 +++++++++++++++++++++++++++++++++++++++++++--------- tags_test.go | 40 +++++++++++++++++++++++-------------- 3 files changed, 104 insertions(+), 24 deletions(-) create mode 100644 format.go diff --git a/format.go b/format.go new file mode 100644 index 0000000..9c83611 --- /dev/null +++ b/format.go @@ -0,0 +1,32 @@ +package fixedwidth + +const ( + defaultAlignment alignment = "default" + right alignment = "right" + left alignment = "left" +) + +const ( + defaultPadChar = ' ' +) + +var defaultFormat = format{ + alignment: defaultAlignment, + padChar: defaultPadChar, +} + +type format struct { + alignment alignment + padChar byte +} + +type alignment string + +func (a alignment) Valid() bool { + switch a { + case defaultAlignment, right, left: + return true + default: + return false + } +} diff --git a/tags.go b/tags.go index 8377daf..004f68c 100644 --- a/tags.go +++ b/tags.go @@ -7,26 +7,52 @@ import ( "sync" ) -// parseTag splits a struct fields fixed tag into its start and end positions. +// parseTag splits a struct field's fixed tag into its start position, end +// position, and format (alignment and padding character). +// // If the tag is not valid, ok will be false. 
-func parseTag(tag string) (startPos, endPos int, ok bool) { +func parseTag(tag string) (startPos, endPos int, format format, ok bool) { parts := strings.Split(tag, ",") - if len(parts) != 2 { - return startPos, endPos, false + if len(parts) < 2 || len(parts) > 4 { + return 0, 0, defaultFormat, false } var err error if startPos, err = strconv.Atoi(parts[0]); err != nil { - return startPos, endPos, false + return 0, 0, defaultFormat, false + } if endPos, err = strconv.Atoi(parts[1]); err != nil { - return startPos, endPos, false + return 0, 0, defaultFormat, false + } if startPos > endPos || (startPos == 0 && endPos == 0) { - return startPos, endPos, false + return 0, 0, defaultFormat, false + + } + + format = defaultFormat + + if len(parts) >= 3 { + alignment := alignment(parts[2]) + if alignment.Valid() { + format.alignment = alignment + } } - return startPos, endPos, true + if len(parts) >= 4 { + v := parts[3] + switch { + case v == "_": + format.padChar = ' ' + case parts[3] == "__": + format.padChar = '_' + case len(v) > 0: + format.padChar = v[0] + } + } + + return startPos, endPos, format, true } type structSpec struct { @@ -39,6 +65,7 @@ type fieldSpec struct { startPos, endPos int encoder valueEncoder setter valueSetter + format format ok bool } @@ -48,10 +75,21 @@ func buildStructSpec(t reflect.Type) structSpec { } for i := 0; i < t.NumField(); i++ { f := t.Field(i) - ss.fieldSpecs[i].startPos, ss.fieldSpecs[i].endPos, ss.fieldSpecs[i].ok = parseTag(f.Tag.Get("fixed")) + + startPos, endPos, format, ok := parseTag(f.Tag.Get("fixed")) + if !ok { + continue + } + + ss.fieldSpecs[i].startPos = startPos + ss.fieldSpecs[i].endPos = endPos + ss.fieldSpecs[i].format = format + ss.fieldSpecs[i].ok = ok + if ss.fieldSpecs[i].endPos > ss.ll { ss.ll = ss.fieldSpecs[i].endPos } + ss.fieldSpecs[i].encoder = newValueEncoder(f.Type) ss.fieldSpecs[i].setter = newValueSetter(f.Type) } diff --git a/tags_test.go b/tags_test.go index 4bea88c..c4a0500 100644 --- a/tags_test.go 
+++ b/tags_test.go @@ -1,6 +1,7 @@ package fixedwidth import ( + "reflect" "testing" ) @@ -10,33 +11,42 @@ func TestParseTag(t *testing.T) { tag string startPos int endPos int + format format ok bool }{ - {"Valid Tag", "0,10", 0, 10, true}, - {"Valid Tag Single position", "5,5", 5, 5, true}, - {"Tag Empty", "", 0, 0, false}, - {"Tag Too short", "0", 0, 0, false}, - {"Tag Too Long", "2,10,11", 0, 0, false}, - {"StartPos Not Integer", "hello,3", 0, 0, false}, - {"EndPos Not Integer", "3,hello", 0, 0, false}, - {"Tag Contains a Space", "4, 11", 0, 0, false}, - {"Tag Interval Invalid", "14,5", 0, 0, false}, - {"Tag Both Positions Zero", "0,0", 0, 0, false}, + {"Valid Tag", "0,10", 0, 10, defaultFormat, true}, + {"Valid Tag Single position", "5,5", 5, 5, defaultFormat, true}, + {"Valid Tag w/ Alignment", "0,10,right", 0, 10, format{right, defaultPadChar}, true}, + {"Valid Tag w/ Padding Character", "0,10,default,0", 0, 10, format{defaultAlignment, '0'}, true}, + {"Tag Empty", "", 0, 0, defaultFormat, false}, + {"Tag Too short", "0", 0, 0, defaultFormat, false}, + {"Tag Too Long", "2,10,default,_,foo", 0, 0, defaultFormat, false}, + {"StartPos Not Integer", "hello,3", 0, 0, defaultFormat, false}, + {"EndPos Not Integer", "3,hello", 0, 0, defaultFormat, false}, + {"Tag Contains a Space", "4, 11", 0, 0, defaultFormat, false}, + {"Tag Interval Invalid", "14,5", 0, 0, defaultFormat, false}, + {"Tag Both Positions Zero", "0,0", 0, 0, defaultFormat, false}, + {"Space Padding Character", "0,0,default, ", 0, 0, defaultFormat, false}, + {"Space Padding Character (_)", "0,0,default,_", 0, 0, defaultFormat, false}, + {"Underscore Padding Character (__)", "0,0,default,__", 0, 0, defaultFormat, false}, + {"Multi-byte Padding Character", "0,0,default,00", 0, 0, defaultFormat, false}, } { t.Run(tt.name, func(t *testing.T) { - startPos, endPos, ok := parseTag(tt.tag) + startPos, endPos, format, ok := parseTag(tt.tag) if tt.ok != ok { - t.Errorf("parseTag() ok want %v, have %v", tt.ok, 
ok) + t.Errorf("parseTagWithFormat() ok want %v, have %v", tt.ok, ok) } // only check startPos and endPos if valid tags are expected if tt.ok { if tt.startPos != startPos { - t.Errorf("parseTag() startPos want %v, have %v", tt.startPos, startPos) + t.Errorf("parseTagWithFormat() startPos want %v, have %v", tt.startPos, startPos) } - if tt.endPos != endPos { - t.Errorf("parseTag() endPos want %v, have %v", tt.endPos, endPos) + t.Errorf("parseTagWithFormat() endPos want %v, have %v", tt.endPos, endPos) + } + if !reflect.DeepEqual(tt.format, format) { + t.Errorf("parseTagWithFormat() format want %+v, have %+v", tt.format, format) } } }) From 1cd098017c33b8ce0e19746819a31989ff6639f0 Mon Sep 17 00:00:00 2001 From: Ian Lopshire Date: Sat, 4 Apr 2020 12:12:13 -0400 Subject: [PATCH 2/4] Update encoder to support configurable formatting --- encode.go | 34 ++++++++++++++++-- encode_test.go | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 2 deletions(-) diff --git a/encode.go b/encode.go index 5f82065..9311bed 100644 --- a/encode.go +++ b/encode.go @@ -156,6 +156,36 @@ func newValueEncoder(t reflect.Type) valueEncoder { return unknownTypeEncoder(t) } +func (ve valueEncoder) Write(v reflect.Value, dst []byte, format format) error { + value, err := ve(v) + if err != nil { + return err + } + + if len(value) < len(dst) { + switch { + case format.alignment == right: + padding := bytes.Repeat([]byte{format.padChar}, len(dst)-len(value)) + copy(dst, padding) + copy(dst[len(padding):], value) + return nil + + // The second case in this block is a special case to maintain backward + // compatibility. In previous versions of the library, only len(value) bytes were + // written to dst. This means overlapping intervals can, in effect, be used to + // coalesce a value. 
+ case format.alignment == left, format.alignment == defaultAlignment && format.padChar != ' ': + padding := bytes.Repeat([]byte{format.padChar}, len(dst)-len(value)) + copy(dst, value) + copy(dst[len(value):], padding) + return nil + } + } + + copy(dst, value) + return nil +} + func structEncoder(v reflect.Value) ([]byte, error) { ss := cachedStructSpec(v.Type()) dst := bytes.Repeat([]byte(" "), ss.ll) @@ -165,12 +195,12 @@ func structEncoder(v reflect.Value) ([]byte, error) { continue } - val, err := spec.encoder(v.Field(i)) + err := spec.encoder.Write(v.Field(i), dst[spec.startPos-1:spec.endPos:spec.endPos], spec.format) if err != nil { return nil, err } - copy(dst[spec.startPos-1:spec.endPos:spec.endPos], val) } + return dst, nil } diff --git a/encode_test.go b/encode_test.go index 718965b..df7512f 100644 --- a/encode_test.go +++ b/encode_test.go @@ -30,6 +30,26 @@ func ExampleMarshal() { // 1 Ian Lopshire 99.50 } +func ExampleMarshal_configurableFormatting() { + // define some data to encode + people := []struct { + ID int `fixed:"1,5,right,#"` + FirstName string `fixed:"6,15,right,#"` + LastName string `fixed:"16,25,right,#"` + Grade float64 `fixed:"26,30,right,#"` + }{ + {1, "Ian", "Lopshire", 99.5}, + } + + data, err := Marshal(people) + if err != nil { + log.Fatal(err) + } + fmt.Printf("%s", data) + // Output: + // ####1#######Ian##Lopshire99.50 +} + func TestMarshal(t *testing.T) { type H struct { F1 interface{} `fixed:"1,5"` @@ -70,6 +90,80 @@ func TestMarshal(t *testing.T) { } } +func TestMarshal_format(t *testing.T) { + type H struct { + F1 string `fixed:"1,5,left"` + F2 string `fixed:"6,10,left,#"` + F3 string `fixed:"11,15,right"` + F4 string `fixed:"16,20,right,#"` + F5 string `fixed:"21,25,default"` + F6 string `fixed:"26,30,default,#"` + } + + for _, tt := range []struct { + name string + v interface{} + want []byte + shouldErr bool + }{ + { + name: "base case", + v: H{"foo", "bar", "biz", "baz", "bor", "box"}, + want: []byte(`foo ` + `bar##` + ` 
biz` + `##baz` + `bor ` + `box##`), + shouldErr: false, + }, + { + name: "empty", + v: H{"", "", "", "", "", ""}, + want: []byte(` ` + `#####` + ` ` + `#####` + ` ` + `#####`), + shouldErr: false, + }, + { + name: "overflow", + v: H{"12345678", "12345678", "12345678", "12345678", "12345678", "12345678"}, + want: []byte(`12345` + `12345` + `12345` + `12345` + `12345` + `12345`), + shouldErr: false, + }, + } { + t.Run(tt.name, func(t *testing.T) { + have, err := Marshal(tt.v) + if tt.shouldErr != (err != nil) { + t.Errorf("Marshal() err want %v, have %v (%v)", tt.shouldErr, err != nil, err) + } + if !bytes.Equal(tt.want, have) { + t.Errorf("Marshal() want %q, have %q", string(tt.want), string(have)) + } + }) + } +} + +func TestMarshal_backwardCompatibility(t *testing.T) { + // Overlapping intervals can, in effect, be used to coalesce a value. This tests + // ensures this special does not break. + t.Run("interval overlap coalesce", func(t *testing.T) { + type H struct { + F1 string `fixed:"1,5"` + F2 string `fixed:"1,5"` + } + + have, err := Marshal(H{F1: "val"}) + if err != nil { + t.Fatalf("Marshal() unexpected error: %v", err) + } + if want := []byte(`val `); !bytes.Equal(have, want) { + t.Errorf("Marshal() want %q, have %q", string(want), string(have)) + } + + have, err = Marshal(H{F2: "val"}) + if err != nil { + t.Fatalf("Marshal() unexpected error: %v", err) + } + if want := []byte(`val `); !bytes.Equal(have, want) { + t.Errorf("Marshal() want %q, have %q", string(want), string(have)) + } + }) +} + func TestNewValueEncoder(t *testing.T) { for _, tt := range []struct { name string From b83eacbeabae8115b4d946ed555ad06daa6ec20f Mon Sep 17 00:00:00 2001 From: Ian Lopshire Date: Sat, 4 Apr 2020 12:38:49 -0400 Subject: [PATCH 3/4] Update decoder to support configurable formatting --- decode.go | 25 +++++++++++++++++++++---- decode_test.go | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/decode.go 
b/decode.go index f050d56..09bb6a6 100644 --- a/decode.go +++ b/decode.go @@ -230,7 +230,24 @@ func (d *Decoder) readLine(v reflect.Value) (err error, ok bool) { return valueSetter(v, rawValue), true } -func rawValueFromLine(value rawValue, startPos, endPos int) rawValue { +func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawValue { + var trimFunc func(string) string + + switch format.alignment { + case left: + trimFunc = func(s string) string { + return strings.TrimRight(s, string(format.padChar)) + } + case right: + trimFunc = func(s string) string { + return strings.TrimLeft(s, string(format.padChar)) + } + default: + trimFunc = func(s string) string { + return strings.Trim(s, string(format.padChar)) + } + } + if value.codepointIndices != nil { if len(value.codepointIndices) == 0 || startPos > len(value.codepointIndices) { return rawValue{data: ""} @@ -245,7 +262,7 @@ func rawValueFromLine(value rawValue, startPos, endPos int) rawValue { lineData = value.data[relevantIndices[0]:value.codepointIndices[endPos]] } return rawValue{ - data: strings.TrimSpace(lineData), + data: trimFunc(lineData), codepointIndices: relevantIndices, } } else { @@ -256,7 +273,7 @@ func rawValueFromLine(value rawValue, startPos, endPos int) rawValue { endPos = len(value.data) } return rawValue{ - data: strings.TrimSpace(value.data[startPos-1 : endPos]), + data: trimFunc(value.data[startPos-1 : endPos]), } } } @@ -299,7 +316,7 @@ func structSetter(t reflect.Type) valueSetter { if !fieldSpec.ok { continue } - rawValue := rawValueFromLine(raw, fieldSpec.startPos, fieldSpec.endPos) + rawValue := rawValueFromLine(raw, fieldSpec.startPos, fieldSpec.endPos, fieldSpec.format) err := fieldSpec.setter(v.Field(i), rawValue) if err != nil { sf := t.Field(i) diff --git a/decode_test.go b/decode_test.go index 1cdbed0..e3ac497 100644 --- a/decode_test.go +++ b/decode_test.go @@ -163,6 +163,57 @@ func TestUnmarshal(t *testing.T) { }) } +func TestUnmarshal_format(t *testing.T) { 
+ type H struct { + F1 string `fixed:"1,5,left"` + F2 string `fixed:"6,10,left,#"` + F3 string `fixed:"11,15,right"` + F4 string `fixed:"16,20,right,#"` + F5 string `fixed:"21,25,default"` + F6 string `fixed:"26,30,default,#"` + } + + for _, tt := range []struct { + name string + rawValue []byte + target interface{} + expected interface{} + shouldErr bool + }{ + { + name: "base case", + rawValue: []byte(`foo ` + `bar##` + ` baz` + `##biz` + ` bor ` + `#box#`), + target: &[]H{}, + expected: &[]H{{"foo", "bar", "baz", "biz", "bor", "box"}}, + shouldErr: false, + }, + { + name: "keep spaces", + rawValue: []byte(` foo` + ` ##` + `baz ` + `## ` + ` bor ` + `#####`), + target: &[]H{}, + expected: &[]H{{" foo", " ", "baz ", " ", "bor", ""}}, + shouldErr: false, + }, + { + name: "empty", + rawValue: []byte(` ` + `#####` + ` ` + `#####` + ` ` + `#####`), + target: &[]H{}, + expected: &[]H{{"", "", "", "", "", ""}}, + shouldErr: false, + }, + } { + t.Run(tt.name, func(t *testing.T) { + err := Unmarshal(tt.rawValue, tt.target) + if tt.shouldErr != (err != nil) { + t.Errorf("Unmarshal() err want %v, have %v (%v)", tt.shouldErr, err != nil, err) + } + if !tt.shouldErr && !reflect.DeepEqual(tt.target, tt.expected) { + t.Errorf("Unmarshal() want %+v, have %+v", tt.expected, tt.target) + } + }) + } +} + func TestNewValueSetter(t *testing.T) { for _, tt := range []struct { name string From 9b40237fc0694fcfb23280d91819ec8248fa0909 Mon Sep 17 00:00:00 2001 From: Ian Lopshire Date: Sat, 4 Apr 2020 13:13:48 -0400 Subject: [PATCH 4/4] Update readme with info about configurable formatting --- README.md | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index db39cf4..62715b7 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,16 @@ Package fixedwidth provides encoding and decoding for fixed-width formatted Data ## Usage ### Struct Tags -Position within a line is controlled via struct tags. 
-The tags should be formatted as `fixed:"{startPos},{endPos}"` where `startPos` and `endPos` are both positive integers greater than 0. -Positions start at 1. The interval is inclusive. Fields without tags are ignored. + +The struct tag schema used by fixedwidth is: `fixed:"{startPos},{endPos},[{alignment},[{padChar}]]"`[1](#f1). + +The `startPos` and `endPos` arguments control the position within a line. `startPos` and `endPos` must both be positive integers greater than 0. Positions start at 1. The interval is inclusive. + +The `alignment` argument controls the alignment of the value within its interval. The valid options are `default`[2](#f2), `right`, and `left`. The `alignment` is optional and can be omitted. + +The `padChar` argument controls the character that will be used to pad any empty characters in the interval after writing the value. The default padding character is a space. The `padChar` is optional and can be omitted. + +Fields without tags are ignored. ### Encode ```go @@ -89,5 +96,17 @@ decoder.SetUseCodepointIndices(true) // Decode as usual now ``` +### Alignment Behavior + +| Alignment | Encoding | Decoding | +| --------- | -------- | -------- | +| `default` | Field is left aligned | The padding character is trimmed from both right and left of value | +| `left` | Field is left aligned | The padding character is trimmed from right of value | +| `right` | Field is right aligned | The padding character is trimmed from left of value | + +## Notes +1. `{}` indicates an argument. `[]` indicates an optional segment [^](#a1) +2. The `default` alignment is similar to `left` but has slightly different behavior required to maintain backwards compatibility [^](#a2) + ## Licence MIT