Skip to content

Commit

Permalink
Merge pull request #22 from alex/optimize-computing-codepoints
Browse files Browse the repository at this point in the history
Optimize computing codepoint indices with all ASCII data
  • Loading branch information
ianlopshire committed Aug 27, 2019
2 parents 37aafdf + 16e98b7 commit 92aa187
Showing 1 changed file with 25 additions and 18 deletions.
43 changes: 25 additions & 18 deletions decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,33 +140,40 @@ func newRawValue(bytes []byte, useCodepointIndices bool) (rawValue, error) {
bytes: bytes,
}
if useCodepointIndices {
bytesIdx := 0
// Lazily allocate this only if the value actually contains a
// multi-byte character.
codepointIndices := []int(nil)
for bytesIdx < len(bytes) {
_, codepointSize := utf8.DecodeRune(bytes[bytesIdx:])
if codepointSize == 0 {
return rawValue{}, errors.New("fixedwidth: Invalid codepoint")
bytesIdx := findFirstMultiByteChar(bytes)
// If we've got multi-byte characters, fill in the rest of codepointIndices.
if bytesIdx < len(bytes) {
codepointIndices := make([]int, bytesIdx)
for i := 0; i < bytesIdx; i++ {
codepointIndices[i] = i
}
// We have a multi-byte codepoint, we need to allocate
// codepointIndices
if codepointIndices == nil && codepointSize > 1 {
codepointIndices = make([]int, bytesIdx)
for i := 0; i < bytesIdx; i++ {
codepointIndices[i] = i
for bytesIdx < len(bytes) {
_, codepointSize := utf8.DecodeRune(bytes[bytesIdx:])
if codepointSize == 0 {
return rawValue{}, errors.New("fixedwidth: Invalid codepoint")
}
}
if codepointIndices != nil {
codepointIndices = append(codepointIndices, bytesIdx)
bytesIdx += codepointSize
}
bytesIdx += codepointSize
value.codepointIndices = codepointIndices
}
value.codepointIndices = codepointIndices
}
return value, nil
}

// findFirstMultiByteChar scans bytes from the start and returns the index of
// the first byte whose high bit is set, i.e. the first byte that is not a
// single-byte (ASCII) character. If every byte is ASCII (or the slice is
// empty), it returns len(bytes).
func findFirstMultiByteChar(bytes []byte) int {
	pos := 0
	// Advance past the leading run of 7-bit bytes; anything >= 0x80 has the
	// high bit set and stops the scan.
	for pos < len(bytes) && bytes[pos] < 0x80 {
		pos++
	}
	return pos
}

func (d *Decoder) readLine(v reflect.Value) (err error, ok bool) {
var line []byte
line, err = d.data.ReadBytes('\n')
Expand Down

0 comments on commit 92aa187

Please sign in to comment.