Skip to content

Commit

Permalink
feat: rework implementation, fix architectural issues with range gene…
Browse files Browse the repository at this point in the history
…rator, add progressbar to CLI

Further improvements include better state file handling, retrying HTTP requests when retrieving the response body fails, and many more...
  • Loading branch information
FlorianLoch committed Feb 12, 2024
1 parent 58dc24d commit 6b845e0
Show file tree
Hide file tree
Showing 7 changed files with 279 additions and 89 deletions.
35 changes: 34 additions & 1 deletion cmd/cli/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,45 @@ package main

import (
"fmt"
"github.com/k0kubun/go-ansi"
"github.com/schollz/progressbar/v3"
"os"
"time"

hibpsync "github.com/exaring/go-hibp-sync"
)

// main opens (creating it if necessary) the state file at
// hibpsync.DefaultStateFile, builds a terminal progress bar and runs
// hibpsync.Sync with both wired in. Errors are printed to stdout.
//
// NOTE(review): the first statement below opens an `if` block that is never
// closed; this looks like the removed line of a rendered diff shown next to
// its replacement — confirm against the real cmd/cli/main.go.
func main() {
if err := hibpsync.Sync(); err != nil {
// NOTE(review): stateFile is not closed anywhere in this function.
stateFile, err := os.OpenFile(hibpsync.DefaultStateFile, os.O_RDWR|os.O_CREATE, 0644)
if err != nil {
// The error is only printed; execution continues and stateFile is still
// passed to Sync further down.
fmt.Printf("opening state file error: %q", err)
}

// 0xFFFFF is the bar's maximum; it matches the highest range index used by
// Sync in lib.go.
bar := progressbar.NewOptions(0xFFFFF,
progressbar.OptionSetWriter(ansi.NewAnsiStdout()),
progressbar.OptionEnableColorCodes(true),
progressbar.OptionSetDescription("[cyan]Syncing HIBP data...[reset]"),
progressbar.OptionShowCount(),
progressbar.OptionShowIts(),
progressbar.OptionSetItsString("prefixes"),
progressbar.OptionThrottle(100*time.Millisecond),
progressbar.OptionSetPredictTime(false),
progressbar.OptionSetElapsedTime(true),
progressbar.OptionSetTheme(progressbar.Theme{
Saucer: "[green]=[reset]",
SaucerHead: "[green]>[reset]",
SaucerPadding: " ",
BarStart: "[",
BarEnd: "]",
}))

// Progress callback handed to Sync: only the `current` value is used to
// position the bar. The error returned by Set64 is deliberately discarded
// and the callback always reports success.
updateProgressBar := func(lowest, current, _ int64) error {
_ = bar.Set64(current)

return nil
}

if err := hibpsync.Sync(hibpsync.WithProgressFn(updateProgressBar), hibpsync.WithStateFile(stateFile)); err != nil {
fmt.Printf("sync error: %q", err)
}
}
15 changes: 13 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,18 @@ module github.com/exaring/go-hibp-sync
go 1.21.6

require (
github.com/alitto/pond v1.8.3 // indirect
github.com/alitto/pond v1.8.3
github.com/deckarep/golang-set/v2 v2.6.0
github.com/hashicorp/go-retryablehttp v0.7.5
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213
github.com/schollz/progressbar/v3 v3.14.1
)

require (
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-retryablehttp v0.7.5 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/rivo/uniseg v0.4.7 // indirect
golang.org/x/sys v0.17.0 // indirect
golang.org/x/term v0.17.0 // indirect
)
27 changes: 27 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,10 +1,37 @@
github.com/alitto/pond v1.8.3 h1:ydIqygCLVPqIX/USe5EaV/aSRXTRXDEI9JwuDdu+/xs=
github.com/alitto/pond v1.8.3/go.mod h1:CmvIIGd5jKLasGI3D87qDkQxjzChdKMmnXMg3fG6M6Q=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deckarep/golang-set/v2 v2.6.0 h1:XfcQbWM1LlMB8BsJ8N9vW5ehnnPVIw0je80NsVHagjM=
github.com/deckarep/golang-set/v2 v2.6.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI=
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
github.com/hashicorp/go-retryablehttp v0.7.5 h1:bJj+Pj19UZMIweq/iie+1u5YCdGrnxCT9yvm0e+Nd5M=
github.com/hashicorp/go-retryablehttp v0.7.5/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 h1:qGQQKEcAR99REcMpsXCp3lJ03zYT1PkRd3kQGPn9GVg=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/schollz/progressbar/v3 v3.14.1 h1:VD+MJPCr4s3wdhTc7OEJ/Z3dAeBzJ7yKH/P4lC5yRTI=
github.com/schollz/progressbar/v3 v3.14.1/go.mod h1:Zc9xXneTzWXF81TGoqL71u0sBPjULtEHYtj/WVgVy8E=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww=
golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
177 changes: 133 additions & 44 deletions lib.go
Original file line number Diff line number Diff line change
@@ -1,24 +1,32 @@
package hibpsync

import (
"bytes"
"errors"
"fmt"

"github.com/alitto/pond"
"github.com/hashicorp/go-retryablehttp"
"io"
"os"
"strconv"
"sync"
)

// Default values Sync uses for its configuration before options are applied.
const (
defaultDataDir = "./.hibp-data"
defaultEndpoint = "https://api.pwnedpasswords.com/range/"
defaultCheckETag = true
// NOTE(review): defaultWorkers is listed twice (100 and 50). This looks like
// the removed and the added line of a diff rendered without +/- markers; only
// one of them can exist in the real file — confirm which value is current.
defaultWorkers = 100
defaultWorkers = 50
// DefaultStateFile is the path of the state file. It is exported; the CLI in
// cmd/cli opens this path itself and passes the file in via WithStateFile.
DefaultStateFile = "./.hibp-data/state"
)

// syncConfig holds the settings of a single Sync run. Sync fills it with
// defaults and then lets every supplied SyncOption modify it.
//
// NOTE(review): dataDir, endpoint and checkETag are listed twice and both
// worker and minWorkers appear; this looks like the old and the new field list
// of a rendered diff shown together — confirm against the real lib.go.
type syncConfig struct {
dataDir string
endpoint string
checkETag bool
worker int
// dataDir is handed to fsStorage as its data directory.
dataDir string
// endpoint is handed to hibpClient as the base URL for range requests.
endpoint string
checkETag bool
// minWorkers is passed to pond.New both as the first argument and via
// pond.MinWorkers.
minWorkers int
// progressFn receives progress reports; Sync defaults it to a no-op.
progressFn ProgressFunc
// stateFile, when non-nil, is read by Sync to pick the starting range and
// written to while syncing to persist progress.
stateFile io.ReadWriteSeeker
}

// SyncOption modifies a syncConfig. Sync applies every supplied option, in
// order, after it has set the defaults.
type SyncOption func(*syncConfig)
Expand All @@ -41,81 +49,162 @@ func WithCheckETag(checkETag bool) SyncOption {
}
}

// WithMinWorkers returns a SyncOption that sets the worker count Sync uses
// when it creates its pond worker pool.
//
// NOTE(review): two function headers (WithWorkers / WithMinWorkers) and two
// assignments (c.worker / c.minWorkers) are shown; this looks like the removed
// and the added lines of a rendered diff — confirm which pair is current.
func WithWorkers(workers int) SyncOption {
func WithMinWorkers(workers int) SyncOption {
return func(c *syncConfig) {
c.worker = workers
c.minWorkers = workers
}
}

// WithStateFile returns a SyncOption that configures the state file for Sync.
//
// When a state file is set, Sync reads its starting range from it and writes
// progress back to it while syncing.
func WithStateFile(stateFile io.ReadWriteSeeker) SyncOption {
	setStateFile := func(cfg *syncConfig) {
		cfg.stateFile = stateFile
	}

	return setStateFile
}

// WithProgressFn returns a SyncOption that installs progressFn as the progress
// callback of Sync.
//
// Sync hands the callback to its range generator; if a state file is also
// configured, Sync wraps the callback so that progress is persisted before the
// callback itself is invoked.
func WithProgressFn(progressFn ProgressFunc) SyncOption {
	setProgressFn := func(cfg *syncConfig) {
		cfg.progressFn = progressFn
	}

	return setProgressFn
}

// Sync requests the HIBP hash ranges from the configured endpoint and saves
// them through fsStorage. Defaults are set first and then overridden by the
// given options. The returned error is whatever was recorded while processing
// ranges, or an error from reading the state file.
//
// NOTE(review): this block appears to be a rendered diff with the +/- markers
// stripped, so lines of the previous and of the reworked implementation are
// interleaved (for example both `worker:` and `minWorkers:` below, and two
// `pond.New` calls). It is not valid Go as shown — confirm against the real
// lib.go before relying on any single line.
func Sync(options ...SyncOption) error {
config := &syncConfig{
dataDir: defaultDataDir,
endpoint: defaultEndpoint,
checkETag: defaultCheckETag,
worker: defaultWorkers,
dataDir: defaultDataDir,
endpoint: defaultEndpoint,
checkETag: defaultCheckETag,
minWorkers: defaultWorkers,
// Default progress callback does nothing and reports success.
progressFn: func(_, _, _ int64) error { return nil },
}

for _, option := range options {
option(config)
}

rG, err := newRangeGenerator(0x00000, 0xFFFFF, "")
if err != nil {
return fmt.Errorf("creating range generator: %w", err)
from := int64(0x00000)

// With a state file configured: start from the value stored in it and wrap
// the progress callback so that progress is written back to the file.
if config.stateFile != nil {
lastState, err := readStateFile(config.stateFile)
if err != nil {
return fmt.Errorf("error reading state file: %w", err)
}

from = lastState
innerProgressFn := config.progressFn

config.progressFn = func(lowest, current, to int64) error {
err := func() error {
// Skip the write until `lowest` has advanced by at least 1000 past the
// last persisted value.
if lowest < lastState+1000 {
return nil
}

// Rewind so the new value is written from the start of the file; the
// file is not truncated here.
if _, err := config.stateFile.Seek(0, io.SeekStart); err != nil {
return fmt.Errorf("seeking to beginning of state file: %w", err)
}

if _, err := config.stateFile.Write([]byte(fmt.Sprintf("%d", lowest))); err != nil {
return fmt.Errorf("writing state file: %w", err)
}

lastState = lowest

return nil
}()

// A failed state update is only printed; the wrapped callback still runs.
if err != nil {
fmt.Printf("updating state file: %v\n", err)
}

return innerProgressFn(lowest, current, to)
}
}

retryClient := retryablehttp.NewClient() //TODO: add dnscache, timeout
// The end bound is 0xFFFFF+1 — presumably exclusive; confirm in
// newRangeGenerator.
rG := newRangeGenerator(from, 0xFFFFF+1, config.progressFn)

retryClient := retryablehttp.NewClient()
retryClient.RetryMax = 10
// Presumably disables retryablehttp's own log output — confirm.
retryClient.Logger = nil

hc := hibpClient{
endpoint: config.endpoint,
httpClient: retryClient.StandardClient(),
maxRetries: 2,
}

storage := fsStorage{
dataDir: config.dataDir,
}

pool := pond.New(config.worker, 0, pond.MinWorkers(config.worker))
pool := pond.New(config.minWorkers, 0, pond.MinWorkers(config.minWorkers))
defer pool.Stop()

for {
rangeIndex, ok, err := rG.Next()
if err != nil {
return fmt.Errorf("getting next range: %w", err)
}
// outerErr is written from pool tasks, so access is guarded by errLock.
var (
outerErr error
errLock sync.Mutex
)

if !ok {
break
}
// Keep submitting tasks until one of them stops the pool.
for !pool.Stopped() {
pool.Submit(func() {
keepGoing, err := rG.Next(func(r int64) error {
rangePrefix := toRangeString(r)

if rangeIndex%100 == 0 || rangeIndex < 10 {
fmt.Printf("processing range %d\n", rangeIndex)
}
// An error from LoadETag is ignored here; the request is made with whatever
// etag value was returned.
etag, _ := storage.LoadETag(rangePrefix)
// TODO: Log error with debug level

pool.Submit(func() {
rangePrefix := toRangeString(rangeIndex)
etag, err := storage.LoadETag(rangePrefix)
if err != nil {
fmt.Printf("error loading etag for range %q: %v\n", rangePrefix, err)
return
}
resp, err := hc.RequestRange(rangePrefix, etag)
if err != nil {
return fmt.Errorf("error requesting range %q: %w", rangePrefix, err)
}

resp, err := hc.RequestRange(rangePrefix, etag)
// Nothing to store when the response is flagged as not modified.
if resp.NotModified {
return nil
}

if err := storage.Save(rangePrefix, resp.ETag, resp.Data); err != nil {
return fmt.Errorf("error saving range %q: %w", rangePrefix, err)
}

return nil
})
if err != nil {
fmt.Printf("error requesting range %q: %v\n", rangePrefix, err)
return
}
errLock.Lock()
defer errLock.Unlock()

if resp.NotModified {
return
// NOTE(review): errors.Join receives only the new error, so a previously
// recorded outerErr is replaced rather than joined — confirm whether
// errors.Join(outerErr, ...) was intended.
outerErr = errors.Join(fmt.Errorf("processing range: %w", err))
}
if err := storage.Save(rangePrefix, resp.ETag, resp.Data); err != nil {
fmt.Printf("error saving range %q: %v\n", rangePrefix, err)

// Once rG.Next reports keepGoing == false the pool is stopped, which ends
// the submit loop above.
if !keepGoing {
pool.Stop()
}
})
}

return nil
return outerErr
}

// readStateFile reads the last persisted range index from stateFile.
//
// The file is expected to hold a single base-10 integer, optionally surrounded
// by whitespace. An empty or whitespace-only file means that no progress has
// been recorded yet and yields 0. After a value has been parsed successfully,
// the file is rewound to its beginning.
//
// An error is returned if reading, parsing or rewinding fails, or if the
// stored value is negative: range indices start at 0, so a negative value can
// only stem from a corrupted state file and must not be used as a start.
func readStateFile(stateFile io.ReadWriteSeeker) (int64, error) {
	state, err := io.ReadAll(stateFile)
	if err != nil {
		// A missing file is treated like an empty one: start from the beginning.
		if errors.Is(err, os.ErrNotExist) {
			return 0, nil
		}

		return 0, fmt.Errorf("reading state file: %w", err)
	}

	state = bytes.TrimSpace(state)

	if len(state) == 0 {
		return 0, nil
	}

	lastState, err := strconv.ParseInt(string(state), 10, 64)
	if err != nil {
		return 0, fmt.Errorf("parsing state file: %w", err)
	}

	// Reject values that can never be a valid range index instead of handing
	// them to the caller as a starting point.
	if lastState < 0 {
		return 0, fmt.Errorf("parsing state file: negative state %d", lastState)
	}

	if _, err := stateFile.Seek(0, io.SeekStart); err != nil {
		return 0, fmt.Errorf("seeking to beginning of state file: %w", err)
	}

	return lastState, nil
}
Loading

0 comments on commit 6b845e0

Please sign in to comment.