Skip to content

Commit

Permalink
[bugfix] API returning incorrect number of descendants at times
Browse files Browse the repository at this point in the history
  • Loading branch information
alexferrari88 committed Oct 25, 2022
1 parent 692a340 commit 47babc9
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pkg/gohn/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,5 @@ Package GoHN is a wrapper for the Hacker News API: https://github.com/HackerNews
package gohn

const (
Version = "0.7.4"
Version = "0.7.5"
)
28 changes: 20 additions & 8 deletions pkg/gohn/items.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,19 @@ func (s *ItemsService) FetchAllDescendants(ctx context.Context, item *Item, fn I
mapCommentById = make(ItemsIndex, commentsNumToFetch)
commentsChan = make(chan *Item, commentsNumToFetch)
kidsQueue = make(chan int, commentsNumToFetch)
wg.Add(commentsNumToFetch)
} else {
commentsNumToFetch = len(*item.Kids)
mapCommentById = make(ItemsIndex)
commentsChan = make(chan *Item)
kidsQueue = make(chan int, commentsNumToFetch)
wg.Add(commentsNumToFetch)
}

wg.Add(len(*item.Kids))

// the following variables are used to signal the start of the processing
start := make(chan struct{})
var startOnce sync.Once

// initialize kidsQueue so that the fetching in the for loop can start
go func() {
for _, kid := range *item.Kids {
Expand All @@ -125,14 +129,26 @@ func (s *ItemsService) FetchAllDescendants(ctx context.Context, item *Item, fn I

// goroutine to close the done channel when all the items are fetched and processed
go func() {
// wait for the first kid to be in the queue
// in this way, we don't close the done channel before the processing has started
<-start
// the first kid has been added to the queue
// it is safe now to start waiting
wg.Wait()
close(done)
}()
L:
for {
select {
case <-ctx.Done():
return nil, ctx.Err()
// fetch the item and send it to commentsChan
case currentId := <-kidsQueue:
// once the first kid has been added to the queue, signal the start of the processing
// this is done to avoid closing the done channel before the processing has started
startOnce.Do(func() {
close(start)
})
go func(wg *sync.WaitGroup, currentId int) {
it, err := s.Get(ctx, currentId)
if err != nil {
Expand All @@ -145,15 +161,10 @@ L:
if err != nil && excludeKids {
// TODO: add better error handling
wg.Done()
if it.Kids != nil {
for range *it.Kids {
// TODO: doesn't remove kids' kids from the waitgroup
wg.Done()
}
}
return
} else if err != nil && !excludeKids {
if it.Kids != nil {
wg.Add(len(*it.Kids))
for _, kid := range *it.Kids {
kidsQueue <- kid
}
Expand All @@ -170,6 +181,7 @@ L:
if comment.ID != nil {
mapCommentById[*comment.ID] = comment
if comment.Kids != nil && len(*comment.Kids) > 0 {
wg.Add(len(*comment.Kids))
go func(comment *Item) {
for _, kid := range *comment.Kids {
kidsQueue <- kid
Expand Down
61 changes: 61 additions & 0 deletions test/gohn/items_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,3 +540,64 @@ func TestFetchAllDescendants_Processor_includeKids(t *testing.T) {
}
}
}

// When a post is fairly active, the actual number of descendants can be
// higher than the number of descendants returned by the API.
// This test checks that we are still getting all comments, even if
// the number of descendants returned by the API is lower than the actual number of descendants.
func TestFetchAllDescendants_NumDescendantsLowerThanActual(t *testing.T) {
client, mux, _, teardown := setup.Init()
defer teardown()

mockID := 1
numDescendants := 1
actualDescendants := 6
mockType := "story"
mockParent := &gohn.Item{ID: &mockID, Type: &mockType, Kids: &[]int{2, 3, 4}, Descendants: &numDescendants}

mux.HandleFunc("/item/1.json", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, `{"id": 1, "type": "story", "kids": [2, 3, 4], "descendants": `+fmt.Sprint(numDescendants)+`}`)
})
mux.HandleFunc("/item/2.json", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, `{"id": 2, "type": "comment", "kids": [5, 6]}`)
})
mux.HandleFunc("/item/3.json", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, `{"id": 3, "type": "comment", "kids": [7]}`)
})
mux.HandleFunc("/item/4.json", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, `{"id": 4, "type": "comment"}`)
})
mux.HandleFunc("/item/5.json", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, `{"id": 5, "type": "comment"}`)
})
mux.HandleFunc("/item/6.json", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, `{"id": 6, "type": "comment"}`)
})
mux.HandleFunc("/item/7.json", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, `{"id": 7, "type": "comment"}`)
})

ctx := context.Background()
got, err := client.Items.FetchAllDescendants(ctx, mockParent, nil)

if err != nil {
t.Fatalf("unexpected error getting item: %v", err)
}

if got == nil {
t.Fatalf("expected item to be %v, got nil", 1)
}

if len(got) != actualDescendants {
t.Errorf("expected %d items, got %v", actualDescendants, len(got))
}

for _, id := range []int{2, 3, 4, 5, 6, 7} {
if got[id] == nil {
t.Fatalf("expected item %v to be %v, got nil", id, id)
}
if *got[id].ID != id {
t.Errorf("expected item ID %d, got %d", id, *got[id].ID)
}
}
}

0 comments on commit 47babc9

Please sign in to comment.