-
Notifications
You must be signed in to change notification settings - Fork 0
/
cleo.go
75 lines (63 loc) · 2.04 KB
/
cleo.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/*
* Copyright (c) 2011 [email protected], 2015 Robin Orheden
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
// Package cleo provides a fast search algorithm for prefix matching large amounts of text.
package cleo
import (
"sort"
)
type Index struct {
forward *forwardIndex
inverted *invertedIndex
ScoreFn ScoreFn
}
func NewIndex() *Index {
return &Index{
forward: NewForwardIndex(),
inverted: NewInvertedIndex(),
ScoreFn: JacaardScoring,
}
}
func (i *Index) Add(id string, value string) {
i.inverted.Add(id, value)
i.forward.Add(id, value)
}
//Iterates through all of the 8 bytes (64 bits) and tests
//each bit that is set to 1 in the query's filter against
//the bit in the comparison's filter. If the bit is not
// also 1, you do not have a match.
func testBytesFromQuery(bf int, qBloom int) bool {
for i := uint(0); i < 64; i++ {
//a & (1 << idx) == b & (1 << idx)
if (bf&(1<<i) != (1 << i)) && qBloom&(1<<i) == (1<<i) {
return false
}
}
return true
}
func (ix *Index) Search(query string) []rankedResult {
results := make([]rankedResult, 0, 0)
candidates := ix.inverted.Search(query) //First get candidates from Inverted Index
qBloom := computeBloomFilter(query)
for _, i := range candidates {
if testBytesFromQuery(i.bloom, qBloom) { //Filter using Bloom Filter
c := ix.forward.itemAt(i.id) //Get whole document from Forward Index
score := ix.ScoreFn(query, c) //Score the Forward Index between 0-1
results = append(results, rankedResult{i.id, c, score})
}
}
sort.Sort(byScore{results})
return results
}