forked from mfonda/simhash
-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathfeature_test.go
More file actions
151 lines (133 loc) · 43.2 KB
/
feature_test.go
File metadata and controls
151 lines (133 loc) · 43.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// Copyright 2013 Matthew Fonda. All rights reserved.
// Use of this source code is governed by the MIT
// license that can be found in the LICENSE file.
package simhash
import (
"fmt"
"testing"
)
// TestNewFeature verifies that a feature built with NewFeature from a fixed
// input reports a weight of 1 and the expected 64-bit sum.
func TestNewFeature(t *testing.T) {
	const wantSum = uint64(8811532157352841348)

	feat := NewFeature([]byte("test string"))

	if got := feat.Weight(); got != 1 {
		t.Errorf("feature.Weight(): expected 1, actual %d", got)
	}
	if got := feat.Sum(); got != wantSum {
		t.Errorf("feature.Sum(): expected %d, actual %d", wantSum, got)
	}
}
// TestNewFeatureWithWeight verifies that NewFeatureWithWeight keeps the
// weight it was given and yields the expected 64-bit sum for a fixed input.
func TestNewFeatureWithWeight(t *testing.T) {
	const (
		wantWeight = 10
		wantSum    = uint64(8811532157352841348)
	)

	feat := NewFeatureWithWeight([]byte("test string"), wantWeight)

	if got := feat.Weight(); got != wantWeight {
		t.Errorf("feature.Weight(): expected %d, actual %d", wantWeight, got)
	}
	if got := feat.Sum(); got != wantSum {
		t.Errorf("feature.Sum(): expected %d, actual %d", wantSum, got)
	}
}
// TestFeatureSet verifies that the word feature set built from a short
// sentence yields one feature per expected word, in order, each matching the
// sum and weight of a feature created directly with NewFeature.
func TestFeatureSet(t *testing.T) {
	sh := NewSimhash()
	text := []byte("here's a test string.")
	fs := sh.NewWordFeatureSet(text)

	expected := []Feature{
		NewFeature([]byte("here's")),
		NewFeature([]byte("a")),
		NewFeature([]byte("test")),
		NewFeature([]byte("string")),
	}
	actual := fs.GetFeatures()

	// Stop here on a count mismatch: indexing expected by actual's length
	// would panic on extra features and silently accept missing ones.
	if len(actual) != len(expected) {
		t.Fatalf("GetFeatures(): expected %d features, actual %d", len(expected), len(actual))
	}

	for i := range expected {
		if actual[i].Sum() != expected[i].Sum() {
			t.Errorf("feature.Sum(): expected %d, actual %d", expected[i].Sum(), actual[i].Sum())
		}
		if actual[i].Weight() != expected[i].Weight() {
			t.Errorf("feature.Weight(): expected %d, actual %d", expected[i].Weight(), actual[i].Weight())
		}
	}
}
// ExampleNewWordFeatureSet prints the word feature set built from a short
// text with repeated words, the features obtained from that set, and the
// fingerprint computed from their vector.
func ExampleNewWordFeatureSet() {
	hasher := NewSimhash()
	set := hasher.NewWordFeatureSet([]byte("a a abc abc test test string."))
	fmt.Printf("WordFeatureSet: %#v\n", set)

	features := set.GetFeatures()
	fmt.Printf("Features: %#v\n", features)
	fmt.Printf("Hash: %v\n", hasher.Fingerprint(hasher.Vectorize(features)))
	// Output:
	// WordFeatureSet: &simhash.WordFeatureSet{B:[]uint8{0x61, 0x20, 0x61, 0x20, 0x61, 0x62, 0x63, 0x20, 0x61, 0x62, 0x63, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x2e}}
	// Features: []simhash.Feature{simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0xd8dcca186bafadcb, weight:1}, simhash.feature{sum:0xd8dcca186bafadcb, weight:1}, simhash.feature{sum:0x8c093f7e9fccbf69, weight:1}, simhash.feature{sum:0x8c093f7e9fccbf69, weight:1}, simhash.feature{sum:0x9926dcde0a17d48e, weight:1}}
	// Hash: 10108821242876116971
}
// TestGetFeatures verifies that DoGetFeatures, applied to a two-word input
// with the package's boundaries pattern, returns one feature per word whose
// sum and weight match features created directly with NewFeature.
func TestGetFeatures(t *testing.T) {
	actual := DoGetFeatures([]byte("test string"), boundaries)
	expected := []Feature{
		NewFeature([]byte("test")),
		NewFeature([]byte("string")),
	}

	// A count mismatch must end the test: continuing would index expected
	// out of range whenever DoGetFeatures returns extra features.
	if len(actual) != len(expected) {
		t.Fatalf("DoGetFeatures returned wrong number of features: expected %d, actual %d", len(expected), len(actual))
	}

	for i := range expected {
		if actual[i].Sum() != expected[i].Sum() {
			t.Errorf("feature.Sum(): expected %d, actual %d", expected[i].Sum(), actual[i].Sum())
		}
		if actual[i].Weight() != expected[i].Weight() {
			t.Errorf("feature.Weight(): expected %d, actual %d", expected[i].Weight(), actual[i].Weight())
		}
	}
}
// ExampleNewWordFeatureSet_cars builds a word feature set for every document
// in testDoc and prints the bytes held in each set's B field as text, one
// document per line.
func ExampleNewWordFeatureSet_cars() {
	hasher := NewSimhash()
	for _, doc := range testDoc {
		set := hasher.NewWordFeatureSet(doc)
		fmt.Printf("%s\n", string(set.B))
	}
	// Output:
	// ford f-150. lariat do not buy. truck has been in the shop 50 days so far. it has had a vibration since day one and ford cannot get rid of it. the have done everything possible to the underside of this truck and it is… 11,000km | automatic
	// 2016 ford mustang 2016 ford mustang white with black stripes, this car is in showroom shape and it only has 14,000kms. this beast has never been in an accident nor does it have one scratch on the body. i purchased 20… 14,000km | automatic
	// 2013 ford fiesta sedan - 22,116 kms body is in perfect condition. no mechanical problems. oil change and maintenance package done in march/17. registered inspection done in april/16. $10,000 firm (sales tax is extra). call … 22,120km | automatic
	// 2015 ford explorer sport suv, crossover this vehicle is a real beauty and a pleasure to drive. it is in excellent condition and has been store inside since purchased in 2015. it has not been driven in winter other then to go for service.!… 18,600km | automatic
	// 2013 ford fiesta sedan - 22,116 kms body is in perfect condition. no mechanical problems. oil change and maintenance package done in march/17. registered inspection done in april/16. $10,000 firm (sales tax is extra). call … 22,120km | automatic
	// 2015 ford explorer sport suv, crossover this vehicle is a real beauty and a pleasure to drive. it is in excellent condition and has been store inside since purchased in 2015. it has not been driven in winter other then to go for service.!… 18,600km | automatic
	// ford f-150. lariat do not buy. truck has been in the shop 50 days so far. it has had a vibration since day one and ford cannot get rid of it. the have done everything possible to the underside of this truck and it is… 11,000km | automatic
	// 2016 ford mustang 2016 ford mustang white with black stripes, this car is in showroom shape and it only has 14,000kms. this beast has never been in an accident nor does it have one scratch on the body. i purchased 20… 14,000km | automatic
}
func ExampleGetFeatures() {
sh := NewSimhash()
for _, d := range testDoc {
fmt.Printf("%#v\n", sh.NewWordFeatureSet(d).GetFeatures())
}
// Output:
// []simhash.Feature{simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7b9, weight:1}, simhash.feature{sum:0xd98001186c3a6c5d, weight:1}, simhash.feature{sum:0x7a37c1ae2e57fa88, weight:1}, simhash.feature{sum:0x8326407b4eb32ae, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3730, weight:1}, simhash.feature{sum:0xd8d9b1186bad4d2f, weight:1}, simhash.feature{sum:0x2c5b792934c8464e, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0x93104c7ea350e1e1, weight:1}, simhash.feature{sum:0x8329307b4eb82ae, weight:1}, simhash.feature{sum:0x14dfbd7eecce8288, weight:1}, simhash.feature{sum:0x8325507b4eb192b, weight:1}, simhash.feature{sum:0xd8cbcd186ba13ffc, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c18, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x214486cdc2d73f89, weight:1}, simhash.feature{sum:0x3d52262f868f65ad, weight:1}, simhash.feature{sum:0xd8d299186ba70599, weight:1}, simhash.feature{sum:0xd8adc6186b88367f, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x58bc5a1361284f0c, weight:1}, simhash.feature{sum:0xd8c8ad186b9ed323, weight:1}, simhash.feature{sum:0xd8a2cd186b7e3a1e, weight:1}, simhash.feature{sum:0x8325907b4eb2076, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0xf160267ed875749b, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8a8c7bb9849d48f6, weight:1}, simhash.feature{sum:0x34e6e73324cc4c1c, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, 
simhash.feature{sum:0xbc78285d51f8f350, weight:1}, simhash.feature{sum:0x8325907b4eb2076, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x2c5b792934c8464e, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8329707b4eb895b, weight:1}, simhash.feature{sum:0x6e5bc95ed67d4b67, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c8a, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x3075dfaf5552d79e, weight:1}, simhash.feature{sum:0xc5c6ff7fe1f34c8a, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x3075dfaf5552d79e, weight:1}, simhash.feature{sum:0x192cc0ca1d77458, weight:1}, simhash.feature{sum:0x6f5db37e8ecc76fd, weight:1}, simhash.feature{sum:0xdbfd3fbe6190d762, weight:1}, simhash.feature{sum:0x54c9ed4b266da2a5, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0xd8d5c1186ba97fdd, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xda392f7af918887b, weight:1}, simhash.feature{sum:0x357ef82f825da4b8, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xb77e117eb8748afb, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x8329707b4eb895e, weight:1}, simhash.feature{sum:0x342932676e72076, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x8fc1c6be36e055d6, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x2b94c0591a2848b9, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x8326707b4eb37b4, weight:1}, simhash.feature{sum:0x91a1dacc76ac782e, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3736, weight:1}, simhash.feature{sum:0x150fbd7eecf7a6ce, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xf160267ed875749b, weight:1}, simhash.feature{sum:0xd8adc6186b88367f, weight:1}, simhash.feature{sum:0xc4e8fa88937cb69, weight:1}, simhash.feature{sum:0x8325907b4eb207e, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0x27132a7ef75d598d, weight:1}, 
simhash.feature{sum:0xaf63bd4c8601b7b6, weight:1}, simhash.feature{sum:0x873cad20b5b03ae4, weight:1}, simhash.feature{sum:0x8329607b4eb8787, weight:1}, simhash.feature{sum:0x8329707b4eb895e, weight:1}, simhash.feature{sum:0x6e5bc95ed67d4b67, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c8f, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0xfd0c9853db565f2f, weight:1}, simhash.feature{sum:0xd7f4302f4de077d2, weight:1}, simhash.feature{sum:0x8329607b4eb8785, weight:1}, simhash.feature{sum:0xd97ffd186c3a6597, weight:1}, simhash.feature{sum:0xd8bac5186b92beb0, weight:1}, simhash.feature{sum:0x27132a7ef75d598d, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x12d6ee02dfea32b8, weight:1}, simhash.feature{sum:0x6976a39422c2abd8, weight:1}, simhash.feature{sum:0x8325a07b4eb21ac, weight:1}, simhash.feature{sum:0xde2e60d07d4ebdb0, weight:1}, simhash.feature{sum:0xfff236c5f092af95, weight:1}, simhash.feature{sum:0xd8adc1186b882df7, weight:1}, simhash.feature{sum:0x656c734f40ac6679, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x5d60a51e6eb33462, weight:1}, simhash.feature{sum:0xe98a0708a4b03ab7, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x8fde7a602c8faa3a, weight:1}, simhash.feature{sum:0x8329707b4eb895d, weight:1}, simhash.feature{sum:0xb8019c1cc35ecab1, weight:1}, simhash.feature{sum:0x37cbb6f821eaff03, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xa4fb0fc51c2e551b, weight:1}, simhash.feature{sum:0x8329707b4eb895c, weight:1}, simhash.feature{sum:0x8329707b4eb895a, weight:1}, simhash.feature{sum:0xd98382186c3d7d47, weight:1}, simhash.feature{sum:0x3716c7ee2e72321, weight:1}, simhash.feature{sum:0xf8b0c02f5fe0b257, weight:1}, simhash.feature{sum:0xd89cb9186b79aca8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x4be610a6aef6c731, weight:1}, simhash.feature{sum:0x1cb6df7ef1041835, weight:1}, simhash.feature{sum:0x8329607b4eb8785, weight:1}, 
simhash.feature{sum:0xa924f955da8cd334, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c89, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x1cbe3da5da62b610, weight:1}, simhash.feature{sum:0x7aa9362fa9816155, weight:1}, simhash.feature{sum:0xd89fad186b7bce35, weight:1}, simhash.feature{sum:0x12b142e963d1682d, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x38b39e054e1c1b67, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x9ab4937ea75b5c59, weight:1}, simhash.feature{sum:0x268587373f1f77b5, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x41ef33d8e01cb16c, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0xec8584acc12fcf27, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x7306888cb4e8ab75, weight:1}, simhash.feature{sum:0x6976a39422c2abd8, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x5a079a2f9797da68, weight:1}, simhash.feature{sum:0x9e8e79746e7ee735, weight:1}, simhash.feature{sum:0x3d52262f868f65ad, weight:1}, simhash.feature{sum:0x873cad20b5b03ae4, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xc5c6ff7fe1f34c89, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3730, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x16af988c443cff2b, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xd88a107ce8dad5a6, weight:1}, simhash.feature{sum:0x5bf18352ec4156d9, 
weight:1}, simhash.feature{sum:0x8c1a4d7e9fdb4a74, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0x8326107b4eb2dc7, weight:1}, simhash.feature{sum:0xd8cbc7186ba1352e, weight:1}, simhash.feature{sum:0xf90ceea98fba79f6, weight:1}, simhash.feature{sum:0x8329707b4eb8952, weight:1}, simhash.feature{sum:0x99b89d2c34968f75, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c8f, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0xfd0c9853db565f2f, weight:1}, simhash.feature{sum:0xd7f4302f4de077d2, weight:1}, simhash.feature{sum:0x8329607b4eb8785, weight:1}, simhash.feature{sum:0xd97ffd186c3a6597, weight:1}, simhash.feature{sum:0xd8bac5186b92beb0, weight:1}, simhash.feature{sum:0x27132a7ef75d598d, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x12d6ee02dfea32b8, weight:1}, simhash.feature{sum:0x6976a39422c2abd8, weight:1}, simhash.feature{sum:0x8325a07b4eb21ac, weight:1}, simhash.feature{sum:0xde2e60d07d4ebdb0, weight:1}, simhash.feature{sum:0xfff236c5f092af95, weight:1}, simhash.feature{sum:0xd8adc1186b882df7, weight:1}, simhash.feature{sum:0x656c734f40ac6679, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x5d60a51e6eb33462, weight:1}, simhash.feature{sum:0xe98a0708a4b03ab7, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x8fde7a602c8faa3a, weight:1}, simhash.feature{sum:0x8329707b4eb895d, weight:1}, simhash.feature{sum:0xb8019c1cc35ecab1, weight:1}, simhash.feature{sum:0x37cbb6f821eaff03, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xa4fb0fc51c2e551b, weight:1}, simhash.feature{sum:0x8329707b4eb895c, weight:1}, simhash.feature{sum:0x8329707b4eb895a, weight:1}, simhash.feature{sum:0xd98382186c3d7d47, weight:1}, simhash.feature{sum:0x3716c7ee2e72321, weight:1}, simhash.feature{sum:0xf8b0c02f5fe0b257, weight:1}, simhash.feature{sum:0xd89cb9186b79aca8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x4be610a6aef6c731, weight:1}, simhash.feature{sum:0x1cb6df7ef1041835, weight:1}, simhash.feature{sum:0x8329607b4eb8785, weight:1}, 
simhash.feature{sum:0xa924f955da8cd334, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c89, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x1cbe3da5da62b610, weight:1}, simhash.feature{sum:0x7aa9362fa9816155, weight:1}, simhash.feature{sum:0xd89fad186b7bce35, weight:1}, simhash.feature{sum:0x12b142e963d1682d, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x38b39e054e1c1b67, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x9ab4937ea75b5c59, weight:1}, simhash.feature{sum:0x268587373f1f77b5, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x41ef33d8e01cb16c, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0xec8584acc12fcf27, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x7306888cb4e8ab75, weight:1}, simhash.feature{sum:0x6976a39422c2abd8, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x5a079a2f9797da68, weight:1}, simhash.feature{sum:0x9e8e79746e7ee735, weight:1}, simhash.feature{sum:0x3d52262f868f65ad, weight:1}, simhash.feature{sum:0x873cad20b5b03ae4, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xc5c6ff7fe1f34c89, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3730, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x16af988c443cff2b, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xd88a107ce8dad5a6, weight:1}, simhash.feature{sum:0x5bf18352ec4156d9, 
weight:1}, simhash.feature{sum:0x8c1a4d7e9fdb4a74, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0x8326107b4eb2dc7, weight:1}, simhash.feature{sum:0xd8cbc7186ba1352e, weight:1}, simhash.feature{sum:0xf90ceea98fba79f6, weight:1}, simhash.feature{sum:0x8329707b4eb8952, weight:1}, simhash.feature{sum:0x99b89d2c34968f75, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7b9, weight:1}, simhash.feature{sum:0xd98001186c3a6c5d, weight:1}, simhash.feature{sum:0x7a37c1ae2e57fa88, weight:1}, simhash.feature{sum:0x8326407b4eb32ae, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3730, weight:1}, simhash.feature{sum:0xd8d9b1186bad4d2f, weight:1}, simhash.feature{sum:0x2c5b792934c8464e, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0x93104c7ea350e1e1, weight:1}, simhash.feature{sum:0x8329307b4eb82ae, weight:1}, simhash.feature{sum:0x14dfbd7eecce8288, weight:1}, simhash.feature{sum:0x8325507b4eb192b, weight:1}, simhash.feature{sum:0xd8cbcd186ba13ffc, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c18, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x214486cdc2d73f89, weight:1}, simhash.feature{sum:0x3d52262f868f65ad, weight:1}, simhash.feature{sum:0xd8d299186ba70599, weight:1}, simhash.feature{sum:0xd8adc6186b88367f, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x58bc5a1361284f0c, weight:1}, simhash.feature{sum:0xd8c8ad186b9ed323, weight:1}, simhash.feature{sum:0xd8a2cd186b7e3a1e, weight:1}, simhash.feature{sum:0x8325907b4eb2076, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0xf160267ed875749b, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8a8c7bb9849d48f6, weight:1}, simhash.feature{sum:0x34e6e73324cc4c1c, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, 
simhash.feature{sum:0xbc78285d51f8f350, weight:1}, simhash.feature{sum:0x8325907b4eb2076, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x2c5b792934c8464e, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8329707b4eb895b, weight:1}, simhash.feature{sum:0x6e5bc95ed67d4b67, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c8a, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x3075dfaf5552d79e, weight:1}, simhash.feature{sum:0xc5c6ff7fe1f34c8a, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x3075dfaf5552d79e, weight:1}, simhash.feature{sum:0x192cc0ca1d77458, weight:1}, simhash.feature{sum:0x6f5db37e8ecc76fd, weight:1}, simhash.feature{sum:0xdbfd3fbe6190d762, weight:1}, simhash.feature{sum:0x54c9ed4b266da2a5, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0xd8d5c1186ba97fdd, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xda392f7af918887b, weight:1}, simhash.feature{sum:0x357ef82f825da4b8, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xb77e117eb8748afb, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x8329707b4eb895e, weight:1}, simhash.feature{sum:0x342932676e72076, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x8fc1c6be36e055d6, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x2b94c0591a2848b9, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x8326707b4eb37b4, weight:1}, simhash.feature{sum:0x91a1dacc76ac782e, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3736, weight:1}, simhash.feature{sum:0x150fbd7eecf7a6ce, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xf160267ed875749b, weight:1}, simhash.feature{sum:0xd8adc6186b88367f, weight:1}, simhash.feature{sum:0xc4e8fa88937cb69, weight:1}, simhash.feature{sum:0x8325907b4eb207e, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0x27132a7ef75d598d, weight:1}, 
simhash.feature{sum:0xaf63bd4c8601b7b6, weight:1}, simhash.feature{sum:0x873cad20b5b03ae4, weight:1}, simhash.feature{sum:0x8329607b4eb8787, weight:1}, simhash.feature{sum:0x8329707b4eb895e, weight:1}, simhash.feature{sum:0x6e5bc95ed67d4b67, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
}
func ExampleBuildFeatures() {
for _, d := range testDoc {
fmt.Printf("%#v\n", BuildFeatures(string(d), Doc2words))
}
// Output:
// []simhash.Feature{simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7b9, weight:1}, simhash.feature{sum:0xd98001186c3a6c5d, weight:1}, simhash.feature{sum:0x7a37c1ae2e57fa88, weight:1}, simhash.feature{sum:0x8326407b4eb32ae, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3730, weight:1}, simhash.feature{sum:0xd8d9b1186bad4d2f, weight:1}, simhash.feature{sum:0x2c5b792934c8464e, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0x93104c7ea350e1e1, weight:1}, simhash.feature{sum:0x8329307b4eb82ae, weight:1}, simhash.feature{sum:0x14dfbd7eecce8288, weight:1}, simhash.feature{sum:0x8325507b4eb192b, weight:1}, simhash.feature{sum:0xd8cbcd186ba13ffc, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c18, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x214486cdc2d73f89, weight:1}, simhash.feature{sum:0x3d52262f868f65ad, weight:1}, simhash.feature{sum:0xd8d299186ba70599, weight:1}, simhash.feature{sum:0xd8adc6186b88367f, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x58bc5a1361284f0c, weight:1}, simhash.feature{sum:0xd8c8ad186b9ed323, weight:1}, simhash.feature{sum:0xd8a2cd186b7e3a1e, weight:1}, simhash.feature{sum:0x8325907b4eb2076, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0xf160267ed875749b, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8a8c7bb9849d48f6, weight:1}, simhash.feature{sum:0x34e6e73324cc4c1c, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, 
simhash.feature{sum:0xbc78285d51f8f350, weight:1}, simhash.feature{sum:0x8325907b4eb2076, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x2c5b792934c8464e, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x1c7c2e0d9eb9677d, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c8a, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x3075dfaf5552d79e, weight:1}, simhash.feature{sum:0xc5c6ff7fe1f34c8a, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x3075dfaf5552d79e, weight:1}, simhash.feature{sum:0x192cc0ca1d77458, weight:1}, simhash.feature{sum:0x6f5db37e8ecc76fd, weight:1}, simhash.feature{sum:0xdbfd3fbe6190d762, weight:1}, simhash.feature{sum:0x54c9ed4b266da2a5, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0xd8d5c1186ba97fdd, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xda392f7af918887b, weight:1}, simhash.feature{sum:0x357ef82f825da4b8, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xb77e117eb8748afb, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x22b4b6630fb27c45, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x8fc1c6be36e055d6, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x2b94c0591a2848b9, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x8326707b4eb37b4, weight:1}, simhash.feature{sum:0x91a1dacc76ac782e, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3736, weight:1}, simhash.feature{sum:0x150fbd7eecf7a6ce, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xf160267ed875749b, weight:1}, simhash.feature{sum:0xd8adc6186b88367f, weight:1}, simhash.feature{sum:0xc4e8fa88937cb69, weight:1}, simhash.feature{sum:0x8325907b4eb207e, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0x27132a7ef75d598d, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7b6, weight:1}, 
simhash.feature{sum:0x873cad20b5b03ae4, weight:1}, simhash.feature{sum:0x8329607b4eb8787, weight:1}, simhash.feature{sum:0x2e047881b2f11bf2, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c8f, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0xfd0c9853db565f2f, weight:1}, simhash.feature{sum:0xd7f4302f4de077d2, weight:1}, simhash.feature{sum:0xf2b15d4ce63f5477, weight:1}, simhash.feature{sum:0xd8bac5186b92beb0, weight:1}, simhash.feature{sum:0x27132a7ef75d598d, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x12d6ee02dfea32b8, weight:1}, simhash.feature{sum:0x6976a39422c2abd8, weight:1}, simhash.feature{sum:0x8325a07b4eb21ac, weight:1}, simhash.feature{sum:0xde2e60d07d4ebdb0, weight:1}, simhash.feature{sum:0xfff236c5f092af95, weight:1}, simhash.feature{sum:0xd8adc1186b882df7, weight:1}, simhash.feature{sum:0x656c734f40ac6679, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x5d60a51e6eb33462, weight:1}, simhash.feature{sum:0xe98a0708a4b03ab7, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x8fde7a602c8faa3a, weight:1}, simhash.feature{sum:0x8329707b4eb895d, weight:1}, simhash.feature{sum:0xb8019c1cc35ecab1, weight:1}, simhash.feature{sum:0x37cbb6f821eaff03, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xa4fb0fc51c2e551b, weight:1}, simhash.feature{sum:0x8329707b4eb895c, weight:1}, simhash.feature{sum:0x246c8b28007c1970, weight:1}, simhash.feature{sum:0x3716c7ee2e72321, weight:1}, simhash.feature{sum:0xf8b0c02f5fe0b257, weight:1}, simhash.feature{sum:0xd89cb9186b79aca8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x4be610a6aef6c731, weight:1}, simhash.feature{sum:0x1cb6df7ef1041835, weight:1}, simhash.feature{sum:0xfcdeddf9a175b394, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c89, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x1cbe3da5da62b610, weight:1}, simhash.feature{sum:0x7aa9362fa9816155, weight:1}, simhash.feature{sum:0xd89fad186b7bce35, weight:1}, simhash.feature{sum:0x12b142e963d1682d, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x38b39e054e1c1b67, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x9ab4937ea75b5c59, weight:1}, simhash.feature{sum:0x268587373f1f77b5, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x41ef33d8e01cb16c, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0xec8584acc12fcf27, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x7306888cb4e8ab75, weight:1}, simhash.feature{sum:0x6976a39422c2abd8, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x5a079a2f9797da68, weight:1}, simhash.feature{sum:0x9e8e79746e7ee735, weight:1}, simhash.feature{sum:0x3d52262f868f65ad, weight:1}, simhash.feature{sum:0x873cad20b5b03ae4, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xc5c6ff7fe1f34c89, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3730, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x16af988c443cff2b, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xd88a107ce8dad5a6, weight:1}, simhash.feature{sum:0x5bf18352ec4156d9, 
weight:1}, simhash.feature{sum:0x8c1a4d7e9fdb4a74, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0x8326107b4eb2dc7, weight:1}, simhash.feature{sum:0xd8cbc7186ba1352e, weight:1}, simhash.feature{sum:0xf90ceea98fba79f6, weight:1}, simhash.feature{sum:0xe8860067f74f9fbc, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c8f, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0xfd0c9853db565f2f, weight:1}, simhash.feature{sum:0xd7f4302f4de077d2, weight:1}, simhash.feature{sum:0xf2b15d4ce63f5477, weight:1}, simhash.feature{sum:0xd8bac5186b92beb0, weight:1}, simhash.feature{sum:0x27132a7ef75d598d, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x12d6ee02dfea32b8, weight:1}, simhash.feature{sum:0x6976a39422c2abd8, weight:1}, simhash.feature{sum:0x8325a07b4eb21ac, weight:1}, simhash.feature{sum:0xde2e60d07d4ebdb0, weight:1}, simhash.feature{sum:0xfff236c5f092af95, weight:1}, simhash.feature{sum:0xd8adc1186b882df7, weight:1}, simhash.feature{sum:0x656c734f40ac6679, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x5d60a51e6eb33462, weight:1}, simhash.feature{sum:0xe98a0708a4b03ab7, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x8fde7a602c8faa3a, weight:1}, simhash.feature{sum:0x8329707b4eb895d, weight:1}, simhash.feature{sum:0xb8019c1cc35ecab1, weight:1}, simhash.feature{sum:0x37cbb6f821eaff03, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xa4fb0fc51c2e551b, weight:1}, simhash.feature{sum:0x8329707b4eb895c, weight:1}, simhash.feature{sum:0x246c8b28007c1970, weight:1}, simhash.feature{sum:0x3716c7ee2e72321, weight:1}, simhash.feature{sum:0xf8b0c02f5fe0b257, weight:1}, simhash.feature{sum:0xd89cb9186b79aca8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x4be610a6aef6c731, weight:1}, simhash.feature{sum:0x1cb6df7ef1041835, weight:1}, simhash.feature{sum:0xfcdeddf9a175b394, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c89, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x1cbe3da5da62b610, weight:1}, simhash.feature{sum:0x7aa9362fa9816155, weight:1}, simhash.feature{sum:0xd89fad186b7bce35, weight:1}, simhash.feature{sum:0x12b142e963d1682d, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x38b39e054e1c1b67, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x9ab4937ea75b5c59, weight:1}, simhash.feature{sum:0x268587373f1f77b5, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x41ef33d8e01cb16c, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0xec8584acc12fcf27, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x7306888cb4e8ab75, weight:1}, simhash.feature{sum:0x6976a39422c2abd8, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x5a079a2f9797da68, weight:1}, simhash.feature{sum:0x9e8e79746e7ee735, weight:1}, simhash.feature{sum:0x3d52262f868f65ad, weight:1}, simhash.feature{sum:0x873cad20b5b03ae4, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xc5c6ff7fe1f34c89, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3730, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x16af988c443cff2b, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xd88a107ce8dad5a6, weight:1}, simhash.feature{sum:0x5bf18352ec4156d9, 
weight:1}, simhash.feature{sum:0x8c1a4d7e9fdb4a74, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0x8326107b4eb2dc7, weight:1}, simhash.feature{sum:0xd8cbc7186ba1352e, weight:1}, simhash.feature{sum:0xf90ceea98fba79f6, weight:1}, simhash.feature{sum:0xe8860067f74f9fbc, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7b9, weight:1}, simhash.feature{sum:0xd98001186c3a6c5d, weight:1}, simhash.feature{sum:0x7a37c1ae2e57fa88, weight:1}, simhash.feature{sum:0x8326407b4eb32ae, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3730, weight:1}, simhash.feature{sum:0xd8d9b1186bad4d2f, weight:1}, simhash.feature{sum:0x2c5b792934c8464e, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0x93104c7ea350e1e1, weight:1}, simhash.feature{sum:0x8329307b4eb82ae, weight:1}, simhash.feature{sum:0x14dfbd7eecce8288, weight:1}, simhash.feature{sum:0x8325507b4eb192b, weight:1}, simhash.feature{sum:0xd8cbcd186ba13ffc, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c18, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7be, weight:1}, simhash.feature{sum:0x214486cdc2d73f89, weight:1}, simhash.feature{sum:0x3d52262f868f65ad, weight:1}, simhash.feature{sum:0xd8d299186ba70599, weight:1}, simhash.feature{sum:0xd8adc6186b88367f, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x58bc5a1361284f0c, weight:1}, simhash.feature{sum:0xd8c8ad186b9ed323, weight:1}, simhash.feature{sum:0xd8a2cd186b7e3a1e, weight:1}, simhash.feature{sum:0x8325907b4eb2076, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0xf160267ed875749b, weight:1}, simhash.feature{sum:0x150fb27eecf79469, weight:1}, simhash.feature{sum:0x8a8c7bb9849d48f6, weight:1}, simhash.feature{sum:0x34e6e73324cc4c1c, weight:1}, simhash.feature{sum:0x8325407b4eb17fe, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, 
simhash.feature{sum:0xbc78285d51f8f350, weight:1}, simhash.feature{sum:0x8325907b4eb2076, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x2c5b792934c8464e, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x1c7c2e0d9eb9677d, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
// []simhash.Feature{simhash.feature{sum:0xc5c6ff7fe1f34c8a, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x3075dfaf5552d79e, weight:1}, simhash.feature{sum:0xc5c6ff7fe1f34c8a, weight:1}, simhash.feature{sum:0x3787c7ee2ed5d4e, weight:1}, simhash.feature{sum:0x3075dfaf5552d79e, weight:1}, simhash.feature{sum:0x192cc0ca1d77458, weight:1}, simhash.feature{sum:0x6f5db37e8ecc76fd, weight:1}, simhash.feature{sum:0xdbfd3fbe6190d762, weight:1}, simhash.feature{sum:0x54c9ed4b266da2a5, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0xd8d5c1186ba97fdd, weight:1}, simhash.feature{sum:0x8325f07b4eb2a31, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0xda392f7af918887b, weight:1}, simhash.feature{sum:0x357ef82f825da4b8, weight:1}, simhash.feature{sum:0xd8dcc6186bafa6b8, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xb77e117eb8748afb, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x22b4b6630fb27c45, weight:1}, simhash.feature{sum:0x8c1a417e9fdb35c5, weight:1}, simhash.feature{sum:0x8fc1c6be36e055d6, weight:1}, simhash.feature{sum:0xd8c4c1186b9b0c0f, weight:1}, simhash.feature{sum:0x2b94c0591a2848b9, weight:1}, simhash.feature{sum:0x26feff7ef74c67b7, weight:1}, simhash.feature{sum:0x8325f07b4eb2a2c, weight:1}, simhash.feature{sum:0x8326707b4eb37b4, weight:1}, simhash.feature{sum:0x91a1dacc76ac782e, weight:1}, simhash.feature{sum:0xd8b0a7186b8a3736, weight:1}, simhash.feature{sum:0x150fbd7eecf7a6ce, weight:1}, simhash.feature{sum:0x8325f07b4eb2a36, weight:1}, simhash.feature{sum:0xf160267ed875749b, weight:1}, simhash.feature{sum:0xd8adc6186b88367f, weight:1}, simhash.feature{sum:0xc4e8fa88937cb69, weight:1}, simhash.feature{sum:0x8325907b4eb207e, weight:1}, simhash.feature{sum:0xd89cc2186b79bc7e, weight:1}, simhash.feature{sum:0x27132a7ef75d598d, weight:1}, simhash.feature{sum:0xaf63bd4c8601b7b6, weight:1}, 
simhash.feature{sum:0x873cad20b5b03ae4, weight:1}, simhash.feature{sum:0x8329607b4eb8787, weight:1}, simhash.feature{sum:0x2e047881b2f11bf2, weight:1}, simhash.feature{sum:0x3035a365e168961e, weight:1}}
}
// testDoc holds eight sample vehicle-listing texts as byte slices.
//
// Only four distinct texts are involved; each appears twice, so the pairs
// (0, 6), (1, 7), (2, 4) and (3, 5) have identical content. Every entry is
// backed by its own byte slice, so modifying one entry never affects its
// duplicate.
//
// NOTE(review): presumably consumed as fixture input by tests/examples
// elsewhere in this file — confirm against the callers.
var testDoc = func() [][]byte {
	const (
		f150     = "Ford F-150. Lariat DO NOT BUY. Truck has been in the shop 50 days so far. It has had a vibration since day one and Ford cannot get rid of it. The have done everything possible to the underside of this truck and it is… 11,000km | Automatic"
		mustang  = "2016 Ford Mustang 2016 Ford Mustang white with black stripes, this car is in showroom shape and it only has 14,000kms. this beast has never been in an accident nor does it have one scratch on the body. i purchased 20… 14,000km | Automatic"
		fiesta   = "2013 Ford Fiesta Sedan - 22,116 kms Body is in perfect condition. No mechanical problems. Oil change and maintenance package done in March/17. Registered inspection done in April/16. $10,000 firm (sales tax is extra). Call … 22,120km | Automatic"
		explorer = "2015 Ford Explorer Sport SUV, Crossover This vehicle is a real beauty and a pleasure to drive. It is in excellent condition and has been store inside since purchased in 2015. It has not been driven in winter other then to go for service.!… 18,600km | Automatic"
	)

	// The order is significant: it fixes which indices are duplicates.
	listings := [...]string{
		f150, mustang, fiesta, explorer,
		fiesta, explorer, f150, mustang,
	}

	docs := make([][]byte, len(listings))
	for i, text := range listings {
		// Converting per entry gives each document its own backing array.
		docs[i] = []byte(text)
	}
	return docs
}()