|
4 | 4 | [codes.clj.docs.extractor.core :as core] |
5 | 5 | [codes.clj.docs.extractor.datalevin :as datalevin] |
6 | 6 | [datalevin.core :as d] |
| 7 | + [datalevin.interpret :refer [inter-fn]] |
7 | 8 | [datalevin.search-utils :as su] |
8 | 9 | [datalevin.util :as util]) |
9 | 10 | (:import [java.io File])) |
|
58 | 59 | db (d/db conn) |
59 | 60 |
|
60 | 61 | datoms (->> (d/fulltext-datoms db |
61 | | - "ass" |
| 62 | + "." |
62 | 63 | {:top 30 |
63 | 64 | :domains ["definition-name" |
64 | 65 | "namespace-name" |
|
143 | 144 |
|
144 | 145 | ; tests with fulltext and analyzer |
145 | 146 | (let [query-analyzer (su/create-analyzer |
146 | | - {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+") |
| 147 | + {:tokenizer (datalevin/merge-tokenizers |
| 148 | + (inter-fn [s] [[s 0 0]]) |
| 149 | + (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")) |
147 | 150 | :token-filters [su/lower-case-token-filter]}) |
148 | 151 |
|
149 | 152 | analyzer (su/create-analyzer |
150 | | - {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+") |
| 153 | + {:tokenizer (datalevin/merge-tokenizers |
| 154 | + (inter-fn [s] [[s 0 0]]) |
| 155 | + (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")) |
151 | 156 | :token-filters [su/lower-case-token-filter |
152 | 157 | su/prefix-token-filter]}) |
153 | 158 |
|
154 | | - dir "/tmp/mydb" |
| 159 | + dir (str "/tmp/mydb-" (random-uuid)) |
155 | 160 | conn (d/create-conn dir |
156 | 161 | {:text {:db/valueType :db.type/string |
157 | 162 | :db/fulltext true |
|
169 | 174 | {:text "associative?"} |
170 | 175 | {:text "b"} |
171 | 176 | {:text "ba"} |
172 | | - {:text "bas"}] |
| 177 | + {:text "bas"} |
| 178 | + {:text "*"} |
| 179 | + {:text "/"} |
| 180 | + {:text "->"} |
| 181 | + {:text "->>"} |
| 182 | + {:text "as->"} |
| 183 | + {:text "."} |
| 184 | + {:text "as->banana"}] |
173 | 185 |
|
174 | 186 | _transact (d/transact! conn data) |
175 | 187 |
|
176 | | - result (->> (d/q '[:find ?i |
| 188 | + result (->> (d/q '[:find ?e ?v |
177 | 189 | :in $ ?q |
178 | 190 | :where |
179 | | - [(fulltext $ ?q {:top 20}) [[?e]]] |
180 | | - [?e :text ?i]] |
| 191 | + [(fulltext $ ?q {:top 20}) [[?e ?a ?v]]]] |
181 | 192 | (d/db conn) |
182 | | - "assoc-me") |
| 193 | + "as") |
183 | 194 | doall)] |
184 | 195 |
|
185 | 196 | (d/close conn) |
186 | 197 | (util/delete-files dir) |
187 | 198 |
|
188 | 199 | result) |
189 | 200 |
|
190 | | -; tests with fulltext and analyzer on a raw query |
| 201 | + ; tests with fulltext and analyzer on a raw query |
191 | 202 | (let [query-analyzer (su/create-analyzer |
192 | | - {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+") |
| 203 | + {:tokenizer (datalevin/merge-tokenizers |
| 204 | + (inter-fn [s] [[s 0 0]]) |
| 205 | + (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")) |
193 | 206 | :token-filters [su/lower-case-token-filter]}) |
194 | 207 |
|
195 | 208 | analyzer (su/create-analyzer |
196 | | - {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+") |
| 209 | + {:tokenizer (datalevin/merge-tokenizers |
| 210 | + (inter-fn [s] [[s 0 0]]) |
| 211 | + (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")) |
197 | 212 | :token-filters [su/lower-case-token-filter |
198 | 213 | su/prefix-token-filter]}) |
199 | 214 |
|
200 | | - lmdb (d/open-kv "/tmp/mydb") |
| 215 | + dir (str "/tmp/lmdb-" (random-uuid)) |
| 216 | + lmdb (d/open-kv dir) |
201 | 217 |
|
202 | 218 | engine (d/new-search-engine lmdb {:query-analyzer query-analyzer |
203 | 219 | :analyzer analyzer |
|
213 | 229 | 7 "associative?" |
214 | 230 | 8 "b" |
215 | 231 | 9 "ba" |
216 | | - 10 "bas"} |
| 232 | + 10 "bas" |
| 233 | + 11 "->" |
| 234 | + 12 "->>" |
| 235 | + 13 "as->" |
| 236 | + 14 "as->banana" |
| 237 | + 15 "/" |
| 238 | + 16 "*" |
| 239 | + 17 "."} |
217 | 240 |
|
218 | 241 | _transact (doseq [[k v] input] |
219 | 242 | (d/add-doc engine k v)) |
220 | 243 |
|
221 | | - result (doall (d/search engine "assoc-m" {:top 20 :display :texts}))] |
| 244 | + result (doall (d/search engine "->" {:top 20 :display :texts}))] |
222 | 245 |
|
223 | 246 | (d/close-kv lmdb) |
| 247 | + (util/delete-files dir) |
224 | 248 |
|
225 | 249 | result)) |
0 commit comments