Commit 49a8f03

fix: consider full symbol as index

1 parent: d9c61d6

4 files changed, +52 -18 lines


.clj-kondo/config.edn
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+{:lint-as {datalevin.interpret/inter-fn clojure.core/fn}}
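Note: datalevin.interpret/inter-fn is a macro clj-kondo has no built-in support for, so without this :lint-as rule the linter would likely report the binding vector of the new merge-tokenizers helper (added below in src/codes/clj/docs/extractor/datalevin.clj) as unresolved symbols. The rule tells clj-kondo to analyze the form like clojure.core/fn:

  ;; linted as clojure.core/fn, so `s` is recognized as a local binding
  ;; (hypothetical body, just to show the fn-like shape)
  (inter-fn [^String s]
    (count s))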

.lsp/config.edn
Lines changed: 2 additions & 1 deletion

@@ -5,4 +5,5 @@
            flow [[:block 1]]
            flow-with-defaults [[:block 1]]
            flow-as-of [[:block 1]]
-           flow-without-validation [[:block 1]]}}}
+           flow-without-validation [[:block 1]]
+           inter-fn [[:inner 0] [:inner 1]]}}}
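Note: this is a cljfmt-style indentation rule picked up by clojure-lsp; assuming the usual :inner semantics, it makes inter-fn bodies indent two spaces under the head symbol, like fn bodies, rather than aligning under the argument vector:

  (inter-fn [^String s]
    (into (sequence (tokenizer-a s))
          (sequence (tokenizer-b s))))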

dev/playground.clj
Lines changed: 34 additions & 15 deletions

@@ -58,7 +58,7 @@
         db (d/db conn)

         datoms (->> (d/fulltext-datoms db
-                      "ass"
+                      "->"
                       {:top 30
                        :domains ["definition-name"
                                  "namespace-name"
@@ -143,15 +143,19 @@

 ; tests with fulltext and analyzer
 (let [query-analyzer (su/create-analyzer
-                       {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
+                       {:tokenizer (datalevin/merge-tokenizers
+                                     (su/create-regexp-tokenizer #"[.]+")
+                                     (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
                         :token-filters [su/lower-case-token-filter]})

       analyzer (su/create-analyzer
-                 {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
+                 {:tokenizer (datalevin/merge-tokenizers
+                               (su/create-regexp-tokenizer #"[.]+")
+                               (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
                   :token-filters [su/lower-case-token-filter
                                   su/prefix-token-filter]})

-      dir "/tmp/mydb"
+      dir (str "/tmp/mydb-" (random-uuid))
       conn (d/create-conn dir
                           {:text {:db/valueType :db.type/string
                                   :db/fulltext true
@@ -169,35 +173,45 @@
              {:text "associative?"}
              {:text "b"}
              {:text "ba"}
-             {:text "bas"}]
+             {:text "bas"}
+             {:text "*"}
+             {:text "/"}
+             {:text "->"}
+             {:text "->>"}
+             {:text "as->"}
+             {:text "as->banana"}]

       _transact (d/transact! conn data)

-      result (->> (d/q '[:find ?i
+      result (->> (d/q '[:find ?e ?v
                          :in $ ?q
                          :where
-                         [(fulltext $ ?q {:top 20}) [[?e]]]
-                         [?e :text ?i]]
+                         [(fulltext $ ?q {:top 20}) [[?e ?a ?v]]]]
                   (d/db conn)
-                  "assoc-me")
+                  "as->")
                  doall)]

   (d/close conn)
   (util/delete-files dir)

   result)

-; tests with fulltext and analyzer on a raw query
+; tests with fulltext and analyzer on a raw query
 (let [query-analyzer (su/create-analyzer
-                       {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
+                       {:tokenizer (datalevin/merge-tokenizers
+                                     (su/create-regexp-tokenizer #"[.*]+")
+                                     (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
                         :token-filters [su/lower-case-token-filter]})

       analyzer (su/create-analyzer
-                 {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
+                 {:tokenizer (datalevin/merge-tokenizers
+                               (su/create-regexp-tokenizer #"[.*]+")
+                               (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
                   :token-filters [su/lower-case-token-filter
                                   su/prefix-token-filter]})

-      lmdb (d/open-kv "/tmp/mydb")
+      dir (str "/tmp/lmdb-" (random-uuid))
+      lmdb (d/open-kv dir)

       engine (d/new-search-engine lmdb {:query-analyzer query-analyzer
                                         :analyzer analyzer
@@ -213,13 +227,18 @@
              7 "associative?"
              8 "b"
              9 "ba"
-             10 "bas"}
+             10 "bas"
+             11 "->"
+             12 "->>"
+             13 "as->"
+             14 "as->banana"}

       _transact (doseq [[k v] input]
                   (d/add-doc engine k v))

-      result (doall (d/search engine "assoc-m" {:top 20 :display :texts}))]
+      result (doall (d/search engine "as->" {:top 20 :display :texts}))]

   (d/close-kv lmdb)
+  (util/delete-files dir)

   result))
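Why "as->" is the query under test: a rough sketch of the token flow implied by the regexes above (positions, offsets, and ordering elided):

  ;; Index side (analyzer: merged tokenizers + lower-case + prefix filter):
  ;;   "as->"       -> "as->" from the [.*]+ splitter (no dots or asterisks
  ;;                   to split on) plus "as" from the punctuation splitter,
  ;;                   then prefix-expanded: "a" "as" "as-" "as->" ...
  ;;   "as->banana" -> kept whole by the [.*]+ splitter, so "as->" appears
  ;;                   among its indexed prefixes too
  ;; Query side (query-analyzer: merged tokenizers + lower-case, no prefixes):
  ;;   "as->"       -> "as->" and "as"
  ;;
  ;; The query token "as->" now intersects the indexed tokens, so the full
  ;; symbol matches instead of degrading to the fragment "as" alone.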

src/codes/clj/docs/extractor/datalevin.clj
Lines changed: 15 additions & 2 deletions

@@ -1,5 +1,6 @@
 (ns codes.clj.docs.extractor.datalevin
   (:require [datalevin.core :as d]
+            [datalevin.interpret :refer [inter-fn]]
             [datalevin.search-utils :as su]))

 ;; TODO: add id :db.unique/identity and ref :db.type/ref
@@ -78,12 +79,24 @@
 (def db-schemas
   (merge project-schema namespace-schema definition-schema))

+(defn merge-tokenizers
+  "Merges the results of tokenizer a and b into one sequence."
+  [tokenizer-a tokenizer-b]
+  (inter-fn [^String s]
+    (into (sequence (tokenizer-a s))
+          (sequence (tokenizer-b s)))))
+
 (defn bulk-transact! [datoms config]
   (let [query-analyzer (su/create-analyzer
-                         {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
+                         {:tokenizer (merge-tokenizers
+                                       (su/create-regexp-tokenizer #"[.*]+")
+                                       (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
                           :token-filters [su/lower-case-token-filter]})
+
         analyzer (su/create-analyzer
-                   {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
+                   {:tokenizer (merge-tokenizers
+                                 (su/create-regexp-tokenizer #"[.*]+")
+                                 (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
                     :token-filters [su/lower-case-token-filter
                                     su/prefix-token-filter]})
         conn (-> config :db :dir
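A quick REPL check of merge-tokenizers, assuming (as in Datalevin's search-utils) that a tokenizer maps a string to a sequence of [lemma position offset] triples, so `first` extracts the token text:

  (require '[datalevin.search-utils :as su]
           '[codes.clj.docs.extractor.datalevin :refer [merge-tokenizers]])

  (def tokenize
    (merge-tokenizers
     ;; splits only on dots and asterisks, leaving "as->" intact
     (su/create-regexp-tokenizer #"[.*]+")
     ;; the original splitter, which reduces "as->" to "as"
     (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")))

  (set (map first (tokenize "as->")))
  ;; => #{"as->" "as"}   ; illustrative: positions and offsets not shown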
