Skip to content

Commit 59369ac

Browse files
Merge pull request #15 from clj-codes/feat/better-fulltext-examples-analyzers
feat: better fulltext example and analyzers
2 parents 8a29667 + 851221d commit 59369ac

File tree

2 files changed: +100 −24 lines changed

2 files changed: +100 −24 lines changed

dev/playground.clj

Lines changed: 90 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -49,19 +49,42 @@
4949
(println "bulking")
5050
(core/extract! {}))
5151

52-
; tests with generated database
52+
(defn >> [a] (prn a) a)
53+
54+
; fulltext search with generated database
5355
(let [conn (d/get-conn "target/docs-db"
5456
datalevin/db-schemas)
5557

5658
db (d/db conn)
5759

58-
result (doall (d/q '[:find (pull ?e [* {:namespace/project [*]}]) ?a ?v
59-
:in $ ?q
60-
:where
61-
[(fulltext $ ?q {:domains ["definition-name"]}) [[?e ?a ?v]]]]
62-
db
63-
"associative"))]
60+
datoms (->> (d/fulltext-datoms db
61+
"ass"
62+
{:top 30
63+
:domains ["definition-name"
64+
"namespace-name"
65+
"project-name"]})
66+
(map first)
67+
(d/pull-many db '[:definition/id
68+
:definition/name
69+
:definition/doc
70+
:namespace/id
71+
:namespace/name
72+
:namespace/doc
73+
:project/id
74+
:project/artifact
75+
:project/group]))]
6476
(d/close conn)
77+
datoms)
78+
79+
; fulltext raw search with generated database
80+
(let [lmdb (d/open-kv "target/docs-db")
81+
engine (d/new-search-engine lmdb {:query-analyzer (su/create-analyzer
82+
{:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
83+
:token-filters [su/lower-case-token-filter]})
84+
:include-text? true
85+
:domain "definition-name"})
86+
result (doall (d/search engine "a" {:top 30}))]
87+
(d/close-kv lmdb)
6588
result)
6689

6790
; simple query definition by name
@@ -108,18 +131,22 @@
108131
; tests with fulltext search
109132
(let [conn (d/get-conn "target/docs-db" datalevin/db-schemas)
110133
db (d/db conn)
111-
result (doall (d/q '[:find ?e ?name ?a ?v
134+
result (doall (d/q '[:find ?e ?name ?a ?v ?b ?d
112135
:in $ ?q
113136
:where
114-
[(fulltext $ ?q) [[?e ?a ?v]]]
137+
[(fulltext $ ?q) [[?e ?a ?v ?b ?d]]]
115138
[?e :definition/name ?name]]
116139
db
117140
"assoc"))]
118141
(d/close conn)
119142
result)
120143

121144
; tests with fulltext and analyzer
122-
(let [analyzer (su/create-analyzer
145+
(let [query-analyzer (su/create-analyzer
146+
{:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
147+
:token-filters [su/lower-case-token-filter]})
148+
149+
analyzer (su/create-analyzer
123150
{:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
124151
:token-filters [su/lower-case-token-filter
125152
su/prefix-token-filter]})
@@ -129,26 +156,70 @@
129156
{:text {:db/valueType :db.type/string
130157
:db/fulltext true
131158
:db.fulltext/domains ["txt"]}}
132-
{:search-domains {"txt" {:analyzer analyzer}}})
159+
{:search-domains {"txt" {:analyzer analyzer
160+
:query-analyzer query-analyzer}}})
133161

134-
data [{:text "assoc!"}
162+
data [{:text "a"}
163+
{:text "abs"}
164+
{:text "assoc!"}
135165
{:text "assoc"}
136166
{:text "assoc-in"}
137167
{:text "assoc-dom"}
138168
{:text "assoc-meta"}
139-
{:text "associative?"}]
169+
{:text "associative?"}
170+
{:text "b"}
171+
{:text "ba"}
172+
{:text "bas"}]
140173

141174
_transact (d/transact! conn data)
142175

143-
result (d/q '[:find ?e ?a ?v
144-
:in $ ?q
145-
:where [(fulltext $ ?q {:domains ["txt"]}) [[?e ?a ?v]]]]
146-
(d/db conn)
147-
"a")]
176+
result (->> (d/q '[:find ?i
177+
:in $ ?q
178+
:where
179+
[(fulltext $ ?q {:top 20}) [[?e]]]
180+
[?e :text ?i]]
181+
(d/db conn)
182+
"assoc-me")
183+
doall)]
148184

149185
(d/close conn)
150186
(util/delete-files dir)
151187

152-
result))
188+
result)
153189

190+
; tests with fulltext and analyzer on a raw query
191+
(let [query-analyzer (su/create-analyzer
192+
{:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
193+
:token-filters [su/lower-case-token-filter]})
154194

195+
analyzer (su/create-analyzer
196+
{:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
197+
:token-filters [su/lower-case-token-filter
198+
su/prefix-token-filter]})
199+
200+
lmdb (d/open-kv "/tmp/mydb")
201+
202+
engine (d/new-search-engine lmdb {:query-analyzer query-analyzer
203+
:analyzer analyzer
204+
:include-text? true
205+
:domain "definition-name"})
206+
input {0 "a"
207+
1 "abs"
208+
2 "assoc!"
209+
3 "assoc"
210+
4 "assoc-in"
211+
5 "assoc-dom"
212+
6 "assoc-meta"
213+
7 "associative?"
214+
8 "b"
215+
9 "ba"
216+
10 "bas"}
217+
218+
_transact (doseq [[k v] input]
219+
(d/add-doc engine k v))
220+
221+
result (doall (d/search engine "assoc-m" {:top 20 :display :texts}))]
222+
223+
(d/close-kv lmdb)
224+
225+
result))

src/codes/clj/docs/extractor/datalevin.clj

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -79,15 +79,20 @@
7979
(merge project-schema namespace-schema definition-schema))
8080

8181
(defn bulk-transact! [datoms config]
82-
(let [analyzer (su/create-analyzer
82+
(let [query-analyzer (su/create-analyzer
83+
{:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
84+
:token-filters [su/lower-case-token-filter]})
85+
analyzer (su/create-analyzer
8386
{:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
8487
:token-filters [su/lower-case-token-filter
8588
su/prefix-token-filter]})
8689
conn (-> config :db :dir
8790
(d/get-conn db-schemas
88-
{:search-opts {:analyzer analyzer}
89-
:search-domains {"project-name" {:analyzer analyzer}
90-
"namespace-name" {:analyzer analyzer}
91-
"definition-name" {:analyzer analyzer}}}))]
91+
{:search-domains {"project-name" {:query-analyzer query-analyzer
92+
:analyzer analyzer}
93+
"namespace-name" {:query-analyzer query-analyzer
94+
:analyzer analyzer}
95+
"definition-name" {:query-analyzer query-analyzer
96+
:analyzer analyzer}}}))]
9297
(d/transact! conn datoms)
9398
(d/close conn)))

0 commit comments

Comments (0)