Skip to content

Commit d301805

Browse files
committed
fix: stop using regex to replicate the full symbol
1 parent 49a8f03 commit d301805

File tree

2 files changed

+15
-10
lines changed

2 files changed

+15
-10
lines changed

dev/playground.clj

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
[codes.clj.docs.extractor.core :as core]
55
[codes.clj.docs.extractor.datalevin :as datalevin]
66
[datalevin.core :as d]
7+
[datalevin.interpret :refer [inter-fn]]
78
[datalevin.search-utils :as su]
89
[datalevin.util :as util])
910
(:import [java.io File]))
@@ -58,7 +59,7 @@
5859
db (d/db conn)
5960

6061
datoms (->> (d/fulltext-datoms db
61-
"->"
62+
"."
6263
{:top 30
6364
:domains ["definition-name"
6465
"namespace-name"
@@ -144,13 +145,13 @@
144145
; tests with fulltext and analyzer
145146
(let [query-analyzer (su/create-analyzer
146147
{:tokenizer (datalevin/merge-tokenizers
147-
(su/create-regexp-tokenizer #"[.]+")
148+
(inter-fn [s] [[s 0 0]])
148149
(su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
149150
:token-filters [su/lower-case-token-filter]})
150151

151152
analyzer (su/create-analyzer
152153
{:tokenizer (datalevin/merge-tokenizers
153-
(su/create-regexp-tokenizer #"[.]+")
154+
(inter-fn [s] [[s 0 0]])
154155
(su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
155156
:token-filters [su/lower-case-token-filter
156157
su/prefix-token-filter]})
@@ -179,6 +180,7 @@
179180
{:text "->"}
180181
{:text "->>"}
181182
{:text "as->"}
183+
{:text "."}
182184
{:text "as->banana"}]
183185

184186
_transact (d/transact! conn data)
@@ -188,7 +190,7 @@
188190
:where
189191
[(fulltext $ ?q {:top 20}) [[?e ?a ?v]]]]
190192
(d/db conn)
191-
"as->")
193+
"as")
192194
doall)]
193195

194196
(d/close conn)
@@ -199,13 +201,13 @@
199201
; tests with fulltext and analyzer on a raw query
200202
(let [query-analyzer (su/create-analyzer
201203
{:tokenizer (datalevin/merge-tokenizers
202-
(su/create-regexp-tokenizer #"[.*]+")
204+
(inter-fn [s] [[s 0 0]])
203205
(su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
204206
:token-filters [su/lower-case-token-filter]})
205207

206208
analyzer (su/create-analyzer
207209
{:tokenizer (datalevin/merge-tokenizers
208-
(su/create-regexp-tokenizer #"[.*]+")
210+
(inter-fn [s] [[s 0 0]])
209211
(su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
210212
:token-filters [su/lower-case-token-filter
211213
su/prefix-token-filter]})
@@ -231,12 +233,15 @@
231233
11 "->"
232234
12 "->>"
233235
13 "as->"
234-
14 "as->banana"}
236+
14 "as->banana"
237+
15 "/"
238+
16 "*"
239+
17 "."}
235240

236241
_transact (doseq [[k v] input]
237242
(d/add-doc engine k v))
238243

239-
result (doall (d/search engine "as->" {:top 20 :display :texts}))]
244+
result (doall (d/search engine "->" {:top 20 :display :texts}))]
240245

241246
(d/close-kv lmdb)
242247
(util/delete-files dir)

src/codes/clj/docs/extractor/datalevin.clj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,13 @@
8989
(defn bulk-transact! [datoms config]
9090
(let [query-analyzer (su/create-analyzer
9191
{:tokenizer (merge-tokenizers
92-
(su/create-regexp-tokenizer #"[.*]+")
92+
(inter-fn [s] [[s 0 0]])
9393
(su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
9494
:token-filters [su/lower-case-token-filter]})
9595

9696
analyzer (su/create-analyzer
9797
{:tokenizer (merge-tokenizers
98-
(su/create-regexp-tokenizer #"[.*]+")
98+
(inter-fn [s] [[s 0 0]])
9999
(su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+"))
100100
:token-filters [su/lower-case-token-filter
101101
su/prefix-token-filter]})

0 commit comments

Comments
 (0)