Skip to content

Commit 8a29667

Browse files
Merge pull request #14 from clj-codes/feat/adds-more-fulltext-index-token
feat: fulltext index tokens
2 parents 80609fb + 44b8d9e commit 8a29667

File tree

3 files changed

+66
-22
lines changed

3 files changed

+66
-22
lines changed

dev/playground.clj

Lines changed: 48 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
[codes.clj.docs.extractor.core :as core]
55
[codes.clj.docs.extractor.datalevin :as datalevin]
66
[datalevin.core :as d]
7+
[datalevin.search-utils :as su]
78
[datalevin.util :as util])
89
(:import [java.io File]))
910

@@ -49,14 +50,17 @@
4950
(core/extract! {}))
5051

5152
; tests with generated database
52-
(let [conn (d/get-conn "target/docs-db" datalevin/db-schemas)
53+
(let [conn (d/get-conn "target/docs-db"
54+
datalevin/db-schemas)
55+
5356
db (d/db conn)
57+
5458
result (doall (d/q '[:find (pull ?e [* {:namespace/project [*]}]) ?a ?v
5559
:in $ ?q
5660
:where
57-
[(fulltext $ ?q {:domains ["definition"]}) [[?e ?a ?v]]]]
61+
[(fulltext $ ?q {:domains ["definition-name"]}) [[?e ?a ?v]]]]
5862
db
59-
"assoc"))]
63+
"associative"))]
6064
(d/close conn)
6165
result)
6266

@@ -73,25 +77,18 @@
7377
result)
7478

7579
; regex searching
76-
(let [conn (d/get-conn "target/docs-db" datalevin/db-schemas)
80+
(let [conn (d/get-conn "target/docs-db"
81+
datalevin/db-schemas)
7782
db (d/db conn)
78-
result (doall (->> (d/q '[:find [(pull ?e [:definition/id
79-
:definition/name
80-
:definition/doc
81-
:definition/group
82-
:definition/artifact
83-
:definition/git-source
84-
{:definition/namespace [:namespace/name]}]) ...]
83+
result (doall (->> (d/q '[:find [(pull ?e [*]) ...]
8584
:in $ ?q
8685
:where
87-
[(str ?q ".*") ?pattern]
86+
[(str ".*" ?q ".*") ?pattern]
8887
[(re-pattern ?pattern) ?regex]
8988
[(re-matches ?regex ?name)]
90-
[?e :definition/name ?name]
91-
[?e :definition/private false]
92-
(not [?e :definition/defined-by "cljs.core/defprotocol"])]
89+
[?e :definition/name ?name]]
9390
db
94-
"def")
91+
"pending")
9592
(sort-by (juxt
9693
:definition/id
9794
:definition/name))))]
@@ -119,4 +116,39 @@
119116
db
120117
"assoc"))]
121118
(d/close conn)
119+
result)
120+
121+
; tests with fulltext and analyzer
122+
(let [analyzer (su/create-analyzer
123+
{:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
124+
:token-filters [su/lower-case-token-filter
125+
su/prefix-token-filter]})
126+
127+
dir "/tmp/mydb"
128+
conn (d/create-conn dir
129+
{:text {:db/valueType :db.type/string
130+
:db/fulltext true
131+
:db.fulltext/domains ["txt"]}}
132+
{:search-domains {"txt" {:analyzer analyzer}}})
133+
134+
data [{:text "assoc!"}
135+
{:text "assoc"}
136+
{:text "assoc-in"}
137+
{:text "assoc-dom"}
138+
{:text "assoc-meta"}
139+
{:text "associative?"}]
140+
141+
_transact (d/transact! conn data)
142+
143+
result (d/q '[:find ?e ?a ?v
144+
:in $ ?q
145+
:where [(fulltext $ ?q {:domains ["txt"]}) [[?e ?a ?v]]]]
146+
(d/db conn)
147+
"a")]
148+
149+
(d/close conn)
150+
(util/delete-files dir)
151+
122152
result))
153+
154+

src/codes/clj/docs/extractor/adapters.clj

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@
7878
(->> namespaces
7979
group-multi-langs
8080
(mapv (fn [{:keys [end-row meta name-end-col name-end-row name-row deprecated added
81-
name author filename col name-col end-col doc no-doc row]}]
81+
name author filename col name-col end-col doc no-doc row lang]}]
8282
(let [trim-filename (str/replace filename root "")]
8383
(assoc-some
8484
{:namespace/id (str/join "/" [group artifact name])
@@ -101,7 +101,8 @@
101101
:namespace/end-col end-col
102102
:namespace/doc doc
103103
:namespace/no-doc (some-> no-doc boolean)
104-
:namespace/row row))))))))
104+
:namespace/row row
105+
:namespace/lang lang))))))))
105106
[]
106107
analysis))
107108

@@ -122,7 +123,7 @@
122123
name-end-row name-row added deprecated ns name author
123124
defined-by filename macro col name-col end-col
124125
arglist-strs varargs-min-arity doc row
125-
private protocol-ns protocol-name]}]
126+
private protocol-ns protocol-name lang]}]
126127
(let [trim-filename (when filename (str/replace filename root ""))]
127128
(assoc-some
128129
{:definition/group group
@@ -154,7 +155,8 @@
154155
:definition/varargs-min-arity varargs-min-arity
155156
:definition/private (boolean private)
156157
:definition/protocol-ns (some-> protocol-ns str)
157-
:definition/protocol-name (some-> protocol-name str)))))))))
158+
:definition/protocol-name (some-> protocol-name str)
159+
:definition/lang lang))))))))
158160
[])
159161
(id-by (juxt :definition/group
160162
:definition/artifact

src/codes/clj/docs/extractor/datalevin.clj

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
(ns codes.clj.docs.extractor.datalevin
2-
(:require [datalevin.core :as d]))
2+
(:require [datalevin.core :as d]
3+
[datalevin.search-utils :as su]))
34

45
;; TODO: add id :db.unique/identity and ref :db.type/ref
56

@@ -78,6 +79,15 @@
7879
(merge project-schema namespace-schema definition-schema))
7980

8081
(defn bulk-transact! [datoms config]
81-
(let [conn (-> config :db :dir (d/get-conn db-schemas))]
82+
(let [analyzer (su/create-analyzer
83+
{:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
84+
:token-filters [su/lower-case-token-filter
85+
su/prefix-token-filter]})
86+
conn (-> config :db :dir
87+
(d/get-conn db-schemas
88+
{:search-opts {:analyzer analyzer}
89+
:search-domains {"project-name" {:analyzer analyzer}
90+
"namespace-name" {:analyzer analyzer}
91+
"definition-name" {:analyzer analyzer}}}))]
8292
(d/transact! conn datoms)
8393
(d/close conn)))

0 commit comments

Comments
 (0)