Skip to content

Commit 42745d5

Browse files
committed
feat: fulltext index tokens
1 parent 80609fb commit 42745d5

File tree

3 files changed

+68
-23
lines changed

3 files changed

+68
-23
lines changed

dev/playground.clj

Lines changed: 50 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
(ns dev.playground
22
(:require [clj-http.client :as http]
33
[clojure.java.io :as io]
4+
[clojure.string :as str]
45
[codes.clj.docs.extractor.core :as core]
56
[codes.clj.docs.extractor.datalevin :as datalevin]
67
[datalevin.core :as d]
7-
[datalevin.util :as util])
8+
[datalevin.util :as util]
9+
[datalevin.search-utils :as su])
810
(:import [java.io File]))
911

1012
(defn get-url [git-url]
@@ -49,14 +51,17 @@
4951
(core/extract! {}))
5052

5153
; tests with generated database
52-
(let [conn (d/get-conn "target/docs-db" datalevin/db-schemas)
54+
(let [conn (d/get-conn "target/docs-db"
55+
datalevin/db-schemas)
56+
5357
db (d/db conn)
58+
5459
result (doall (d/q '[:find (pull ?e [* {:namespace/project [*]}]) ?a ?v
5560
:in $ ?q
5661
:where
57-
[(fulltext $ ?q {:domains ["definition"]}) [[?e ?a ?v]]]]
62+
[(fulltext $ ?q {:domains ["definition-name"]}) [[?e ?a ?v]]]]
5863
db
59-
"assoc"))]
64+
"associative"))]
6065
(d/close conn)
6166
result)
6267

@@ -73,25 +78,18 @@
7378
result)
7479

7580
; regex searching
76-
(let [conn (d/get-conn "target/docs-db" datalevin/db-schemas)
81+
(let [conn (d/get-conn "target/docs-db"
82+
datalevin/db-schemas)
7783
db (d/db conn)
78-
result (doall (->> (d/q '[:find [(pull ?e [:definition/id
79-
:definition/name
80-
:definition/doc
81-
:definition/group
82-
:definition/artifact
83-
:definition/git-source
84-
{:definition/namespace [:namespace/name]}]) ...]
84+
result (doall (->> (d/q '[:find [(pull ?e [*]) ...]
8585
:in $ ?q
8686
:where
87-
[(str ?q ".*") ?pattern]
87+
[(str ".*" ?q ".*") ?pattern]
8888
[(re-pattern ?pattern) ?regex]
8989
[(re-matches ?regex ?name)]
90-
[?e :definition/name ?name]
91-
[?e :definition/private false]
92-
(not [?e :definition/defined-by "cljs.core/defprotocol"])]
90+
[?e :definition/name ?name]]
9391
db
94-
"def")
92+
"pending")
9593
(sort-by (juxt
9694
:definition/id
9795
:definition/name))))]
@@ -119,4 +117,39 @@
119117
db
120118
"assoc"))]
121119
(d/close conn)
120+
result)
121+
122+
; tests with fulltext and analyzer
123+
(let [analyzer (su/create-analyzer
124+
{:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
125+
:token-filters [su/lower-case-token-filter
126+
su/prefix-token-filter]})
127+
128+
dir "/tmp/mydb"
129+
conn (d/create-conn dir
130+
{:text {:db/valueType :db.type/string
131+
:db/fulltext true
132+
:db.fulltext/domains ["txt"]}}
133+
{:search-domains {"txt" {:analyzer analyzer}}})
134+
135+
data [{:text "assoc!"}
136+
{:text "assoc"}
137+
{:text "assoc-in"}
138+
{:text "assoc-dom"}
139+
{:text "assoc-meta"}
140+
{:text "associative?"}]
141+
142+
_transact (d/transact! conn data)
143+
144+
result (d/q '[:find ?e ?a ?v
145+
:in $ ?q
146+
:where [(fulltext $ ?q {:domains ["txt"]}) [[?e ?a ?v]]]]
147+
(d/db conn)
148+
"a")]
149+
150+
(d/close conn)
151+
(util/delete-files dir)
152+
122153
result))
154+
155+

src/codes/clj/docs/extractor/adapters.clj

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
(->> namespaces
7979
group-multi-langs
8080
(mapv (fn [{:keys [end-row meta name-end-col name-end-row name-row deprecated added
81-
name author filename col name-col end-col doc no-doc row]}]
81+
name author filename col name-col end-col doc no-doc row lang]}]
8282
(let [trim-filename (str/replace filename root "")]
8383
(assoc-some
8484
{:namespace/id (str/join "/" [group artifact name])
@@ -101,7 +101,8 @@
101101
:namespace/end-col end-col
102102
:namespace/doc doc
103103
:namespace/no-doc (some-> no-doc boolean)
104-
:namespace/row row))))))))
104+
:namespace/row row
105+
:namespace/lang lang))))))))
105106
[]
106107
analysis))
107108

@@ -122,7 +123,7 @@
122123
name-end-row name-row added deprecated ns name author
123124
defined-by filename macro col name-col end-col
124125
arglist-strs varargs-min-arity doc row
125-
private protocol-ns protocol-name]}]
126+
private protocol-ns protocol-name lang]}]
126127
(let [trim-filename (when filename (str/replace filename root ""))]
127128
(assoc-some
128129
{:definition/group group
@@ -154,7 +155,8 @@
154155
:definition/varargs-min-arity varargs-min-arity
155156
:definition/private (boolean private)
156157
:definition/protocol-ns (some-> protocol-ns str)
157-
:definition/protocol-name (some-> protocol-name str)))))))))
158+
:definition/protocol-name (some-> protocol-name str)
159+
:definition/lang lang))))))))
158160
[])
159161
(id-by (juxt :definition/group
160162
:definition/artifact

src/codes/clj/docs/extractor/datalevin.clj

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
(ns codes.clj.docs.extractor.datalevin
2-
(:require [datalevin.core :as d]))
2+
(:require [datalevin.core :as d]
3+
[datalevin.search-utils :as su]))
34

45
;; TODO: add id :db.unique/identity and ref :db.type/ref
56

@@ -78,6 +79,15 @@
7879
(merge project-schema namespace-schema definition-schema))
7980

8081
(defn bulk-transact!
  "Transacts `datoms` into the Datalevin database located at
  `(-> config :db :dir)` and then closes the connection.

  The database is opened with `db-schemas` and a single custom full-text
  analyzer, installed both as the default search analyzer (`:search-opts`)
  and for the `project-name`, `namespace-name` and `definition-name`
  search domains. The analyzer:
    - tokenizes with a regexp tokenizer built from a character class of
      whitespace and punctuation,
    - runs tokens through `su/lower-case-token-filter`,
    - then runs them through `su/prefix-token-filter`.

  The connection is always closed, even when the transaction throws, so a
  failed bulk load does not leave the database open.

  Called for its side effects; returns nil."
  [datoms config]
  (let [analyzer (su/create-analyzer
                  {:tokenizer (su/create-regexp-tokenizer #"[\s:/\.;,!=?\"'()\[\]{}|<>&@#^*\\~`\-]+")
                   :token-filters [su/lower-case-token-filter
                                   su/prefix-token-filter]})
        conn (-> config :db :dir
                 (d/get-conn db-schemas
                             {:search-opts {:analyzer analyzer}
                              :search-domains {"project-name" {:analyzer analyzer}
                                               "namespace-name" {:analyzer analyzer}
                                               "definition-name" {:analyzer analyzer}}}))]
    (try
      (d/transact! conn datoms)
      ;; `finally` guarantees the connection is released on both the success
      ;; and the failure path; the exception (if any) still propagates.
      (finally
        (d/close conn)))
    nil))

0 commit comments

Comments
 (0)