Skip to content

Commit 6d56738

Browse files
Merge pull request #4 from clj-codes/feat/adds-id-and-more-index
feat: adds ids and more indexes
2 parents 686ba17 + 491ca35 commit 6d56738

File tree

9 files changed

+275
-127
lines changed

9 files changed

+275
-127
lines changed

deps.edn

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
:ns-default build
1414
:exec-args {:uber-file "target/extractor.jar"}}
1515

16-
:dev {:extra-paths ["test" "test-resources"]
16+
:dev {:extra-paths ["dev" "test" "test-resources"]
1717
:extra-deps {lambdaisland/kaocha {:mvn/version "1.82.1306"}
1818
lambdaisland/kaocha-cloverage {:mvn/version "1.1.89"}
1919
nubank/mockfn {:mvn/version "0.7.0"}

dev/playground.clj

Lines changed: 80 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,80 @@
1+
(ns dev.playground
  "REPL scratchpad for poking at the generated Datalevin docs database.

  Nothing here runs on load: every snippet lives inside a `comment` form and
  is meant to be evaluated by hand, one form at a time."
  ;; NOTE(review): deps.edn puts \"dev\" itself on the classpath, so by the
  ;; usual path->namespace convention this file would load as `playground`,
  ;; not `dev.playground`, if it were ever `require`d -- confirm.
  (:require [codes.clj.docs.extractor.core :as core]
            [codes.clj.docs.extractor.datalevin :as datalevin]
            [datalevin.core :as d]
            [datalevin.util :as util]))

(comment
  ;; Reset the database: wipe the directory, then run a full extraction.
  (let [dir "target/docs-db"]
    (println "deleting")
    ;; Deletion errors are deliberately swallowed so the extraction below
    ;; still runs.
    (try (util/delete-files dir) (catch Exception _))
    (println "bulking")
    (core/extract! {}))

  ;; Full-text search against the generated database.
  ;; The connection is closed in `finally` so a failing query cannot leak it.
  (let [conn (d/get-conn "target/docs-db" datalevin/db-schemas)]
    (try
      (doall (d/q '[:find (pull ?e [*]) ?a ?v
                    :in $ ?q
                    :where
                    [(fulltext $ ?q) [[?e ?a ?v]]]]
                  (d/db conn)
                  "assoc"))
      (finally
        (d/close conn))))

  ;; Regex search on definition names: public definitions only, skipping
  ;; anything defined by cljs.core/defprotocol; sorted by id, then name.
  (let [conn (d/get-conn "target/docs-db" datalevin/db-schemas)]
    (try
      (doall (->> (d/q '[:find [(pull ?e [:definition/id
                                          :definition/name
                                          :definition/group
                                          :definition/artifact
                                          :definition/namespace]) ...]
                         :in $ ?q
                         :where
                         [(str ".*" ?q ".*") ?pattern]
                         [(re-pattern ?pattern) ?regex]
                         [(re-matches ?regex ?name)]
                         [?e :definition/name ?name]
                         [?e :definition/private false]
                         (not [?e :definition/defined-by "cljs.core/defprotocol"])]
                       (d/db conn)
                       "assoc")
                  (sort-by (juxt :definition/id
                                 :definition/name))))
      (finally
        (d/close conn))))

  ;; Count every entity in the database. The query takes no extra input, so
  ;; only the database itself is passed.
  (let [conn (d/get-conn "target/docs-db" datalevin/db-schemas)]
    (try
      (doall (d/q '[:find (count ?e)
                    :where [?e]]
                  (d/db conn)))
      (finally
        (d/close conn))))

  ;; Same regex search against a small throwaway database.
  (let [db (-> (d/empty-db "/tmp/mydb"
                           {:text {:db/valueType :db.type/string}})
               (d/db-with
                [{:db/id 1 :text "assoc!"}
                 {:db/id 2 :text "assoc"}
                 {:db/id 3 :text "assoc-in"}
                 {:db/id 4 :text "assoc-dom"}
                 {:db/id 5 :text "assoc-meta"}
                 {:db/id 6 :text "associative?"}]))]
    (d/q '[:find (pull ?e [*])
           :in $ ?q
           :where ;[(fulltext $ ?q) [[?e ?a ?v]]]
           [(str ".*" ?q ".*") ?pattern]
           [(re-pattern ?pattern) ?regex]
           [(re-matches ?regex ?name)]
           [?e :text ?name]]
         db
         "assoc")))

src/codes/clj/docs/extractor/adapters.clj

Lines changed: 101 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,46 @@
1818
m
1919
(persistent! acc))))))
2020

21+
(defn ^:private group-multi-langs
  "Collapses analysis entries that describe the same thing in several languages.

  Entries sharing the same `[:ns :name :row]` are merged left to right into a
  single map (later entries win on conflicting keys). The merged map's `:lang`
  is a vector of every non-nil `:lang` found in the group, in encounter order;
  a group with no `:lang` at all gets `:lang []`.

  Returns a vector with one map per group."
  [items]
  (->> items
       (group-by (juxt :ns :name :row))
       vals
       (mapv (fn [same-entry]
               (assoc (apply merge same-entry)
                      ;; `flatten` keeps the result flat even when an input
                      ;; entry already carries a collection under :lang.
                      :lang (->> same-entry (keep :lang) flatten vec))))))
40+
41+
(defn id-by
  "Groups `coll` by `f` and tags every element with a string id under
  `index-key`.

  The id is the group key's parts joined with \"/\", followed by the element's
  zero-based position inside its group, e.g. \"juxt/core/0\". The position is
  `conj`ed onto the group key, so `f` is expected to return a vector (such as
  the result of a `juxt`).

  Returns a vector of the tagged elements, laid out group by group."
  [f index-key coll]
  (into []
        (mapcat (fn [[group-key members]]
                  (map-indexed
                   (fn [position member]
                     (let [id (str/join "/" (conj group-key position))]
                       (assoc member index-key id)))
                   members)))
        (group-by f coll)))
53+
2154
(defn analysis->projects
2255
[analysis]
2356
(mapv
2457
(fn [{:keys [project]}]
2558
(let [[group artifact] (-> project :project-name (str/split #"/"))]
26-
(assoc-some {:project/name (:project-name project)}
59+
(assoc-some {:project/id (:project-name project)
60+
:project/name (:project-name project)}
2761
:project/group group
2862
:project/artifact artifact
2963
:project/paths (mapv #(str/replace % (:deps/root project) "")
@@ -41,29 +75,31 @@
4175
(into accum
4276
(let [{:git/keys [url tag] :deps/keys [root]} project
4377
[group artifact] (-> project :project-name (str/split #"/"))]
44-
(mapv (fn [{:keys [end-row meta name-end-col name-end-row name-row added
45-
name author filename col name-col end-col doc row]}]
46-
(let [trim-filename (str/replace filename root "")]
47-
(assoc-some
48-
{:library/project (:project-name project)
49-
:library/group group
50-
:library/artifact artifact
51-
:library/name (str name)}
52-
:library/end-row end-row
53-
:library/meta meta
54-
:library/name-end-col name-end-col
55-
:library/name-end-row name-end-row
56-
:library/name-row name-row
57-
:library/added added
58-
:library/author author
59-
:library/filename trim-filename
60-
:library/git-source (str url "/blob/" tag trim-filename "#L" row)
61-
:library/col col
62-
:library/name-col name-col
63-
:library/end-col end-col
64-
:library/doc doc
65-
:library/row row)))
66-
libraries))))
78+
(->> libraries
79+
group-multi-langs
80+
(mapv (fn [{:keys [end-row meta name-end-col name-end-row name-row added
81+
name author filename col name-col end-col doc row]}]
82+
(let [trim-filename (str/replace filename root "")]
83+
(assoc-some
84+
{:library/id (str/join "/" [group artifact name])
85+
:library/project {:project/id (:project-name project)}
86+
:library/group group
87+
:library/artifact artifact
88+
:library/name (str name)}
89+
:library/end-row end-row
90+
:library/meta meta
91+
:library/name-end-col name-end-col
92+
:library/name-end-row name-end-row
93+
:library/name-row name-row
94+
:library/added added
95+
:library/author author
96+
:library/filename trim-filename
97+
:library/git-source (str url "/blob/" tag trim-filename "#L" row)
98+
:library/col col
99+
:library/name-col name-col
100+
:library/end-col end-col
101+
:library/doc doc
102+
:library/row row))))))))
67103
[]
68104
analysis))
69105

@@ -78,38 +114,48 @@
78114
(into accum
79115
(let [{:git/keys [url tag] :deps/keys [root]} project
80116
[group artifact] (-> project :project-name (str/split #"/"))]
81-
(mapv (fn [{:keys [fixed-arities end-row meta name-end-col
82-
name-end-row name-row added ns name author
83-
defined-by filename macro col name-col end-col
84-
arglist-strs varargs-min-arity doc row]}]
85-
(let [trim-filename (str/replace filename root "")]
86-
(assoc-some
87-
{:definition/project (:project-name project)
88-
:definition/group group
89-
:definition/artifact artifact
90-
:definition/name (str name)}
91-
:definition/defined-by (some-> defined-by str)
92-
:definition/library (some-> ns str)
93-
:definition/fixed-arities fixed-arities
94-
:definition/arglist-strs arglist-strs
95-
:definition/end-row end-row
96-
:definition/meta meta
97-
:definition/name-end-col name-end-col
98-
:definition/name-end-row name-end-row
99-
:definition/name-row name-row
100-
:definition/added added
101-
:definition/author author
102-
:definition/filename trim-filename
103-
:definition/git-source (str url "/blob/" tag trim-filename "#L" row)
104-
:definition/col col
105-
:definition/name-col name-col
106-
:definition/end-col end-col
107-
:definition/doc doc
108-
:definition/row row
109-
:definition/macro macro
110-
:definition/varargs-min-arity varargs-min-arity)))
111-
definitions))))
117+
(->> definitions
118+
group-multi-langs
119+
(mapv (fn [{:keys [fixed-arities end-row meta name-end-col
120+
name-end-row name-row added ns name author
121+
defined-by filename macro col name-col end-col
122+
arglist-strs varargs-min-arity doc row
123+
private protocol-ns protocol-name]}]
124+
(let [trim-filename (str/replace filename root "")]
125+
(assoc-some
126+
{:definition/group group
127+
:definition/artifact artifact
128+
:definition/name (str name)}
129+
:definition/defined-by (some-> defined-by str)
130+
:definition/namespace (some-> ns str)
131+
:definition/library (when ns {:library/id (str/join "/" [group artifact ns])})
132+
:definition/fixed-arities fixed-arities
133+
:definition/arglist-strs arglist-strs
134+
:definition/end-row end-row
135+
:definition/meta meta
136+
:definition/name-end-col name-end-col
137+
:definition/name-end-row name-end-row
138+
:definition/name-row name-row
139+
:definition/added added
140+
:definition/author author
141+
:definition/filename trim-filename
142+
:definition/git-source (str url "/blob/" tag trim-filename "#L" row)
143+
:definition/col col
144+
:definition/name-col name-col
145+
:definition/end-col end-col
146+
:definition/doc doc
147+
:definition/row row
148+
:definition/macro (boolean macro)
149+
:definition/varargs-min-arity varargs-min-arity
150+
:definition/private (boolean private)
151+
:definition/protocol-ns (some-> protocol-ns str)
152+
:definition/protocol-name (some-> protocol-name str)))))))))
112153
[])
154+
(id-by (juxt :definition/group
155+
:definition/artifact
156+
:definition/namespace
157+
:definition/name)
158+
:definition/id)
113159
(remove inrelevant-definitions)))
114160

115161
(defn analysis->datoms

src/codes/clj/docs/extractor/core.clj

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,16 @@
33
[codes.clj.docs.extractor.analysis :as analysis]
44
[codes.clj.docs.extractor.config :as config]
55
[codes.clj.docs.extractor.datalevin :as datalevin]
6-
[codes.clj.docs.extractor.log :refer [log->fn]])
6+
[codes.clj.docs.extractor.log :refer [with-log]])
77
(:gen-class))
88

99
(defn extract!
1010
"Extract data from configured projects and generate Datalevin file."
1111
[_data]
1212
(let [config (config/read! "resources/config.edn")
13-
analysis-raw (log->fn (analysis/extract! config))
14-
datoms (adapters/analysis->datoms analysis-raw)]
15-
(log->fn (datalevin/bulk-transact! datoms config))))
13+
analysis-raw (with-log (analysis/extract! config))
14+
datoms (with-log (adapters/analysis->datoms analysis-raw))]
15+
(with-log (datalevin/bulk-transact! datoms config))))
1616

1717
(defn -main
1818
"The entry-point for 'gen-class'"

src/codes/clj/docs/extractor/datalevin.clj

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,9 @@
44
;; TODO: add id :db.unique/identity and ref :db.type/ref
55

66
(def project-schema
7-
{;:project/id {:db/valueType :db.type/string :unique :db.unique/identity}
8-
:project/name {:db/valueType :db.type/string
9-
:db/fulltext true}
7+
{:project/id {:db/valueType :db.type/string
8+
:unique :db.unique/identity}
9+
:project/name {:db/valueType :db.type/string}
1010
:project/group {:db/valueType :db.type/string}
1111
:project/artifact {:db/valueType :db.type/string}
1212
:project/paths {:db/valueType :db.type/string
@@ -17,11 +17,10 @@
1717
:project/manifest {:db/valueType :db.type/keyword}})
1818

1919
(def library-schema
20-
{;:library/id {:db/valueType :db.type/string :unique :db.unique/identity}
21-
:library/name {:db/valueType :db.type/string
22-
:db/fulltext true}
23-
:library/project {:db/valueType :db.type/string} ;todo change to ref
24-
;:library/project {:db/valueType :db.type/ref}
20+
{:library/id {:db/valueType :db.type/string
21+
:unique :db.unique/identity}
22+
:library/name {:db/valueType :db.type/string}
23+
:library/project {:db/valueType :db.type/ref}
2524
:library/group {:db/valueType :db.type/string}
2625
:library/artifact {:db/valueType :db.type/string}
2726
:library/doc {:db/valueType :db.type/string
@@ -34,13 +33,11 @@
3433
:library/col {:db/valueType :db.type/long}})
3534

3635
(def definition-schema
37-
{;:definition/id {:db/valueType :db.type/string :unique :db.unique/identity}
38-
:definition/name {:db/valueType :db.type/string
39-
:db/fulltext true}
40-
:definition/library {:db/valueType :db.type/string} ;todo change to ref
41-
;:definition/library {:db/valueType :db.type/ref}
42-
:definition/project {:db/valueType :db.type/string} ;todo change to ref
43-
;:definition/project {:db/valueType :db.type/ref}
36+
{:definition/id {:db/valueType :db.type/string
37+
:unique :db.unique/identity}
38+
:definition/name {:db/valueType :db.type/string}
39+
:definition/namespace {:db/valueType :db.type/string}
40+
:definition/library {:db/valueType :db.type/ref}
4441
:definition/group {:db/valueType :db.type/string}
4542
:definition/artifact {:db/valueType :db.type/string}
4643
:definition/doc {:db/valueType :db.type/string
@@ -52,8 +49,11 @@
5249
:definition/varargs-min-arity {:db/valueType :db.type/long}
5350
:definition/added {:db/valueType :db.type/string}
5451
:definition/macro {:db/valueType :db.type/boolean}
52+
:definition/private {:db/valueType :db.type/boolean}
5553
:definition/row {:db/valueType :db.type/long}
56-
:definition/col {:db/valueType :db.type/long}})
54+
:definition/col {:db/valueType :db.type/long}
55+
:definition/protocol-ns {:db/valueType :db.type/string}
56+
:definition/protocol-name {:db/valueType :db.type/string}})
5757

5858
(def db-schemas
5959
(merge project-schema library-schema definition-schema))

src/codes/clj/docs/extractor/log.clj

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
(ns codes.clj.docs.extractor.log
22
(:import [java.time Duration LocalDateTime]))
33

4-
(defmacro log->fn [& body]
4+
(defmacro with-log [& body]
55
`(do
66
(let [executed-form# ~(str (second &form))
77
start-time# (LocalDateTime/now)]

test/codes/clj/docs/extractor/adapters_test.clj

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,3 +23,14 @@
2323
(testing "analysis -> datoms"
2424
(is (= 4
2525
(count (adapters/analysis->datoms fixtures.analysis/raw))))))
26+
27+
;; Checks that `id-by` appends a per-group, zero-based index to the grouped
;; key parts and stores the joined string under the requested key.
(deftest id-by-test
  (testing "id-by should generate id string"
    (let [rows     [{:name "juxt" :group "core" :row 1}
                    {:name "juxt" :group "core" :row 2}
                    {:name "assoc" :group "core" :row 1}]
          expected [{:name "juxt", :group "core", :row 1, :id "juxt/core/0"}
                    {:name "juxt", :group "core", :row 2, :id "juxt/core/1"}
                    {:name "assoc", :group "core", :row 1, :id "assoc/core/0"}]]
      (is (match? expected
                  (adapters/id-by (juxt :name :group) :id rows))))))

0 commit comments

Comments
 (0)