Commit b651fb2

Docs: Auto generate configuration docs

1 parent: 681e532

File tree: 23 files changed (+215, -187 lines)


clickhouse-core/src/main/scala/xenon/clickhouse/Utils.scala

Lines changed: 4 additions & 0 deletions
@@ -47,6 +47,10 @@ object Utils extends Logging {
 
   def classpathResourceAsStream(name: String): InputStream = defaultClassLoader.getResourceAsStream(name)
 
+  def getCodeSourceLocation(clazz: Class[_]): String = {
+    new File(clazz.getProtectionDomain.getCodeSource.getLocation.toURI).getPath
+  }
+
   @transient lazy val tmpDirPath: Path = Files.createTempDirectory("classpath_res_")
 
   def copyFileFromClasspath(name: String): File = {
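
For context, a minimal usage sketch of the new helper (not part of this commit; it assumes the connector jar providing `xenon.clickhouse.Utils` is on the classpath, and the demo object name is illustrative):

```scala
import xenon.clickhouse.Utils

object CodeSourceLocationDemo {
  def main(args: Array[String]): Unit = {
    // Resolves the jar (or classes directory) that Utils itself was loaded from.
    // Note: classes loaded by the bootstrap class loader have no code source,
    // so getProtectionDomain.getCodeSource can be null for e.g. java.lang.String.
    println(Utils.getCodeSourceLocation(Utils.getClass))
  }
}
```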

docs/best_practices/01_deployment.md

Lines changed: 2 additions & 2 deletions
@@ -3,12 +3,12 @@ license: |
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 https://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
 See the License for the specific language governing permissions and
 limitations under the License.
 ---

docs/best_practices/index.md

Lines changed: 2 additions & 2 deletions
@@ -3,12 +3,12 @@ license: |
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 https://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
 See the License for the specific language governing permissions and
 limitations under the License.
 ---

docs/configurations/01_catalog_configurations.md

Lines changed: 6 additions & 7 deletions
@@ -3,20 +3,18 @@ license: |
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 https://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
 See the License for the specific language governing permissions and
 limitations under the License.
 ---
 
-Catalog Configurations
-===
-
-## Single Instance
+<!--begin-include-->
+### Single Instance
 
 Suppose you have one ClickHouse instance which installed on `10.0.0.1` and exposes HTTP on `8123`.
 
@@ -34,7 +32,7 @@ spark.sql.catalog.clickhouse.database default
 
 Then you can access ClickHouse table `<ck_db>.<ck_table>` from Spark SQL by using `clickhouse.<ck_db>.<ck_table>`.
 
-## Cluster
+### Cluster
 
 For ClickHouse cluster, give an unique catalog name for each instances.
 
@@ -63,3 +61,4 @@ spark.sql.catalog.clickhouse2.database default
 
 Then you can access clickhouse1 table `<ck_db>.<ck_table>` from Spark SQL by `clickhouse1.<ck_db>.<ck_table>`,
 and access clickhouse2 table `<ck_db>.<ck_table>` by `clickhouse2.<ck_db>.<ck_table>`.
+<!--end-include-->
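
As a side note on the catalog settings this page documents, here is a hedged sketch of wiring up a single-instance catalog programmatically. Only the `database` key appears in the diff context above; the other key names and the catalog implementation class follow the doc's Single Instance example and should be treated as assumptions, not as confirmed by this diff.

```scala
import org.apache.spark.sql.SparkSession

// Sketch only: key names other than `database`, and the catalog class name,
// are assumptions based on the Single Instance section, not shown in this diff.
val spark = SparkSession.builder()
  .appName("clickhouse-catalog-demo")
  .master("local[*]") // local demo only
  .config("spark.sql.catalog.clickhouse", "xenon.clickhouse.ClickHouseCatalog")
  .config("spark.sql.catalog.clickhouse.host", "10.0.0.1")
  .config("spark.sql.catalog.clickhouse.http_port", "8123")
  .config("spark.sql.catalog.clickhouse.user", "default")
  .config("spark.sql.catalog.clickhouse.password", "")
  .config("spark.sql.catalog.clickhouse.database", "default")
  .getOrCreate()

// Tables then become addressable as clickhouse.<ck_db>.<ck_table> from Spark SQL.
spark.sql("SELECT * FROM clickhouse.default.some_table").show()
```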

docs/configurations/02_sql_configurations.md

Lines changed: 24 additions & 132 deletions
@@ -3,143 +3,35 @@ license: |
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 https://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
 See the License for the specific language governing permissions and
 limitations under the License.
 ---
 
-SQL Configurations
-===
-
-!!! tip "Since 0.1.0 - spark.clickhouse.write.batchSize"
-
-    Default Value: 10000
-
-    Description: The number of records per batch on writing to ClickHouse.
-
-!!! tip "Since 0.1.0 - spark.clickhouse.write.maxRetry"
-
-    Default Value: 3
-
-    Description: The maximum number of write we will retry for a single batch write failed with retryable codes.
-
-!!! tip "Since 0.1.0 - spark.clickhouse.write.retryInterval"
-
-    Default Value: 10
-
-    Description: The interval in seconds between write retry.
-
-!!! tip "Since 0.1.0 - spark.clickhouse.write.retryableErrorCodes"
-
-    Default Value: 241
-
-    Description: The retryable error codes returned by ClickHouse server when write failing.
-
-!!! tip "Since 0.1.0 - spark.clickhouse.write.repartitionNum"
-
-    Default Value: 0
-
-    Description: Repartition data to meet the distributions of ClickHouse table is required before writing, use this
-    conf to specific the repartition number, value less than 1 mean no requirement.
-
-!!! tip "Since 0.3.0 - spark.clickhouse.write.repartitionByPartition"
-
-    Default Value: true
-
-    Description: Whether to repartition data by ClickHouse partition keys to meet the distributions of ClickHouse table
-    before writing.
-
-!!! tip "Since 0.3.0 - spark.clickhouse.write.repartitionStrictly"
-
-    Default Value: false
-
-    Description: If true, Spark will strictly distribute incoming records across partitions to satisfy
-    the required distribution before passing the records to the data source table on write.
-    Otherwise, Spark may apply certain optimizations to speed up the query but break the
-    distribution requirement. Note, this configuration requires SPARK-37523, w/o this patch,
-    it always act as `true`.
-
-!!! tip "Since 0.1.0 - spark.clickhouse.write.distributed.useClusterNodes"
-
-    Default Value: true
-
-    Description: Write to all nodes of cluster when writing Distributed table.
-
-!!! tip "Since 0.1.0 - spark.clickhouse.read.distributed.useClusterNodes"
-
-    Default Value: false
-
-    Description: Read from all nodes of cluster when reading Distributed table.
-
-!!! tip "Since 0.1.0 - spark.clickhouse.write.distributed.convertLocal"
-
-    Default Value: false
-
-    Description: When writing Distributed table, write local table instead of itself. If `true`, ignore
-    `write.distributed.useClusterNodes`.
-
-!!! tip "Since 0.1.0 - spark.clickhouse.read.distributed.convertLocal"
-
-    Default Value: true
-
-    Description: When reading Distributed table, read local table instead of itself. If `true`, ignore
-    `read.distributed.useClusterNodes`.
-
-!!! tip "Since 0.4.0 - spark.clickhouse.read.splitByPartitionId"
-
-    Default Value: true
-
-    Description: If `true`, construct input partition filter by virtual column `_partition_id`,
-    instead of partition value. There are known bugs to assemble SQL predication by
-    partition value. This feature requires ClickHouse Server v21.6+.
-
-!!! tip "Since 0.3.0 - spark.clickhouse.write.localSortByPartition"
-
-    Default Value: `spark.clickhouse.write.repartitionByPartition`
-
-    Description: If `true`, do local sort by partition before writing.
-
-!!! tip "Since 0.3.0 - spark.clickhouse.write.localSortByKey"
-
-    Default Value: true
-
-    Description: If `true`, do local sort by sort keys before writing.
-
-!!! tip "Since 0.4.0 - spark.clickhouse.ignoreUnsupportedTransform"
-
-    Default Value: false
-
-    Description: ClickHouse supports using complex expressions as sharding keys or partition values,
-    e.g. `cityHash64(col_1, col_2)`, and those can not be supported by Spark now. If `true`,
-    ignore the unsupported expressions, otherwise fail fast w/ an exception. Note: when
-    `spark.clickhouse.write.distributed.convertLocal` is enabled, ignore unsupported sharding keys
-    may corrupt the data.
-
-!!! tip "Since 0.5.0 - spark.clickhouse.read.compression.codec"
-
-    Default Value: lz4
-
-    Description: The codec used to decompress data for reading. Supported codecs: none, lz4.
-
-!!! tip "Since 0.3.0 - spark.clickhouse.write.compression.codec"
-
-    Default Value: lz4
-
-    Description: The codec used to compress data for writing. Supported codecs: none, lz4.
-
-!!! tip "Since 0.6.0 - spark.clickhouse.read.format"
-
-    Default Value: json
-
-    Description: Serialize format for reading. Supported formats: json, binary.
-
-!!! tip "Since 0.4.0 - spark.clickhouse.write.format"
-
-    Default Value: arrow
-
-    Description: Serialize format for writing. Supported formats: json, arrow.
+<!--begin-include-->
+|Key | Default | Description | Since
+|--- | ------- | ----------- | -----
+spark.clickhouse.ignoreUnsupportedTransform|false|ClickHouse supports using complex expressions as sharding keys or partition values, e.g. `cityHash64(col_1, col_2)`, and those can not be supported by Spark now. If `true`, ignore the unsupported expressions, otherwise fail fast w/ an exception. Note: when `spark.clickhouse.write.distributed.convertLocal` is enabled, ignore unsupported sharding keys may corrupt the data.|0.4.0
+spark.clickhouse.read.compression.codec|lz4|The codec used to decompress data for reading. Supported codecs: none, lz4.|0.5.0
+spark.clickhouse.read.distributed.convertLocal|true|When reading Distributed table, read local table instead of itself. If `true`, ignore `spark.clickhouse.read.distributed.useClusterNodes`.|0.1.0
+spark.clickhouse.read.format|json|Serialize format for reading. Supported formats: json, binary|0.6.0
+spark.clickhouse.read.splitByPartitionId|true|If `true`, construct input partition filter by virtual column `_partition_id`, instead of partition value. There are known bugs to assemble SQL predication by partition value. This feature requires ClickHouse Server v21.6+|0.4.0
+spark.clickhouse.write.batchSize|10000|The number of records per batch on writing to ClickHouse.|0.1.0
+spark.clickhouse.write.compression.codec|lz4|The codec used to compress data for writing. Supported codecs: none, lz4.|0.3.0
+spark.clickhouse.write.distributed.convertLocal|false|When writing Distributed table, write local table instead of itself. If `true`, ignore `spark.clickhouse.write.distributed.useClusterNodes`.|0.1.0
+spark.clickhouse.write.distributed.useClusterNodes|true|Write to all nodes of cluster when writing Distributed table.|0.1.0
+spark.clickhouse.write.format|arrow|Serialize format for writing. Supported formats: json, arrow|0.4.0
+spark.clickhouse.write.localSortByKey|true|If `true`, do local sort by sort keys before writing.|0.3.0
+spark.clickhouse.write.localSortByPartition|<value of spark.clickhouse.write.repartitionByPartition>|If `true`, do local sort by partition before writing. If not set, it equals to `spark.clickhouse.write.repartitionByPartition`.|0.3.0
+spark.clickhouse.write.maxRetry|3|The maximum number of write we will retry for a single batch write failed with retryable codes.|0.1.0
+spark.clickhouse.write.repartitionByPartition|true|Whether to repartition data by ClickHouse partition keys to meet the distributions of ClickHouse table before writing.|0.3.0
+spark.clickhouse.write.repartitionNum|0|Repartition data to meet the distributions of ClickHouse table is required before writing, use this conf to specific the repartition number, value less than 1 mean no requirement.|0.1.0
+spark.clickhouse.write.repartitionStrictly|false|If `true`, Spark will strictly distribute incoming records across partitions to satisfy the required distribution before passing the records to the data source table on write. Otherwise, Spark may apply certain optimizations to speed up the query but break the distribution requirement. Note, this configuration requires SPARK-37523, w/o this patch, it always act as `true`.|0.3.0
+spark.clickhouse.write.retryInterval|10s|The interval in seconds between write retry.|0.1.0
+spark.clickhouse.write.retryableErrorCodes|241|The retryable error codes returned by ClickHouse server when write failing.|0.1.0
+<!--end-include-->
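
The generated table above lists the session-level keys this connector recognizes. As an illustration of how a few of them are typically applied at session build time (a sketch, assuming the connector is on the classpath; the chosen values are arbitrary examples, only the key names and defaults come from the table):

```scala
import org.apache.spark.sql.SparkSession

// Sketch: apply a few of the documented SQL configurations when building the session.
val spark = SparkSession.builder()
  .appName("clickhouse-sql-conf-demo")
  .master("local[*]") // local demo only
  .config("spark.clickhouse.write.batchSize", "20000")       // default 10000
  .config("spark.clickhouse.write.compression.codec", "lz4") // supported: none, lz4
  .config("spark.clickhouse.write.repartitionByPartition", "true")
  .getOrCreate()
```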

docs/configurations/index.md

Lines changed: 18 additions & 10 deletions
@@ -1,29 +1,37 @@
 ---
+hide:
+  - navigation
 license: |
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 https://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
 See the License for the specific language governing permissions and
 limitations under the License.
 ---
 
 Configurations
 ===
 
-## TODO
+## Catalog Configurations
+
+{!
+include-markdown "./01_catalog_configurations.md"
+start="<!--begin-include-->"
+end="<!--end-include-->"
+!}
 
-## Overwrite SQL Configurations
+## SQL Configurations
 
-Your can overwrite [ClickHouse SQL Configurations](./02_sql_configurations.md) by editing
-`$SPARK_HOME/conf/spark-defaults.conf`, e.g.
+SQL Configurations could be overwritten by `SET <key>=<value>` in runtime.
 
-```
-spark.clickhouse.write.batchSize 10000
-spark.clickhouse.write.maxRetry 2
-```
+{!
+include-markdown "./02_sql_configurations.md"
+start="<!--begin-include-->"
+end="<!--end-include-->"
+!}
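
The rewritten page states that SQL configurations can be overridden with `SET <key>=<value>` at runtime. A minimal sketch of that override path (assuming an active `SparkSession` named `spark` with the connector on the classpath):

```scala
// Runtime override, equivalent to `SET <key>=<value>` in a Spark SQL session.
spark.sql("SET spark.clickhouse.write.batchSize=20000")
// The same keys can also be changed through the runtime conf API.
spark.conf.set("spark.clickhouse.write.maxRetry", "5")
```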

docs/developers/01_build_and_test.md

Lines changed: 2 additions & 2 deletions
@@ -3,12 +3,12 @@ license: |
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 https://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
 See the License for the specific language governing permissions and
 limitations under the License.
 ---

docs/developers/02_docs_and_website.md

Lines changed: 2 additions & 2 deletions
@@ -3,12 +3,12 @@ license: |
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 https://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
 See the License for the specific language governing permissions and
 limitations under the License.
 ---

docs/developers/03_private_release.md

Lines changed: 2 additions & 2 deletions
@@ -3,12 +3,12 @@ license: |
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 https://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
 See the License for the specific language governing permissions and
 limitations under the License.
 ---

docs/developers/04_public_release.md

Lines changed: 2 additions & 2 deletions
@@ -3,12 +3,12 @@ license: |
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 https://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
 See the License for the specific language governing permissions and
 limitations under the License.
 ---
