Skip to content

Commit 0f577fa

Browse files
committed
compress: Improve get*Stats functions when dealing with uncompressed columns
- compress: Fix returned table from .compress.partition
- compress: Allow 'COMP_ALL' in .compress.partition instead of specifying all tables
- file.kdb: .file.kdb.getListLength -> .file.kdb.getLength
- file.kdb: Fallback to 'count get' for unsupported lists and atoms
- type: Correctly append "/" before checking if splayed table
1 parent a6099b6 commit 0f577fa

File tree

4 files changed

+39
-33
lines changed

4 files changed

+39
-33
lines changed

src/compress.q

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66

77
/ Schemas returned by .compress.getSplayStats, .compress.getPartitionStats and .compress.splay
88
.compress.cfg.schemas:(`symbol$())!();
9-
.compress.cfg.schemas[`infoSplay]: flip `column`compressedLength`uncompressedLength`algorithm`logicalBlockSize`zipLevel!"SJJIII"$\:();
10-
.compress.cfg.schemas[`infoPartition]: flip `part`table`column`compressedLength`uncompressedLength`algorithm`logicalBlockSize`zipLevel!"*SSJJIII"$\:();
11-
.compress.cfg.schemas[`compSplay]: flip `col`source`target`compressed`inplace`empty`writeMode!"SSSBBBS"$\:();
9+
.compress.cfg.schemas[`infoSplay]: flip `column`compressedLength`uncompressedLength`compressMode`algorithm`logicalBlockSize`zipLevel!"SJJSIII"$\:();
10+
.compress.cfg.schemas[`infoPartition]: flip `part`table`column`compressedLength`uncompressedLength`compressMode`algorithm`logicalBlockSize`zipLevel!"*SSJJSIII"$\:();
11+
.compress.cfg.schemas[`compSplay]: flip `column`source`target`compressed`inplace`empty`writeMode!"SSSBBBS"$\:();
12+
.compress.cfg.schemas[`compPartition]: flip `part`table`column`source`target`compressed`inplace`empty`writeMode!"*SSSSBBBS"$\:();
1213

1314
/ Splay and partition compression option defaults provide the following behaviour
1415
/ - recompress (0b): Any compressed files will be copied
@@ -30,32 +31,30 @@
3031
.compress.init:{};
3132

3233

33-
/ NOTE: Columns that are uncompressed will have null values for all information in the returned table
34+
/ NOTE: Columns that are uncompressed will have a null 'compressedLength' value
3435
/ @param splayPath (FolderPath) A folder path of a splayed table
3536
/ @returns (Table) The compressed stats (via -21!) of each column within the specified splay path
3637
/ @throws InvalidSplayPathException If the specified splay path does not exist, or does not contain a splayed table
3738
/ @see .compress.cfg.schemas
3839
.compress.getSplayStats:{[splayPath]
39-
/ Needs a trailing slash for .Q.qp (in .type.isSplayedTable) to work correctly
40-
if[not "/" = last string splayPath;
41-
splayPath:` sv splayPath,`;
42-
];
43-
4440
if[not[.type.isFolder splayPath] | not .type.isSplayedTable splayPath;
4541
'"InvalidSplayPathException";
4642
];
4743

4844
splayCols:cols splayPath;
4945

5046
compressStats:-21!/:` sv/: splayPath,/:splayCols;
47+
compressStats:(`algorithm`logicalBlockSize`zipLevel!0 0 0i) ^/: compressStats;
5148

52-
statsTbl:.compress.cfg.schemas[`infoSplay] upsert/ compressStats;
49+
statsTbl:.compress.cfg.schemas[`infoSplay] upsert compressStats;
5350
statsTbl:update column:splayCols from statsTbl;
51+
statsTbl:update uncompressedLength:hcount each (` sv/: splayPath,/: column) from statsTbl where null uncompressedLength;
52+
statsTbl:update compressMode:key[.compress.defaults] algorithm from statsTbl;
5453
:statsTbl;
5554
};
5655

57-
/ NOTE: Columns that are uncompressed will have null values for all information in the returned table
58-
/ @param hdbRoot (FolderPath) The root folder containing a partitioned HDB
56+
/ NOTE: Columns that are uncompressed will have a null 'compressedLength' value
57+
/ @param hdbRoot (FolderPath) The root folder containing a partitioned HDB or the HDB segment if a segmented HDB
5958
/ @param partVal (Date|Month|Year|Long) The specific partition within the HDB to retrieve compression stats for
6059
/ @returns (Table) The compression stats (via -21!) of each column within each table within the specified HDB partition
6160
/ @throws InvalidHdbRootException If the specified HDB root folder does not exist
@@ -83,7 +82,7 @@
8382
/ Based on the specified parameters, the functions behaviour (returned in the 'writeMode' column) for each column will be:
8483
/ - 'compress': The file is uncompressed, or is compressed and the 'recompress' option is true
8584
/ - 'copy': The file is either empty (0 = count) or is already compressed and the 'recompress' option is missing or false
86-
/ - 'ignore': The would've been copied (as above) but inplace
85+
/ - 'ignore': The file would've been copied (as above) but inplace so nothing to do
8786
/ @param sourceSplayPath (FolderPath) The source splay
8887
/ @param targetSplayPath (FolderPath) The target splay. This can be the same as 'sourceSplayPath' ONLY if the 'inplace' option is set to true
8988
/ @param compressType (Symbol|IntegerList) The compression type. If a symbol, the compression settings will be taken from '.compress.defaults'
@@ -99,8 +98,7 @@
9998
.compress.splay:{[sourceSplayPath; targetSplayPath; compressType; options]
10099
options:.compress.cfg.compressDefaults ^ options;
101100

102-
/ Needs a trailing slash for .Q.qp (in .type.isSplayedTable) to work correctly
103-
if[not .type.isSplayedTable ` sv sourceSplayPath,`;
101+
if[not .type.isSplayedTable sourceSplayPath;
104102
'"InvalidSourceSplayPathException";
105103
];
106104

@@ -129,11 +127,11 @@
129127
];
130128

131129

132-
compressCfg:.compress.cfg.schemas[`compSplay] upsert flip enlist[`col]!enlist cols sourceSplayPath;
133-
compressCfg:update source:(` sv/: sourceSplayPath,/: col), target:(` sv/: targetSplayPath,/: col) from compressCfg;
130+
compressCfg:.compress.cfg.schemas[`compSplay] upsert flip enlist[`column]!enlist cols sourceSplayPath;
131+
compressCfg:update source:(` sv/: sourceSplayPath,/: column), target:(` sv/: targetSplayPath,/: column) from compressCfg;
134132
compressCfg:update compressed:.file.isCompressed each source from compressCfg;
135133
compressCfg:update empty:0 = count first .Q.V sourceSplayPath from compressCfg;
136-
compressCfg:update inplace:source=target from compressCfg;
134+
compressCfg:update inplace:source = target from compressCfg;
137135

138136
compressCfg:update writeMode:`compress`copy compressed from compressCfg;
139137
compressCfg:update writeMode:`ignore from compressCfg where inplace, writeMode = `copy;
@@ -146,6 +144,8 @@
146144

147145

148146
.log.if.info ("Starting splay table compression [ Source: {} ] [ Target: {} ] [ Compression: {} ]"; sourceSplayPath; targetSplayPath; compressType);
147+
.log.if.trace "Compression configuration:\n",.Q.s compressCfg;
148+
149149
st:.time.now[];
150150

151151
.file.ensureDir targetSplayPath;
@@ -176,7 +176,7 @@
176176
/ @param sourceRoot (FolderPath) The path of the source HDB
177177
/ @param targetRoot (FolderPath) The path of the target HDB
178178
/ @param partVal (Date|Month|Year|Long) The specific partition within the HDB to compress
179-
/ @param tbls (SymbolList) The list of tables in the partition to compress
179+
/ @param tbls (Symbol|SymbolList) The list of tables in the partition to compress. If `COMP_ALL` is specified, all tables in the partition will be compressed
180180
/ @param compressType (Symbol|IntegerList) See '.compress.splay'
181181
/ @param options (Dict) See '.compress.splay', 'srcParTxt' / 'tgtParTxt' - set to false to ignore 'par.txt' in source or target HDBs respectively
182182
/ @throws SourceHdbPartitionDoesNotExistException If the specified source HDB does not exist
@@ -201,8 +201,11 @@
201201
'"SourceHdbPartitionDoesNotExistException";
202202
];
203203

204+
srcTables:.file.ls srcPartPath;
204205

205-
srcTables:tbls inter .file.ls srcPartPath;
206+
if[.type.isSymbolList tbls;
207+
srcTables:tbls inter srcTables;
208+
];
206209

207210
srcTblPaths:` sv/: srcPartPath,/:srcTables;
208211
tgtTblPaths:` sv/: tgtPartPath,/:srcTables;
@@ -211,6 +214,9 @@
211214
st:.time.now[];
212215

213216
compressCfg:.compress.splay[;; compressType; options]'[srcTblPaths; tgtTblPaths];
217+
compressCfg:(flip each enlist[`table]!/:enlist each (count each compressCfg)#'srcTables),''compressCfg;
218+
compressCfg:.compress.cfg.schemas[`compPartition] upsert raze compressCfg;
219+
compressCfg:update part:partVal from compressCfg;
214220

215221
.log.if.info ("HDB partition compression complete [ Source HDB: {} ] [ Target HDB: {} ] [ Partition: {} ] [ Tables: {} ] [ Compression Type: {} ] [ Time Taken: {} ]"; sourceRoot; targetRoot; partVal; srcTables; compressType; .time.now[] - st);
216222

src/file.kdb.q

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,19 @@
22
// Copyright (c) 2021 Jaskirat Rajasansir
33

44

5-
/ Optimised list element length function. Only requires reading the first 16 bytes of the specified file
6-
/ to return the length (instead of "count get")
5+
/ Optimised element length function. Only requires reading the first 16 bytes of the specified file
6+
/ to return the length (instead of "count get").
7+
/ NOTE: Optimised code path currently only works for single-type lists (excluding new format GUID lists)
78
/ @param list (FilePath) The list file to return the length of
89
/ @returns (Long) The list length
9-
/ @throws nyi For list types not supported. Currently - new format GUIDs, anymap lists
10-
/ @throws NotAListException If a non-list file is supplied
11-
.file.kdb.getListLength:{[list]
10+
.file.kdb.getLength:{[list]
1211
header:read1 (list; 0; 16);
1312

1413
lType:header 2;
1514

16-
$[lType = 0h;
17-
'"nyi";
18-
lType > count .Q.t;
19-
'"NotAListException"
20-
];
21-
22-
$[11h = lType;
15+
$[(0h = lType) | lType > count .Q.t;
16+
:count get list;
17+
11h = lType;
2318
:`long$0x0 sv reverse header 4 5 6 7;
2419
/ else
2520
:0x0 sv reverse header 8 9 10 11 12 13 14 15

src/require.q

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@
232232
};
233233

234234
/ Standard out logger
235-
.require.i.log: ('[-1; .require.i.parameterisedLog])
235+
.require.i.log: ('[-1; .require.i.parameterisedLog]);
236236

237237
/ Standard error logger
238238
.require.i.logE:('[-2; .require.i.parameterisedLog]);

src/type.q

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,16 @@
6262
:0 < count keys x;
6363
};
6464

65+
/ Supports checking a folder path directly, without the table having been loaded via system "l"
6566
.type.isSplayedTable:{
6667
if[.type.isFilePath x;
6768
if[not .type.isFolder x;
6869
:0b;
6970
];
71+
72+
if[not "/" = last string x;
73+
x:` sv x,`;
74+
];
7075
];
7176

7277
:0b~.Q.qp $[.type.isSymbol x;get;::] x;

0 commit comments

Comments
 (0)