diff --git a/packages/sparql-qlever/src/importer.ts b/packages/sparql-qlever/src/importer.ts index 6f6cf383..c6078f34 100644 --- a/packages/sparql-qlever/src/importer.ts +++ b/packages/sparql-qlever/src/importer.ts @@ -252,15 +252,30 @@ export class Importer implements ImporterInterface { const metadataFile = `${this.options.indexName}.meta-data.json`; const localName = basename(file); const decompressCommand = localName.toLowerCase().endsWith('.zip') - ? `unzip -p '${localName}'` - : `(gunzip -c '${localName}' 2>/dev/null || cat '${localName}')`; + ? `unzip -p ${shellQuote(localName)}` + : `(gunzip -c ${shellQuote(localName)} 2>/dev/null || cat ${shellQuote(localName)})`; const indexTask = await this.options.taskRunner.run( - `${decompressCommand} | qlever-index ${flags} && cat ${metadataFile}`, + `${decompressCommand} | qlever-index ${flags} && cat ${shellQuote(metadataFile)}`, ); return await this.options.taskRunner.wait(indexTask); } } +/** + * POSIX-quote a value for safe interpolation into a shell command: wrap it in + * single quotes and escape any embedded single quote as `'\''`. + * + * Without this, a data filename containing an apostrophe — e.g. a dataset + * titled `'s-Hertogenbosch`, whose distribution URL maps to a local file like + * `…Erfgoed+'s-Hertogenbosch.nt` — would terminate the surrounding quotes, so + * `cat`/`gunzip` would read a non-existent path and feed `qlever-index` empty + * input. The index then "succeeds" with 0 triples, the import is treated as + * failed, and every distribution (and the JSON-LD fallback) fails the same way. + */ +function shellQuote(value: string): string { + return `'${value.replace(/'/g, "'\\''")}'`; +} + type fileFormat = 'nt' | 'nq' | 'ttl'; /** diff --git a/packages/sparql-qlever/test/importer.test.ts b/packages/sparql-qlever/test/importer.test.ts index 94d3ee89..847d6b3c 100644 --- a/packages/sparql-qlever/test/importer.test.ts +++ b/packages/sparql-qlever/test/importer.test.ts @@ -13,6 +13,7 @@ import { utimes, } from 'node:fs/promises'; import { tmpdir } from 'node:os'; +import { execFileSync } from 'node:child_process'; import { TaskRunner } from '@lde/task-runner'; function makeDistributions(): Distribution[] { @@ -331,6 +332,48 @@ describe('Importer', () => { ]); }); + it('shell-escapes data filenames containing single quotes', async () => { + // A dataset titled e.g. "'s-Hertogenbosch" maps to a local filename with + // an apostrophe. Naive single-quoting (`cat ''`) lets the apostrophe + // terminate the quote, so cat/gunzip read a non-existent path and feed + // qlever-index empty input — silently indexing 0 triples. + const trickyFile = join( + tempDir, + "Dataset+Beeldbank+Erfgoed+'s-Hertogenbosch.ttl", + ); + await copyFile(dataFile, trickyFile); + + const runner = stubTaskRunner(42); + const importer = new Importer({ + taskRunner: runner, + indexName, + downloader: { + async download() { + return { path: trickyFile, headers: new Headers() }; + }, + }, + }); + + const result = await importer.import(makeDistributions()); + + expect(result).toBeInstanceOf(ImportSuccessful); + + const command = runner.commands[0]; + // The apostrophe is POSIX-escaped, not left to break the quoting. + expect(command).toContain( + "Dataset+Beeldbank+Erfgoed+'\\''s-Hertogenbosch.ttl", + ); + expect(command).not.toContain("Erfgoed+'s"); // the broken, unescaped form + + // And the decompress sub-command actually reads the real file when a shell + // runs it — this is what produced empty input (0 triples) before the fix. + const decompress = command.slice(0, command.indexOf('| qlever-index')); + const output = execFileSync('sh', ['-c', decompress], { + cwd: tempDir, + }).toString(); + expect(output).toContain('http://example.com/source'); + }); + it('falls back to file extension when Content-Type is a compression type', async () => { const nqFile = join(tempDir, 'data.nq'); await copyFile(dataFile, nqFile); diff --git a/packages/sparql-qlever/vite.config.ts b/packages/sparql-qlever/vite.config.ts index 6ebd10a8..45abe22c 100644 --- a/packages/sparql-qlever/vite.config.ts +++ b/packages/sparql-qlever/vite.config.ts @@ -13,10 +13,10 @@ export default mergeConfig( coverage: { thresholds: { autoUpdate: true, - lines: 93.81, + lines: 95.55, functions: 100, - branches: 80.64, - statements: 93.87, + branches: 83.87, + statements: 95.6, }, }, },