Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions packages/sparql-qlever/src/importer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -252,15 +252,30 @@ export class Importer implements ImporterInterface {
const metadataFile = `${this.options.indexName}.meta-data.json`;
const localName = basename(file);
const decompressCommand = localName.toLowerCase().endsWith('.zip')
? `unzip -p '${localName}'`
: `(gunzip -c '${localName}' 2>/dev/null || cat '${localName}')`;
? `unzip -p ${shellQuote(localName)}`
: `(gunzip -c ${shellQuote(localName)} 2>/dev/null || cat ${shellQuote(localName)})`;
const indexTask = await this.options.taskRunner.run(
`${decompressCommand} | qlever-index ${flags} && cat ${metadataFile}`,
`${decompressCommand} | qlever-index ${flags} && cat ${shellQuote(metadataFile)}`,
);
return await this.options.taskRunner.wait(indexTask);
}
}

/**
* POSIX-quote a value for safe interpolation into a shell command: wrap it in
* single quotes and escape any embedded single quote as `'\''`.
*
* Without this, a data filename containing an apostrophe — e.g. a dataset
* titled `'s-Hertogenbosch`, whose distribution URL maps to a local file like
* `…Erfgoed+'s-Hertogenbosch.nt` — would terminate the surrounding quotes, so
* `cat`/`gunzip` would read a non-existent path and feed `qlever-index` empty
* input. The index then "succeeds" with 0 triples, the import is treated as
* failed, and every distribution (and the JSON-LD fallback) fails the same way.
*/
function shellQuote(value: string): string {
return `'${value.replace(/'/g, "'\\''")}'`;
}

type fileFormat = 'nt' | 'nq' | 'ttl';

/**
Expand Down
43 changes: 43 additions & 0 deletions packages/sparql-qlever/test/importer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
utimes,
} from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { execFileSync } from 'node:child_process';
import { TaskRunner } from '@lde/task-runner';

function makeDistributions(): Distribution[] {
Expand Down Expand Up @@ -331,6 +332,48 @@ describe('Importer', () => {
]);
});

it('shell-escapes data filenames containing single quotes', async () => {
// A dataset titled e.g. "'s-Hertogenbosch" maps to a local filename with
// an apostrophe. Naive single-quoting (`cat '<name>'`) lets the apostrophe
// terminate the quote, so cat/gunzip read a non-existent path and feed
// qlever-index empty input — silently indexing 0 triples.
const trickyFile = join(
tempDir,
"Dataset+Beeldbank+Erfgoed+'s-Hertogenbosch.ttl",
);
await copyFile(dataFile, trickyFile);

const runner = stubTaskRunner(42);
const importer = new Importer({
taskRunner: runner,
indexName,
downloader: {
async download() {
return { path: trickyFile, headers: new Headers() };
},
},
});

const result = await importer.import(makeDistributions());

expect(result).toBeInstanceOf(ImportSuccessful);

const command = runner.commands[0];
// The apostrophe is POSIX-escaped, not left to break the quoting.
expect(command).toContain(
"Dataset+Beeldbank+Erfgoed+'\\''s-Hertogenbosch.ttl",
);
expect(command).not.toContain("Erfgoed+'s"); // the broken, unescaped form

// And the decompress sub-command actually reads the real file when a shell
// runs it — this is what produced empty input (0 triples) before the fix.
const decompress = command.slice(0, command.indexOf('| qlever-index'));
const output = execFileSync('sh', ['-c', decompress], {
cwd: tempDir,
}).toString();
expect(output).toContain('http://example.com/source');
});

it('falls back to file extension when Content-Type is a compression type', async () => {
const nqFile = join(tempDir, 'data.nq');
await copyFile(dataFile, nqFile);
Expand Down
6 changes: 3 additions & 3 deletions packages/sparql-qlever/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ export default mergeConfig(
coverage: {
thresholds: {
autoUpdate: true,
lines: 93.81,
lines: 95.55,
functions: 100,
branches: 80.64,
statements: 93.87,
branches: 83.87,
statements: 95.6,
},
},
},
Expand Down