diff --git a/.gitignore b/.gitignore index 968b524..42b927c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,5 @@ .DS_Store .vscode/ node_modules/ -.env +.env* .claude diff --git a/scripts/sync-skills.sh b/scripts/sync-skills.sh index 26eec12..c91cc88 100755 --- a/scripts/sync-skills.sh +++ b/scripts/sync-skills.sh @@ -79,7 +79,7 @@ for i in "${!SKILL_NAMES[@]}"; do src="${SKILL_PATHS[$i]}" dst="$DEST/$name" mkdir -p "$dst" - rsync -a --delete "$src/" "$dst/" + rsync -a --delete --exclude node_modules/ --exclude '.env*' "$src/" "$dst/" echo "[sync-skills] synced: $name" done diff --git a/skills/vault-x-bookmarks/SKILL.md b/skills/vault-x-bookmarks/SKILL.md new file mode 100644 index 0000000..87b196e --- /dev/null +++ b/skills/vault-x-bookmarks/SKILL.md @@ -0,0 +1,114 @@ +--- +name: vault-x-bookmarks +description: + Review a bounded slice of X bookmarks via the X API and capture selected + bookmarks as external source records in raw/sources +--- + +# Vault X Bookmarks + +Use the bundled TypeScript helper to review a bounded slice of the authenticated +user's X bookmarks and capture selected items into `raw/sources/`. + +This is source-first: the helper writes `external` source records, records +captured bookmark IDs in `raw/state/x-bookmarks/`, and leaves organization to a +follow-up `vault-ingest` run. + +## Command + +Run from this skill directory: + +```sh +npx tsx scripts/x-bookmarks.ts [--limit N] [--max-pages N] [--head-pages N] [--path PATH] +``` + +## Parameters + +- `--limit N` (default: `15`, range: `1..100`): maximum selected bookmarks to + evaluate and capture in one run. +- `--max-pages N` (default: `10`, range: `1..100`): maximum bookmark pages to + fetch across the head scan and catch-up scan. +- `--head-pages N` (default: `2`, range: `1..100`): newest bookmark pages to + scan before resuming backlog pagination. +- `--path PATH` (default: current working directory): vault root containing + `raw/sources/` and `raw/state/x-bookmarks/`.
+ +There is no report mode. This helper is apply-only and bounded by `--limit`. It +writes source records, appends reviewed/run state, and updates the catch-up +checkpoint after successful runs. + +## Authentication + +Install `xurl`, authenticate it with OAuth 2.0 for the X account whose bookmarks +should be captured, and verify it before running the helper: + +```sh +xurl auth status +xurl whoami +``` + +Required scopes: + +- `bookmark.read` +- `tweet.read` +- `users.read` +- `offline.access` + +Do not use an app-only bearer token. The helper resolves the authenticated user +through `xurl whoami`; do not pass a manual user ID. + +## Behavior + +1. Load reviewed IDs from `raw/state/x-bookmarks/reviewed.jsonl`. +2. Fetch bookmark pages by shelling out to `xurl --auth oauth2`. +3. Always scan newest `--head-pages` first so newly saved bookmarks are found + even after backlog has been processed. +4. If backlog remains and the head scan does not fill `--limit`, continue from + the saved catch-up pagination token until `--max-pages` or `--limit` is + reached. +5. Advance the saved catch-up token only past pages whose unreviewed bookmarks + have all been captured or recorded; if `--limit` stops mid-page, save the + token for that same page. +6. Select the oldest unreviewed bookmarks reachable in the combined scan, up to + `--limit`. +7. Process the selected slice oldest-to-newest. +8. Capture every selected bookmark to `raw/sources/` as markdown tagged + `external`. +9. Append every captured bookmark to `reviewed.jsonl`. +10. Append the run summary to `runs.jsonl` and update `checkpoint.json` only + after reviewed entries are durable. + +Never mutate X bookmarks. Never route captured files directly into permanent +vault folders. + +## Capture Policy + +Capture every selected bookmark. Do not apply regex scoring, string scoring, or +filtering; the user's bookmark action is the relevance signal. Manually delete +unwanted source records after a run. 
+ +## Output + +The command prints a JSON summary with the authenticated X user ID, pages +fetched, evaluated and captured counts, created source paths, state files +updated, and errors or partial failures. + +## Examples + +Regular run: + +```sh +npx tsx scripts/x-bookmarks.ts --limit 15 +``` + +Catch up more backlog in one run: + +```sh +npx tsx scripts/x-bookmarks.ts --limit 75 --max-pages 25 --head-pages 2 +``` + +Override the vault path: + +```sh +npx tsx scripts/x-bookmarks.ts --path /path/to/vault --limit 20 +``` diff --git a/skills/vault-x-bookmarks/package-lock.json b/skills/vault-x-bookmarks/package-lock.json new file mode 100644 index 0000000..b32f4dd --- /dev/null +++ b/skills/vault-x-bookmarks/package-lock.json @@ -0,0 +1,603 @@ +{ + "name": "vault-x-bookmarks", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "vault-x-bookmarks", + "dependencies": { + "dotenv": "^17.4.2" + }, + "devDependencies": { + "@types/node": "^25.6.0", + "tsx": "^4.21.0", + "typescript": "^6.0.3" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz", + "integrity": "sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz", + "integrity": "sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.27.7", + "resolved": 
"https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz", + "integrity": "sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz", + "integrity": "sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz", + "integrity": "sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.7.tgz", + "integrity": "sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.7.tgz", + "integrity": "sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": 
">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz", + "integrity": "sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz", + "integrity": "sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz", + "integrity": "sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz", + "integrity": "sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.7.tgz", + "integrity": "sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": 
"MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.7.tgz", + "integrity": "sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.7.tgz", + "integrity": "sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.7.tgz", + "integrity": "sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.7.tgz", + "integrity": "sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.7.tgz", + "integrity": 
"sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.7.tgz", + "integrity": "sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.7.tgz", + "integrity": "sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.7.tgz", + "integrity": "sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.7.tgz", + "integrity": "sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.7", + "resolved": 
"https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.7.tgz", + "integrity": "sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.7.tgz", + "integrity": "sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.7.tgz", + "integrity": "sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.7.tgz", + "integrity": "sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.7.tgz", + "integrity": "sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + 
"node_modules/@types/node": { + "version": "25.6.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.6.0.tgz", + "integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~7.19.0" + } + }, + "node_modules/dotenv": { + "version": "17.4.2", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.4.2.tgz", + "integrity": "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/esbuild": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", + "integrity": "sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.7", + "@esbuild/android-arm": "0.27.7", + "@esbuild/android-arm64": "0.27.7", + "@esbuild/android-x64": "0.27.7", + "@esbuild/darwin-arm64": "0.27.7", + "@esbuild/darwin-x64": "0.27.7", + "@esbuild/freebsd-arm64": "0.27.7", + "@esbuild/freebsd-x64": "0.27.7", + "@esbuild/linux-arm": "0.27.7", + "@esbuild/linux-arm64": "0.27.7", + "@esbuild/linux-ia32": "0.27.7", + "@esbuild/linux-loong64": "0.27.7", + "@esbuild/linux-mips64el": "0.27.7", + "@esbuild/linux-ppc64": "0.27.7", + "@esbuild/linux-riscv64": "0.27.7", + "@esbuild/linux-s390x": "0.27.7", + "@esbuild/linux-x64": "0.27.7", + "@esbuild/netbsd-arm64": "0.27.7", + "@esbuild/netbsd-x64": "0.27.7", + "@esbuild/openbsd-arm64": "0.27.7", + "@esbuild/openbsd-x64": "0.27.7", + "@esbuild/openharmony-arm64": "0.27.7", + "@esbuild/sunos-x64": "0.27.7", + "@esbuild/win32-arm64": "0.27.7", + "@esbuild/win32-ia32": 
"0.27.7", + "@esbuild/win32-x64": "0.27.7" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/get-tsconfig": { + "version": "4.14.0", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.14.0.tgz", + "integrity": "sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==", + "dev": true, + "license": "MIT", + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, + "node_modules/tsx": { + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz", + "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "~0.27.0", + "get-tsconfig": "^4.7.5" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/typescript": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.3.tgz", + "integrity": 
"sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "7.19.2", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.19.2.tgz", + "integrity": "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==", + "dev": true, + "license": "MIT" + } + } +} diff --git a/skills/vault-x-bookmarks/package.json b/skills/vault-x-bookmarks/package.json new file mode 100644 index 0000000..5d7e091 --- /dev/null +++ b/skills/vault-x-bookmarks/package.json @@ -0,0 +1,15 @@ +{ + "name": "vault-x-bookmarks", + "private": true, + "scripts": { + "test:x-bookmarks": "node --import tsx --test scripts/x-bookmarks.test.ts" + }, + "dependencies": { + "dotenv": "^17.4.2" + }, + "devDependencies": { + "@types/node": "^25.6.0", + "tsx": "^4.21.0", + "typescript": "^6.0.3" + } +} diff --git a/skills/vault-x-bookmarks/scripts/README.md b/skills/vault-x-bookmarks/scripts/README.md new file mode 100644 index 0000000..a193817 --- /dev/null +++ b/skills/vault-x-bookmarks/scripts/README.md @@ -0,0 +1,89 @@ +# X Bookmarks Helper + +TypeScript CLI used by the `vault-x-bookmarks` skill to capture selected X +bookmarks into a vault's `raw/sources/` directory. + +## Requirements + +- Node.js with npm. +- [`xurl`](https://github.com/xdevplatform/xurl) installed and available on + `PATH`. +- `xurl` authenticated with OAuth 2.0 for the X account whose bookmarks should + be captured. +- The authenticated `xurl` account needs these scopes: + - `bookmark.read` + - `tweet.read` + - `users.read` + - `offline.access` + +Do not use an app-only bearer token. Bookmarks are private user-context data, +and the script shells out to `xurl --auth oauth2`. 
+ +## Setup + +From the skill root: + +```sh +cd vault/skills/vault-x-bookmarks +npm install +``` + +Verify that `xurl` can see your authenticated user: + +```sh +xurl auth status +xurl whoami +``` + +The script loads `.env` via `dotenv` before invoking `xurl`, so environment +variables needed by your local `xurl` setup can live in the skill root `.env`. + +## Run + +From the skill root: + +```sh +npx tsx scripts/x-bookmarks.ts --limit 15 +``` + +Options: + +- `--limit N` (default `15`, range `1..100`): maximum selected bookmarks to + capture. +- `--max-pages N` (default `10`, range `1..100`): maximum bookmark pages to + scan. +- `--head-pages N` (default `2`, range `1..100`): newest pages to scan before + continuing backlog catch-up. +- `--path PATH`: vault root. Defaults to the current working directory, so pass it explicitly when running from the skill root. + +Examples: + +```sh +npx tsx scripts/x-bookmarks.ts --limit 15 +npx tsx scripts/x-bookmarks.ts --limit 75 --max-pages 25 --head-pages 2 +npx tsx scripts/x-bookmarks.ts --path /path/to/vault --limit 20 +``` + +## Output + +The script writes: + +- source records to `raw/sources/` +- reviewed bookmark state to `raw/state/x-bookmarks/reviewed.jsonl` +- run summaries to `raw/state/x-bookmarks/runs.jsonl` +- catch-up cursor state to `raw/state/x-bookmarks/checkpoint.json` + +It prints a JSON summary with the `xurl whoami` user ID, pages fetched, captured +count, created source paths, and any errors. + +The script never mutates X bookmarks and does not route files into permanent +vault folders. Run `vault-ingest` afterward to classify captured source records.
+
+## Test
+
+From the skill root:
+
+```sh
+npm run test:x-bookmarks
+npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --types node scripts/x-bookmarks.ts scripts/x-bookmarks.test.ts
+```
diff --git a/skills/vault-x-bookmarks/scripts/x-bookmarks.test.ts b/skills/vault-x-bookmarks/scripts/x-bookmarks.test.ts
new file mode 100644
index 0000000..388c5d5
--- /dev/null
+++ b/skills/vault-x-bookmarks/scripts/x-bookmarks.test.ts
@@ -0,0 +1,463 @@
+import assert from "node:assert/strict";
+import {
+  mkdir,
+  mkdtemp,
+  readFile,
+  readdir,
+  rm,
+  writeFile,
+} from "node:fs/promises";
+import { tmpdir } from "node:os";
+import path from "node:path";
+import test from "node:test";
+import {
+  type BookmarkPost,
+  type XurlJsonRunner,
+  buildSourceMarkdown,
+  collectSelectedBookmarks,
+  mergePagesWithoutDuplicatePosts,
+  parseArgs,
+  readCheckpoint,
+  readReviewedIds,
+  run,
+  selectOldestReachableUnreviewed,
+  sourceFilename,
+  uniquePath,
+  // The script is loaded by tsx at runtime; the explicit extension is intentional.
+  // @ts-ignore TS5097
+} from "./x-bookmarks.ts";
+
+/** Builds a BookmarkPost fixture; createdAt defaults to a January 2026 day taken from the padded id. */
+function post(id: string, createdAt = `2026-01-${id.padStart(2, "0")}T00:00:00Z`): BookmarkPost {
+  return {
+    id,
+    text: `Post ${id}`,
+    authorId: "author-1",
+    authorName: "Jane Example",
+    authorHandle: "jane_example",
+    createdAt,
+    publicMetrics: {},
+    links: [],
+  };
+}
+
+/** Builds a raw snake_case tweet payload; created_at uses the last two digits found in the id. */
+function tweet(id: string, nextText = `Post ${id}`): Record<string, unknown> {
+  return {
+    id,
+    text: nextText,
+    author_id: "author-1",
+    created_at: `2026-01-${id.replace(/\D/g, "").padStart(2, "0").slice(-2)}T00:00:00Z`,
+    public_metrics: {
+      like_count: 1,
+      retweet_count: 2,
+      reply_count: 3,
+      quote_count: 4,
+    },
+  };
+}
+
+/** Wraps tweet fixtures in a bookmarks response; meta.next_token is set only when nextToken is truthy. */
+function bookmarkResponse(
+  ids: string[],
+  nextToken: string | null = null
+): Record<string, unknown> {
+  return {
+    data: ids.map((id) => tweet(id)),
+    includes: {
+      users: [{ id: "author-1", name: "Jane Example", username: "jane_example" }],
+    },
+    meta: nextToken ? { next_token: nextToken } : {},
+  };
+}
+
+/**
+ * Creates a fake runner that serves canned pages keyed by pagination token ("" when the request
+ * has none), throws entries that are Errors, and records each requested token and argument list.
+ */
+function fakeBookmarkClient(
+  pages: Map<string, Record<string, unknown> | Error>
+): { runner: XurlJsonRunner; requestedTokens: Array<string | null>; requestedArgs: string[][] } {
+  const requestedTokens: Array<string | null> = [];
+  const requestedArgs: string[][] = [];
+  const runner: XurlJsonRunner = async (args) => {
+    requestedArgs.push(args);
+    assert.deepEqual(args.slice(0, 2), ["--auth", "oauth2"]);
+    const requestPath = args.at(-1) ?? "";
+    assert.match(requestPath, /^\/2\/users\/user-1\/bookmarks\?/);
+    const query = new URLSearchParams(requestPath.split("?")[1] ?? "");
+    const token = query.get("pagination_token");
+    requestedTokens.push(token);
+    const page = pages.get(token ?? "");
+    if (page instanceof Error) {
+      throw page;
+    }
+    if (!page) {
+      throw new Error(`Unexpected bookmark request token: ${token ?? "head"}`);
+    }
+    return page;
+  };
+
+  return { runner, requestedTokens, requestedArgs };
+}
+
+/** Runs callback with a fresh temp directory and always removes the directory afterwards. */
+async function withTempDir<T>(callback: (dir: string) => Promise<T>): Promise<T> {
+  const dir = await mkdtemp(path.join(tmpdir(), "x-bookmarks-test-"));
+  try {
+    return await callback(dir);
+  } finally {
+    await rm(dir, { recursive: true, force: true });
+  }
+}
+
+test("parseArgs returns defaults and validates flags", () => {
+  assert.deepEqual(parseArgs([]), {
+    limit: 15,
+    maxPages: 10,
+    headPages: 2,
+    vaultRoot: process.cwd(),
+  });
+
+  assert.deepEqual(parseArgs(["--limit", "7", "--max-pages", "8", "--head-pages", "3"]), {
+    limit: 7,
+    maxPages: 8,
+    headPages: 3,
+    vaultRoot: process.cwd(),
+  });
+
+  assert.equal(parseArgs(["--path", "relative-vault"]).vaultRoot, path.resolve("relative-vault"));
+  assert.throws(() => parseArgs(["--limit", "0"]), /--limit must be an integer from 1 to 100/);
+  assert.throws(() => parseArgs(["--max-pages", "101"]), /--max-pages must be an integer from 1 to 100/);
+  assert.throws(() => parseArgs(["--head-pages"]), /--head-pages requires a value/);
+  assert.throws(() => parseArgs(["--user-id", "123"]), /Unknown argument: --user-id/);
+});
+
+test("selectOldestReachableUnreviewed chooses oldest reachable unreviewed posts first", () => {
+  const selected = selectOldestReachableUnreviewed(
+    [
+      [post("5"), post("4")],
+      [post("3"), post("2")],
+      [post("1")],
+    ],
+    new Set(["3"]),
+    3
+  );
+
+  assert.deepEqual(
+    selected.map((item) => item.id),
+    ["1", "2", "4"]
+  );
+});
+
+test("mergePagesWithoutDuplicatePosts keeps the first occurrence", () => {
+  const merged = mergePagesWithoutDuplicatePosts([
+    [
+      [post("a"), post("b")],
+      [post("c"), post("a")],
+    ],
+    [[post("b"), post("d")]],
+  ]);
+
+  assert.deepEqual(
+    merged.map((page) => page.map((item) => item.id)),
+    [["a", "b"], ["c"], ["d"]]
+  );
+});
+
+test("buildSourceMarkdown creates an external source record", () => {
+  const markdown = buildSourceMarkdown(
+    {
+      ...post("123", "2026-02-03T04:05:06Z"),
+      text: "Useful bookmark about durable notes",
+      authorName: "Jane Doe",
+      authorHandle: "@jane_doe",
+      links: ["https://example.com/article"],
+    },
+    "2026-02-04T00:00:00Z"
+  );
+
+  assert.match(markdown, /tags:\n - external/);
+  assert.match(markdown, /source: x-bookmark/);
+  assert.match(markdown, /source_url: https:\/\/x\.com\/jane_doe\/status\/123/);
+  assert.match(markdown, /posted_at: "2026-02-03T04:05:06Z"/);
+  assert.doesNotMatch(markdown, /vault_candidate_reason/);
+  assert.match(markdown, /## Metadata/);
+});
+
+test("sourceFilename includes date, source, identity, and post ID", () => {
+  assert.equal(
+    sourceFilename(
+      {
+        ...post("123", "2026-02-03T04:05:06Z"),
+        authorHandle: "@Jane Doe",
+      },
+      "2026-02-04T00:00:00Z"
+    ),
+    "2026-02-03-x-bookmark-jane-doe-123.md"
+  );
+});
+
+test("readReviewedIds tolerates missing files and parses JSONL", async () => {
+  await withTempDir(async (dir) => {
+    const reviewedPath = path.join(dir, "reviewed.jsonl");
+
+    assert.deepEqual(await readReviewedIds(reviewedPath), new Set());
+
+    await writeFile(
+      reviewedPath,
+      [
+        JSON.stringify({ post_id: "one" }),
+        "",
+        JSON.stringify({ post_id: "two", ignored: true }),
+        JSON.stringify({ post_id: 3 }),
+      ].join("\n"),
+      "utf8"
+    );
+
+    assert.deepEqual(await readReviewedIds(reviewedPath), new Set(["one", "two"]));
+  });
+});
+
+test("readCheckpoint tolerates missing checkpoint files", async () => {
+  await withTempDir(async (dir) => {
+    const checkpointPath = path.join(dir, "checkpoint.json");
+
+    assert.deepEqual(await readCheckpoint(checkpointPath), {});
+
+    await writeFile(checkpointPath, JSON.stringify({ backlog_token: "next" }), "utf8");
+    assert.deepEqual(await readCheckpoint(checkpointPath), { backlog_token: "next" });
+  });
+});
+
+test("uniquePath avoids overwriting existing files", async () => {
+  await withTempDir(async (dir) => {
+    const filePath = path.join(dir, "source.md");
+    await writeFile(filePath, "first", "utf8");
+    await writeFile(path.join(dir, "source-2.md"), "second", "utf8");
+
+    assert.equal(await uniquePath(filePath), path.join(dir, "source-3.md"));
+  });
+});
+
+test("collectSelectedBookmarks scans head then backlog and preserves a mid-page backlog cursor", async () => {
+  const pages = new Map<string, Record<string, unknown> | Error>([
+    ["", bookmarkResponse(["h2", "h1"], "head-next")],
+    ["head-next", bookmarkResponse(["b2", "b1"], "older")],
+  ]);
+  const { runner, requestedTokens, requestedArgs } = fakeBookmarkClient(pages);
+
+  const result = await collectSelectedBookmarks(
+    runner,
+    "user-1",
+    { limit: 3, maxPages: 3, headPages: 1, vaultRoot: process.cwd() },
+    {},
+    new Set()
+  );
+
+  assert.deepEqual(
+    result.selected.map((item) => item.id),
+    ["b1", "h1", "h2"]
+  );
+  assert.equal(result.pagesFetched, 2);
+  assert.equal(result.nextBacklogToken, "head-next");
+  assert.equal(result.backlogExhausted, false);
+  assert.deepEqual(requestedTokens, [null, "head-next"]);
+  assert.deepEqual(requestedArgs[0].slice(0, 2), ["--auth", "oauth2"]);
+});
+
+test("collectSelectedBookmarks resets a stale saved backlog cursor", async () => {
+  const pages = new Map<string, Record<string, unknown> | Error>([
+    ["", bookmarkResponse(["h1"], "head-next")],
+    ["stale", new Error("Invalid pagination token")],
+    ["head-next", bookmarkResponse(["b1"])],
+  ]);
+  const { runner, requestedTokens } = fakeBookmarkClient(pages);
+
+  const result = await collectSelectedBookmarks(
+    runner,
+    "user-1",
+    { limit: 1, maxPages: 3, headPages: 1, vaultRoot: process.cwd() },
+    { backlog_token: "stale" },
+    new Set(["h1"])
+  );
+
+  assert.deepEqual(
+    result.selected.map((item) => item.id),
+    ["b1"]
+  );
+  assert.equal(result.cursorReset, true);
+  assert.match(result.errors.join("\n"), /Saved backlog cursor was rejected/);
+  assert.deepEqual(requestedTokens, [null, "stale", "head-next"]);
+});
+
+test("collectSelectedBookmarks does not select duplicate head-page posts twice", async () => {
+  const pages = new Map<string, Record<string, unknown> | Error>([
+    ["", bookmarkResponse(["h2", "h1", "h1"])],
+  ]);
+  const { runner, requestedTokens } = fakeBookmarkClient(pages);
+
+  const result = await collectSelectedBookmarks(
+    runner,
+    "user-1",
+    { limit: 3, maxPages: 1, headPages: 1, vaultRoot: process.cwd() },
+    {},
+    new Set()
+  );
+
+  assert.deepEqual(
+    result.selected.map((item) => item.id),
+    ["h1", "h2"]
+  );
+  assert.deepEqual(requestedTokens, [null]);
+});
+
+test("run appends JSONL records with a separator when the file lacks a trailing newline", async () => {
+  await withTempDir(async (vaultRoot) => {
+    const reviewedPath = path.join(vaultRoot, "raw", "state", "x-bookmarks", "reviewed.jsonl");
+    await mkdir(path.dirname(reviewedPath), { recursive: true });
+    await writeFile(
+      reviewedPath,
+      JSON.stringify({
+        post_id: "already-reviewed",
+        reviewed_at: "2026-01-01T00:00:00Z",
+        run_id: "previous",
+        decision: "captured",
+        source_record_path: "raw/sources/old.md",
+        source: "main",
+      }),
+      "utf8"
+    );
+
+    const originalConsoleLog = console.log;
+    console.log = () => undefined;
+    const runner: XurlJsonRunner = async (args) => {
+      if (args[0] === "whoami") {
+        return { id: "user-1" };
+      }
+      const requestPath = args.at(-1) ?? "";
+      if (requestPath.includes("/bookmarks")) {
+        return bookmarkResponse(["fresh"]);
+      }
+      throw new Error(`Unexpected live-style request path: ${requestPath}`);
+    };
+
+    try {
+      await run({ limit: 1, maxPages: 1, headPages: 1, vaultRoot }, runner);
+    } finally {
+      console.log = originalConsoleLog;
+    }
+
+    const reviewedContent = await readFile(reviewedPath, "utf8");
+    assert.match(reviewedContent, /}\n\{/);
+    assert.equal(reviewedContent.endsWith("\n"), true);
+    assert.deepEqual(
+      reviewedContent
+        .trim()
+        .split("\n")
+        .map((line) => JSON.parse(line).post_id),
+      ["already-reviewed", "fresh"]
+    );
+
+    const sourceFiles = await readdir(path.join(vaultRoot, "raw", "sources"));
+    assert.equal(sourceFiles.length, 1);
+    const sourceContent = await readFile(
+      path.join(vaultRoot, "raw", "sources", sourceFiles[0]),
+      "utf8"
+    );
+    assert.match(sourceContent, /post_id: "fresh"/);
+  });
+});
+
+test("run captures full note_tweet text when X truncates long posts", async () => {
+  await withTempDir(async (vaultRoot) => {
+    const fullText =
+      "These 6 principles will make your coding agent write better code.\n\nVerify, don't assume — Run it. Test it. Prove it works.";
+    const originalConsoleLog = console.log;
+    console.log = () => undefined;
+    const runner: XurlJsonRunner = async (args) => {
+      if (args[0] === "whoami") {
+        return { id: "user-1" };
+      }
+      const requestPath = args.at(-1) ?? "";
+      if (requestPath.includes("/bookmarks")) {
+        assert.match(requestPath, /tweet\.fields=.*note_tweet/);
+        return {
+          ...bookmarkResponse(["long-post"]),
+          data: [
+            {
+              ...tweet("long-post", "These 6 principles will make your coding agent write better code."),
+              note_tweet: {
+                text: fullText,
+                entities: {
+                  urls: [
+                    {
+                      url: "https://t.co/example",
+                      expanded_url: "https://agents.md/",
+                    },
+                  ],
+                },
+              },
+            },
+          ],
+        };
+      }
+      throw new Error(`Unexpected live-style request path: ${requestPath}`);
+    };
+
+    try {
+      await run({ limit: 1, maxPages: 1, headPages: 1, vaultRoot }, runner);
+    } finally {
+      console.log = originalConsoleLog;
+    }
+
+    const sourceFiles = await readdir(path.join(vaultRoot, "raw", "sources"));
+    assert.equal(sourceFiles.length, 1);
+    const sourceContent = await readFile(
+      path.join(vaultRoot, "raw", "sources", sourceFiles[0]),
+      "utf8"
+    );
+    assert.match(sourceContent, /Verify, don't assume/);
+    assert.match(sourceContent, /https:\/\/agents\.md\//);
+  });
+});
+
+test("run removes a source record when reviewed append fails", async () => {
+  await withTempDir(async (vaultRoot) => {
+    const stateDir = path.join(vaultRoot, "raw", "state", "x-bookmarks");
+    const reviewedPath = path.join(stateDir, "reviewed.jsonl");
+    const checkpointPath = path.join(stateDir, "checkpoint.json");
+    const sourcesDir = path.join(vaultRoot, "raw", "sources");
+    await mkdir(stateDir, { recursive: true });
+    await writeFile(reviewedPath, "", "utf8");
+
+    const originalConsoleLog = console.log;
+    console.log = () => undefined;
+    const runner: XurlJsonRunner = async (args) => {
+      if (args[0] === "whoami") {
+        return { id: "user-1" };
+      }
+      const requestPath = args.at(-1) ??
""; + if (requestPath.includes("/bookmarks")) { + await rm(reviewedPath, { force: true }); + await mkdir(reviewedPath); + return bookmarkResponse(["cleanup"]); + } + throw new Error(`Unexpected live-style request path: ${requestPath}`); + }; + + try { + await assert.rejects( + run({ limit: 1, maxPages: 1, headPages: 1, vaultRoot }, runner), + /Run completed with source\/state write errors; checkpoint was not advanced\./ + ); + } finally { + console.log = originalConsoleLog; + } + + assert.deepEqual(await readdir(sourcesDir), []); + await assert.rejects(readFile(checkpointPath, "utf8"), (error: unknown) => { + return error instanceof Error && "code" in error && error.code === "ENOENT"; + }); + }); +}); diff --git a/skills/vault-x-bookmarks/scripts/x-bookmarks.ts b/skills/vault-x-bookmarks/scripts/x-bookmarks.ts new file mode 100644 index 0000000..fd5025b --- /dev/null +++ b/skills/vault-x-bookmarks/scripts/x-bookmarks.ts @@ -0,0 +1,930 @@ +import { + mkdir, + open, + readFile, + rename, + rm, + stat, + writeFile, +} from 'node:fs/promises' +import { execFile } from 'node:child_process' +import path from 'node:path' +import { promisify } from 'node:util' +import 'dotenv/config' + +export interface CliOptions { + limit: number + maxPages: number + headPages: number + vaultRoot: string +} + +export interface BookmarkPost { + id: string + text: string + authorId?: string + authorName?: string + authorHandle?: string + createdAt?: string + publicMetrics?: { + like_count?: number + retweet_count?: number + reply_count?: number + quote_count?: number + } + links: string[] +} + +export interface ReviewedEntry { + post_id: string + reviewed_at: string + run_id: string + decision: 'captured' + source_record_path: string + source: 'main' +} + +export interface RunEntry { + run_id: string + started_at: string + finished_at: string + limit: number + max_pages: number + pages_fetched: number + evaluated_count: number + captured_count: number + errors: string[] +} + +export 
/**
 * Picks the oldest not-yet-reviewed bookmarks out of a scan.
 *
 * The pages are flattened in the order given, posts whose id is in
 * `reviewedIds` or that already appeared earlier in the scan are dropped, and
 * the last `limit` survivors are returned in reverse scan order. With the
 * newest-first input the parameter name calls for, that means "the `limit`
 * oldest unreviewed posts, oldest first".
 *
 * @param pagesNewestFirst - Bookmark pages in scan order.
 * @param reviewedIds - Post ids that must never be selected.
 * @param limit - Maximum number of posts to return. Anything below 1 (or NaN)
 *   selects nothing.
 * @returns At most `limit` unique, unreviewed posts.
 */
export function selectOldestReachableUnreviewed(
  pagesNewestFirst: BookmarkPost[][],
  reviewedIds: Set<string>,
  limit: number,
): BookmarkPost[] {
  // `slice(-0)` is `slice(0)`, which would return *every* candidate, so a
  // non-positive (or NaN) limit has to be handled before slicing.
  const count = Math.floor(limit)
  if (!(count >= 1)) {
    return []
  }

  const seen = new Set<string>()
  const candidates: BookmarkPost[] = []
  for (const post of pagesNewestFirst.flat()) {
    if (reviewedIds.has(post.id) || seen.has(post.id)) {
      continue
    }
    seen.add(post.id)
    candidates.push(post)
  }

  // The tail of the scan holds the oldest posts; reversing puts them first.
  return candidates.slice(-count).reverse()
}
/**
 * Loads the set of already-reviewed post ids from a JSONL state file.
 *
 * Each non-blank line is parsed as JSON; a line contributes an id only when it
 * is an object whose `post_id` is a string. Lines holding other JSON values
 * (`null`, numbers, arrays, objects without a string `post_id`) are ignored.
 *
 * @param reviewedPath - Path to the reviewed-entries JSONL file.
 * @returns The ids found; an empty set when the file does not exist.
 * @throws SyntaxError when a non-blank line is not valid JSON. The message
 *   carries the file path and 1-based line number.
 * @throws Any other file-system error raised while reading the file.
 */
export async function readReviewedIds(
  reviewedPath: string,
): Promise<Set<string>> {
  const reviewedIds = new Set<string>()

  // Read directly and treat ENOENT as "no state yet": one syscall instead of
  // a stat followed by a read, and no window between the two.
  let content: string
  try {
    content = await readFile(reviewedPath, 'utf8')
  } catch (error) {
    if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
      return reviewedIds
    }
    throw error
  }

  for (const [index, line] of content.split(/\r?\n/).entries()) {
    if (!line.trim()) {
      continue
    }

    let entry: unknown
    try {
      entry = JSON.parse(line)
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      throw new SyntaxError(
        `${reviewedPath}:${index + 1}: invalid JSON in reviewed log: ${reason}`,
      )
    }

    // `JSON.parse('null')` yields null; reading `.post_id` off it would throw.
    if (typeof entry !== 'object' || entry === null) {
      continue
    }
    const postId = (entry as { post_id?: unknown }).post_id
    if (typeof postId === 'string') {
      reviewedIds.add(postId)
    }
  }

  return reviewedIds
}
/**
 * Scans bookmark pages and chooses which posts this run should capture.
 *
 * Phase 1 (head scan): fetches up to `min(headPages, maxPages)` pages starting
 * from the newest page and selects unreviewed posts from them.
 * Phase 2 (backlog scan): if the head scan did not fill `limit` and the
 * checkpoint does not mark the backlog as exhausted, continues from the saved
 * backlog token (or, without one, from where the head scan stopped) until
 * `limit` posts are selected, `maxPages` pages were fetched, or pagination ends.
 *
 * @param xurlJson - Runner used to issue the bookmark requests.
 * @param userId - Authenticated user whose bookmarks are listed.
 * @param options - Supplies `limit`, `maxPages` and `headPages`.
 * @param checkpoint - Saved state; `backlog_token` and `backlog_exhausted` are read.
 * @param reviewedIds - Post ids that were handled by earlier runs.
 * @returns The selected posts (ordered via `orderSelectedOldestToNewest`), the
 *   number of pages fetched, the backlog token to persist, whether the backlog
 *   is exhausted, whether the saved cursor had to be reset, and collected
 *   page-level errors.
 * @throws Re-throws fetch errors, except a single rejected saved backlog cursor,
 *   which is recorded in `errors` and retried from the head cursor.
 */
export async function collectSelectedBookmarks(
  xurlJson: XurlJsonRunner,
  userId: string,
  options: CliOptions,
  checkpoint: Checkpoint,
  reviewedIds: Set<string>,
): Promise<CollectionResult> {
  const errors: string[] = []
  const selected: BookmarkPost[] = []
  // Seeded with reviewed ids so one membership test covers "reviewed" and
  // "already selected in this run".
  const selectedIds = new Set(reviewedIds)
  const postOrder = new Map<string, number>()
  let pagesFetched = 0
  let cursorReset = false
  let backlogExhausted = false
  let nextBacklogToken: string | null = checkpoint.backlog_token ?? null
  let headCursor: string | null = null
  let newestFirstOrdinal = 0

  // ---- Phase 1: head scan -------------------------------------------------
  const headPages: BookmarkPost[][] = []
  const headPageLimit = Math.min(options.headPages, options.maxPages)
  let token: string | null = null

  for (let pageIndex = 0; pageIndex < headPageLimit; pageIndex += 1) {
    const page = await fetchBookmarkPage(xurlJson, userId, token)
    pagesFetched += 1
    errors.push(...page.errors)
    headPages.push(page.bookmarkPage.posts)
    newestFirstOrdinal = recordNewestFirstOrder(
      page.bookmarkPage.posts,
      postOrder,
      newestFirstOrdinal,
    )
    // Remember where the head scan stopped; the backlog scan starts there
    // when no saved token exists (or the saved one is rejected).
    headCursor = page.bookmarkPage.nextToken
    token = page.bookmarkPage.nextToken

    if (!token) {
      break
    }
  }

  for (const post of selectOldestReachableUnreviewed(
    headPages,
    reviewedIds,
    options.limit,
  )) {
    selected.push(post)
    selectedIds.add(post.id)
  }

  // Head scan alone filled the quota: leave the saved backlog state untouched.
  if (selected.length >= options.limit) {
    return {
      selected: orderSelectedOldestToNewest(selected, postOrder),
      pagesFetched,
      nextBacklogToken: checkpoint.backlog_token ?? headCursor,
      backlogExhausted: checkpoint.backlog_exhausted ?? false,
      cursorReset,
      errors,
    }
  }

  if (checkpoint.backlog_exhausted) {
    return {
      selected: orderSelectedOldestToNewest(selected, postOrder),
      pagesFetched,
      nextBacklogToken: null,
      backlogExhausted: true,
      cursorReset,
      errors,
    }
  }

  // ---- Phase 2: backlog scan ----------------------------------------------
  token = checkpoint.backlog_token ?? headCursor
  nextBacklogToken = token

  while (selected.length < options.limit && pagesFetched < options.maxPages) {
    if (!token) {
      backlogExhausted = true
      nextBacklogToken = null
      break
    }

    let page: NormalizedPage
    try {
      page = await fetchBookmarkPage(xurlJson, userId, token)
    } catch (error) {
      // Only the *saved* cursor gets one retry from the head cursor; any other
      // failure (or a second rejection) propagates.
      if (
        checkpoint.backlog_token &&
        token === checkpoint.backlog_token &&
        !cursorReset &&
        isRejectedCursorError(error)
      ) {
        errors.push(SAVED_CURSOR_REJECTED)
        cursorReset = true
        token = headCursor
        nextBacklogToken = token
        if (!token) {
          backlogExhausted = true
          nextBacklogToken = null
          break
        }
        continue
      }
      throw error
    }

    pagesFetched += 1
    errors.push(...page.errors)
    newestFirstOrdinal = recordNewestFirstOrder(
      page.bookmarkPage.posts,
      postOrder,
      newestFirstOrdinal,
    )

    // Walk the page in reverse of its listed order.
    const oldestToNewest = [...page.bookmarkPage.posts].reverse()
    for (let index = 0; index < oldestToNewest.length; index += 1) {
      const post = oldestToNewest[index]
      if (selectedIds.has(post.id)) {
        continue
      }
      selected.push(post)
      selectedIds.add(post.id)
      if (selected.length >= options.limit) {
        // Stopping before the page's last entry means the same page must be
        // requested again next run, so keep the token that produced it.
        const stoppedMidPage = index < oldestToNewest.length - 1
        nextBacklogToken = stoppedMidPage
          ? page.bookmarkPage.requestToken
          : page.bookmarkPage.nextToken
        // The quota was hit exactly on the last entry of the final page:
        // nothing is left to paginate, so record exhaustion instead of
        // returning a null token with `backlogExhausted: false` (which makes
        // the next run restart the backlog walk from the head cursor).
        if (!stoppedMidPage && nextBacklogToken === null) {
          backlogExhausted = true
        }
        break
      }
    }

    if (selected.length >= options.limit) {
      break
    }

    token = page.bookmarkPage.nextToken
    nextBacklogToken = token
    if (!token) {
      backlogExhausted = true
      nextBacklogToken = null
      break
    }
  }

  return {
    selected: orderSelectedOldestToNewest(selected, postOrder),
    pagesFetched,
    nextBacklogToken,
    backlogExhausted,
    cursorReset,
    errors,
  }
}
stringValue(asRecord(me).id) + if (!userId) { + throw new Error('Failed to resolve authenticated user through xurl whoami.') + } + + const collection = await collectSelectedBookmarks( + xurlJson, + userId, + options, + checkpoint, + reviewedIds, + ) + + for (const post of collection.selected) { + const capturedAt = new Date().toISOString() + const desiredPath = path.join(sourcesDir, sourceFilename(post, capturedAt)) + + try { + const sourcePath = await uniquePath(desiredPath) + await writeFile(sourcePath, buildSourceMarkdown(post, capturedAt), { + encoding: 'utf8', + flag: 'wx', + }) + const reviewedEntry: ReviewedEntry = { + post_id: post.id, + reviewed_at: capturedAt, + run_id: runId, + decision: 'captured', + source_record_path: path.relative(options.vaultRoot, sourcePath), + source: 'main', + } + try { + await appendJsonLine(reviewedPath, reviewedEntry) + } catch (error) { + try { + await rm(sourcePath, { force: true }) + } catch (cleanupError) { + throw new Error( + `${errorMessage(error)}; failed to remove orphaned source record ${sourcePath}: ${errorMessage(cleanupError)}`, + ) + } + throw error + } + sourceRecords.push(reviewedEntry.source_record_path) + } catch (error) { + writeErrors.push(errorMessage(error)) + } + } + + const allErrors = [...collection.errors, ...writeErrors] + const finishedAt = new Date().toISOString() + const runEntry: RunEntry = { + run_id: runId, + started_at: startedAt, + finished_at: finishedAt, + limit: options.limit, + max_pages: options.maxPages, + pages_fetched: collection.pagesFetched, + evaluated_count: collection.selected.length, + captured_count: sourceRecords.length, + errors: allErrors, + } + + await appendJsonLine(runsPath, runEntry) + + if (writeErrors.length === 0) { + const nextCheckpoint: Checkpoint = { + last_successful_run_at: finishedAt, + last_pages_fetched: collection.pagesFetched, + head_pages: options.headPages, + backlog_token: collection.nextBacklogToken, + backlog_exhausted: collection.backlogExhausted, + 
/**
 * Builds a multi-line message for a failed `xurl` invocation.
 *
 * For `Error` values the result is the error message followed by the trimmed
 * `stdout` and `stderr` properties, each on its own line, skipping any that is
 * not a string or is blank. Any other thrown value yields a fixed fallback.
 *
 * @param error - Whatever was caught around the `xurl` call.
 * @returns The combined, newline-separated message.
 */
function formatXurlError(error: unknown): string {
  if (error instanceof Error) {
    const streams = error as { stdout?: unknown; stderr?: unknown }
    const captured = [streams.stdout, streams.stderr]
      .filter((stream): stream is string => typeof stream === 'string')
      .map((stream) => stream.trim())
      .filter((stream) => stream.length > 0)

    return [error.message, ...captured].join('\n')
  }

  return 'xurl execution failed'
}
/**
 * Collects the distinct links referenced by a post.
 *
 * URL entities are read first from `tweet.entities.urls`, then from
 * `noteTweet.entities.urls`. For each entity the first non-empty string among
 * `unwound_url`, `unwoundUrl`, `expanded_url`, `expandedUrl` and `url` is
 * used. Duplicates are dropped, keeping first-seen order.
 *
 * @param tweet - Raw post record.
 * @param noteTweet - Optional long-form record of the same post.
 * @returns Unique links in discovery order; empty when there are none.
 */
function extractLinks(
  tweet: JsonRecord,
  noteTweet?: JsonRecord,
): string[] {
  // Most specific field first; the bare `url` is the last resort.
  const preferredKeys = [
    'unwound_url',
    'unwoundUrl',
    'expanded_url',
    'expandedUrl',
    'url',
  ]
  // A Set iterates in insertion order, so it de-duplicates and orders at once.
  const unique = new Set<string>()

  for (const entitySource of [tweet.entities, noteTweet?.entities]) {
    for (const candidate of asArray(asRecord(entitySource).urls)) {
      const fields = asRecord(candidate)
      const resolved = preferredKeys
        .map((key) => stringValue(fields[key]))
        .find((value) => value !== undefined)
      if (resolved) {
        unique.add(resolved)
      }
    }
  }

  return [...unique]
}
/**
 * Escapes a string for use inside a double-quoted YAML scalar.
 *
 * Backslash, double quote, newline, carriage return and tab become their
 * two-character escapes; every other C0 control character, DEL, NEL and the
 * Unicode line/paragraph separators become `\uXXXX`. All other characters
 * pass through unchanged.
 *
 * @param value - Raw text to embed between double quotes.
 * @returns The escaped text (without the surrounding quotes).
 */
function yamlEscape(value: string): string {
  // Single pass, so an inserted backslash can never be escaped a second time.
  return value.replace(
    /[\\"\u0000-\u001f\u007f\u0085\u2028\u2029]/g,
    (char) => {
      switch (char) {
        case '\\':
          return '\\\\'
        case '"':
          return '\\"'
        case '\n':
          return '\\n'
        case '\r':
          return '\\r'
        case '\t':
          return '\\t'
        default:
          return `\\u${char.charCodeAt(0).toString(16).padStart(4, '0')}`
      }
    },
  )
}
/**
 * Turns free text into a lowercase, hyphen-separated slug.
 *
 * The input is trimmed, lowercased and stripped of leading `@` characters;
 * what remains is cut into runs of `a-z`/`0-9`, which are joined with single
 * hyphens.
 *
 * @param value - Text to slugify (for example a handle or display name).
 * @returns The slug, or `'unknown'` when no ASCII letter or digit remains.
 */
function slugify(value: string): string {
  const withoutAt = value.trim().toLowerCase().replace(/^@+/, '')
  // Splitting on every non-alphanumeric run leaves empty strings at the edges
  // when the text starts or ends with a separator; drop them.
  const words = withoutAt.split(/[^a-z0-9]+/).filter((word) => word.length > 0)

  return words.length > 0 ? words.join('-') : 'unknown'
}
/**
 * Converts any thrown or reported value into a printable message.
 *
 * `Error` instances yield their `message`, strings are returned as-is, and
 * everything else is JSON-serialized, falling back to `String(value)` when
 * serialization throws or produces no text.
 *
 * @param error - The value to describe.
 * @returns Always a string.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message
  }
  if (typeof error === 'string') {
    return error
  }
  try {
    // `JSON.stringify` returns `undefined` (not a string) for `undefined`,
    // functions and symbols; without the fallback this function would hand a
    // non-string to callers that immediately call string methods on it.
    const serialized: string | undefined = JSON.stringify(error)
    return serialized ?? String(error)
  } catch {
    // Circular structures and BigInt values make `JSON.stringify` throw.
    return String(error)
  }
}
Classify captures, route sources, import X bookmarks, manage ideas/projects, and maintain index/log.", "author": { "name": "Mark Phelps" }, @@ -14,18 +14,21 @@ "research", "linking", "ai", - "knowledge-base" + "knowledge-base", + "x-bookmarks", + "bookmarks" ], "skills": "./skills/", "interface": { "displayName": "Vault", "shortDescription": "Raw-first Obsidian workflows", - "longDescription": "Operate an Obsidian vault with source-first capture routing, idea/project lifecycle management, optional research synthesis, and index/log maintenance.", + "longDescription": "Operate an Obsidian vault with source-first capture routing, bounded X bookmark capture, idea/project lifecycle management, optional research synthesis, and index/log maintenance.", "developerName": "Mark Phelps", "category": "Productivity", "capabilities": ["Read", "Write", "Web Search"], "defaultPrompt": [ "Initialize a raw-first vault structure in this folder.", + "Capture a bounded slice of X bookmarks into raw/sources.", "Categorize source captures from raw/sources and move them to the right vault locations.", "Refresh index and append a structured operation log entry." 
] diff --git a/vault/AGENTS.md b/vault/AGENTS.md index d9a8f4d..e19be0e 100644 --- a/vault/AGENTS.md +++ b/vault/AGENTS.md @@ -16,12 +16,13 @@ This plugin contains vault-focused skills for the Karpathy Wiki + Idea Lifecycle | `vault-tracker` | Project lifecycle + tracker maintenance | | `vault-maintain` | Weekly orchestrator running all checks | -## Optional Skills (2) +## Optional Skills (3) -| Skill | Purpose | -| ---------------- | ------------------------------------------------------ | -| `vault-concepts` | Promote themes → canonical concept pages | -| `vault-research` | External research → source records and optional synthesis | +| Skill | Purpose | +| ------------------- | -------------------------------------------------------- | +| `vault-concepts` | Promote themes → canonical concept pages | +| `vault-research` | External research → source records and optional synthesis | +| `vault-x-bookmarks` | Capture bounded X bookmark slices into `raw/sources/` | ## Usage Patterns @@ -31,10 +32,17 @@ vault-ingest --mode report # see pending vault-ingest --mode apply # categorize and move sources ``` +**X bookmark capture:** +```bash +vault-x-bookmarks --limit 15 +vault-x-bookmarks --limit 75 --max-pages 25 --head-pages 2 +vault-ingest --mode report +``` + **Weekly maintenance:** ``` vault-maintain --mode report # full audit -vault-maintain --mode apply # apply safe fixes +vault-maintain --mode apply-safe # apply safe fixes ``` **Project transitions:** @@ -51,8 +59,12 @@ vault-concepts --mode apply # create/update concepts ## Conventions -- **Mode convention:** `--mode report|apply-safe|apply` (default `report`) -- **Safety first:** All skills are non-destructive by default +- **Mode convention:** Mode-based skills use + `--mode report|apply-safe|apply` (default `report`). +- **Apply-only exception:** `vault-x-bookmarks` has no report mode; it only runs + when explicitly invoked and captures bounded bookmark slices into + `raw/sources/`. 
+- **Safety first:** Mode-based skills are non-destructive by default. - **Manual lifecycle:** Explicit directory structure replaces automation - **Raw protection:** Never delete from `raw/sources/` or `raw/processed/` - **README maintenance:** Keep `vault/README.md` current for every vault plugin diff --git a/vault/README.md b/vault/README.md index 3f1d585..1f25d41 100644 --- a/vault/README.md +++ b/vault/README.md @@ -21,12 +21,15 @@ Optional skills: - `vault-concepts`: Promote recurring themes into canonical concept pages. - `vault-research`: Collect external source records and optionally synthesize research summaries when requested. +- `vault-x-bookmarks`: Review a bounded slice of X bookmarks through the X API + and capture selected items as `external` source records in `raw/sources/`. ## Flow ```mermaid flowchart TD Capture[New capture] --> KnownIdea{Already known as
your own idea?} + XBookmarks[X bookmarks
vault-x-bookmarks] --> Raw KnownIdea -->|yes| IdeaState{Idea state} IdeaState --> Fleeting[ideas/fleeting/] @@ -102,6 +105,10 @@ flowchart TD - Skills are the runtime source of truth. - Workflows should be non-destructive by default. - `raw/sources/` is an unprocessed inbox for new captures. +- `vault-x-bookmarks` is an apply-only capture skill. It calls its bundled + TypeScript helper, writes selected external bookmark records into + `raw/sources/`, and records reviewed IDs under `raw/state/x-bookmarks/`; + `vault-ingest` handles later routing. - Known owned ideas should go directly to `ideas/fleeting/`, `ideas/incubating/`, `ideas/someday/`, or `ideas/rejected/` instead of lingering in `raw/sources/`. diff --git a/vault/skills/vault-x-bookmarks/SKILL.md b/vault/skills/vault-x-bookmarks/SKILL.md new file mode 100644 index 0000000..af61e69 --- /dev/null +++ b/vault/skills/vault-x-bookmarks/SKILL.md @@ -0,0 +1,125 @@ +--- +name: vault-x-bookmarks +description: + Review a bounded slice of X bookmarks via the X API and capture selected + bookmarks as external source records in raw/sources +--- + +# Vault X Bookmarks + +Use the bundled TypeScript helper to review a bounded slice of the authenticated +user's X bookmarks and capture selected items into `raw/sources/`. + +This is source-first: the helper writes `external` source records, records +captured bookmark IDs in `raw/state/x-bookmarks/`, and leaves organization to a +follow-up `vault-ingest` run. + +## Requirements + +- Node.js 18+ for `tsx` and `dotenv`. +- An X Developer account and app. See + [X API docs](https://developer.twitter.com/en/docs/twitter-api) for details. +- [`xurl`](https://github.com/xdevplatform/xurl/) CLI tool installed and + authenticated with OAuth 2.0. 
+ +## Command + +Run from this skill directory: + +```sh +npx tsx scripts/x-bookmarks.ts [--limit N] [--max-pages N] [--head-pages N] [--path PATH] +``` + +## Parameters + +- `--limit N` (default: `15`, range: `1..100`): maximum selected bookmarks to + evaluate and capture in one run. +- `--max-pages N` (default: `10`, range: `1..100`): maximum bookmark pages to + fetch across the head scan and catch-up scan. +- `--head-pages N` (default: `2`, range: `1..100`): newest bookmark pages to + scan before resuming backlog pagination. +- `--path PATH` (default: current working directory): vault root containing + `raw/sources/` and `raw/state/x-bookmarks/`. + +There is no report mode. This helper is apply-only and bounded by `--limit`. It +writes source records, appends reviewed/run state, and updates the catch-up +checkpoint after successful runs. + +## Authentication + +Install `xurl`, authenticate it with OAuth 2.0 for the X account whose bookmarks +should be captured, and verify it before running the helper: + +```sh +xurl auth apps add vault-x-bookmarks --client-id <CLIENT_ID> --client-secret <CLIENT_SECRET> +xurl auth oauth2 --app vault-x-bookmarks +xurl default # to set the default authenticated app for later `xurl` commands +xurl auth status +xurl whoami +``` + +Required scopes: + +- `bookmark.read` +- `tweet.read` +- `users.read` +- `offline.access` + +Do not use an app-only bearer token. The helper resolves the authenticated user +through `xurl whoami`; do not pass a manual user ID. + +## Behavior + +1. Load reviewed IDs from `raw/state/x-bookmarks/reviewed.jsonl`. +2. Fetch bookmark pages by shelling out to `xurl --auth oauth2`. +3. Always scan newest `--head-pages` first so newly saved bookmarks are found + even after backlog has been processed. +4. If backlog remains and the head scan does not fill `--limit`, continue from + the saved catch-up pagination token until `--max-pages` or `--limit` is + reached. +5. 
Advance the saved catch-up token only past pages whose unreviewed bookmarks + have all been captured or recorded; if `--limit` stops mid-page, save the + token for that same page. +6. Select the oldest unreviewed bookmarks reachable in the combined scan, up to + `--limit`. +7. Process the selected slice oldest-to-newest. +8. Capture every selected bookmark to `raw/sources/` as markdown tagged + `external`. +9. Append every captured bookmark to `reviewed.jsonl`. +10. Append the run summary to `runs.jsonl` and update `checkpoint.json` only + after reviewed entries are durable. + +Never mutate X bookmarks. Never route captured files directly into permanent +vault folders. + +## Capture Policy + +Capture every selected bookmark. Do not apply regex scoring, string scoring, or +filtering; the user's bookmark action is the relevance signal. Manually delete +unwanted source records after a run. + +## Output + +The command prints a JSON summary with the authenticated X user ID, pages +fetched, evaluated and captured counts, created source paths, state files +updated, and errors or partial failures. 
+ +## Examples + +Regular run: + +```sh +npx tsx scripts/x-bookmarks.ts --limit 15 +``` + +Catch up more backlog in one run: + +```sh +npx tsx scripts/x-bookmarks.ts --limit 75 --max-pages 25 --head-pages 2 +``` + +Override the vault path: + +```sh +npx tsx scripts/x-bookmarks.ts --path /path/to/vault --limit 20 +``` diff --git a/vault/skills/vault-x-bookmarks/package-lock.json b/vault/skills/vault-x-bookmarks/package-lock.json new file mode 100644 index 0000000..b32f4dd --- /dev/null +++ b/vault/skills/vault-x-bookmarks/package-lock.json @@ -0,0 +1,603 @@ +{ + "name": "vault-x-bookmarks", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "vault-x-bookmarks", + "dependencies": { + "dotenv": "^17.4.2" + }, + "devDependencies": { + "@types/node": "^25.6.0", + "tsx": "^4.21.0", + "typescript": "^6.0.3" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz", + "integrity": "sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz", + "integrity": "sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz", + "integrity": "sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": 
"MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz", + "integrity": "sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz", + "integrity": "sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.7.tgz", + "integrity": "sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.7.tgz", + "integrity": "sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz", + "integrity": 
"sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz", + "integrity": "sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz", + "integrity": "sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz", + "integrity": "sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.7.tgz", + "integrity": "sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.27.7", + "resolved": 
"https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.7.tgz", + "integrity": "sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.7.tgz", + "integrity": "sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.7.tgz", + "integrity": "sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.7.tgz", + "integrity": "sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.7.tgz", + "integrity": "sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + 
} + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.7.tgz", + "integrity": "sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.7.tgz", + "integrity": "sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.7.tgz", + "integrity": "sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.7.tgz", + "integrity": "sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.7.tgz", + "integrity": "sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==", + "cpu": [ + "arm64" + ], + "dev": 
true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.7.tgz", + "integrity": "sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.7.tgz", + "integrity": "sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.7.tgz", + "integrity": "sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.7.tgz", + "integrity": "sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@types/node": { + "version": "25.6.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.6.0.tgz", + "integrity": 
"sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~7.19.0" + } + }, + "node_modules/dotenv": { + "version": "17.4.2", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.4.2.tgz", + "integrity": "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/esbuild": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", + "integrity": "sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.7", + "@esbuild/android-arm": "0.27.7", + "@esbuild/android-arm64": "0.27.7", + "@esbuild/android-x64": "0.27.7", + "@esbuild/darwin-arm64": "0.27.7", + "@esbuild/darwin-x64": "0.27.7", + "@esbuild/freebsd-arm64": "0.27.7", + "@esbuild/freebsd-x64": "0.27.7", + "@esbuild/linux-arm": "0.27.7", + "@esbuild/linux-arm64": "0.27.7", + "@esbuild/linux-ia32": "0.27.7", + "@esbuild/linux-loong64": "0.27.7", + "@esbuild/linux-mips64el": "0.27.7", + "@esbuild/linux-ppc64": "0.27.7", + "@esbuild/linux-riscv64": "0.27.7", + "@esbuild/linux-s390x": "0.27.7", + "@esbuild/linux-x64": "0.27.7", + "@esbuild/netbsd-arm64": "0.27.7", + "@esbuild/netbsd-x64": "0.27.7", + "@esbuild/openbsd-arm64": "0.27.7", + "@esbuild/openbsd-x64": "0.27.7", + "@esbuild/openharmony-arm64": "0.27.7", + "@esbuild/sunos-x64": "0.27.7", + "@esbuild/win32-arm64": "0.27.7", + "@esbuild/win32-ia32": "0.27.7", + "@esbuild/win32-x64": "0.27.7" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": 
"https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/get-tsconfig": { + "version": "4.14.0", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.14.0.tgz", + "integrity": "sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==", + "dev": true, + "license": "MIT", + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, + "node_modules/tsx": { + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz", + "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "~0.27.0", + "get-tsconfig": "^4.7.5" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/typescript": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.3.tgz", + "integrity": "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": 
"bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "7.19.2", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.19.2.tgz", + "integrity": "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==", + "dev": true, + "license": "MIT" + } + } +} diff --git a/vault/skills/vault-x-bookmarks/package.json b/vault/skills/vault-x-bookmarks/package.json new file mode 100644 index 0000000..5d7e091 --- /dev/null +++ b/vault/skills/vault-x-bookmarks/package.json @@ -0,0 +1,15 @@ +{ + "name": "vault-x-bookmarks", + "private": true, + "scripts": { + "test:x-bookmarks": "node --import tsx --test scripts/x-bookmarks.test.ts" + }, + "dependencies": { + "dotenv": "^17.4.2" + }, + "devDependencies": { + "@types/node": "^25.6.0", + "tsx": "^4.21.0", + "typescript": "^6.0.3" + } +} diff --git a/vault/skills/vault-x-bookmarks/scripts/README.md b/vault/skills/vault-x-bookmarks/scripts/README.md new file mode 100644 index 0000000..a193817 --- /dev/null +++ b/vault/skills/vault-x-bookmarks/scripts/README.md @@ -0,0 +1,89 @@ +# X Bookmarks Helper + +TypeScript CLI used by the `vault-x-bookmarks` skill to capture selected X +bookmarks into a vault's `raw/sources/` directory. + +## Requirements + +- Node.js with npm. +- [`xurl`](https://github.com/xdevplatform/xurl) installed and available on + `PATH`. +- `xurl` authenticated with OAuth 2.0 for the X account whose bookmarks should + be captured. +- The authenticated `xurl` account needs these scopes: + - `bookmark.read` + - `tweet.read` + - `users.read` + - `offline.access` + +Do not use an app-only bearer token. Bookmarks are private user-context data, +and the script shells out to `xurl --auth oauth2`. 
+ +## Setup + +From the skill root: + +```sh +cd vault/skills/vault-x-bookmarks +npm install +``` + +Verify that `xurl` can see your authenticated user: + +```sh +xurl auth status +xurl whoami +``` + +The script loads `.env` via `dotenv` before invoking `xurl`, so environment +variables needed by your local `xurl` setup can live in the skill root. + +## Run + +From the skill root: + +```sh +npx tsx scripts/x-bookmarks.ts --limit 15 +``` + +Options: + +- `--limit N` (default `15`, range `1..100`): maximum selected bookmarks to + capture. +- `--max-pages N` (default `10`, range `1..100`): maximum bookmark pages to + scan. +- `--head-pages N` (default `2`, range `1..100`): newest pages to scan before + continuing backlog catch-up. +- `--path PATH`: vault root. Defaults to the current working directory. + +Examples: + +```sh +npx tsx scripts/x-bookmarks.ts --limit 15 +npx tsx scripts/x-bookmarks.ts --limit 75 --max-pages 25 --head-pages 2 +npx tsx scripts/x-bookmarks.ts --path /path/to/vault --limit 20 +``` + +## Output + +The script writes: + +- source records to `raw/sources/` +- reviewed bookmark state to `raw/state/x-bookmarks/reviewed.jsonl` +- run summaries to `raw/state/x-bookmarks/runs.jsonl` +- catch-up cursor state to `raw/state/x-bookmarks/checkpoint.json` + +It prints a JSON summary with the `xurl whoami` user ID, pages fetched, captured +count, created source paths, and any errors. + +The script never mutates X bookmarks and does not route files into permanent +vault folders. Run `vault-ingest` afterward to classify captured source records. 
+ +## Test + +From the skill root: + +```sh +npm run test:x-bookmarks +npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --types node scripts/x-bookmarks.ts scripts/x-bookmarks.test.ts +``` diff --git a/vault/skills/vault-x-bookmarks/scripts/x-bookmarks.test.ts b/vault/skills/vault-x-bookmarks/scripts/x-bookmarks.test.ts new file mode 100644 index 0000000..388c5d5 --- /dev/null +++ b/vault/skills/vault-x-bookmarks/scripts/x-bookmarks.test.ts @@ -0,0 +1,455 @@ +import assert from "node:assert/strict"; +import { + mkdir, + mkdtemp, + readFile, + readdir, + rm, + writeFile, +} from "node:fs/promises"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import test from "node:test"; +import { + type BookmarkPost, + type XurlJsonRunner, + buildSourceMarkdown, + collectSelectedBookmarks, + mergePagesWithoutDuplicatePosts, + parseArgs, + readCheckpoint, + readReviewedIds, + run, + selectOldestReachableUnreviewed, + sourceFilename, + uniquePath, + // The script is loaded by tsx at runtime; the explicit extension is intentional. + // @ts-ignore TS5097 +} from "./x-bookmarks.ts"; + +function post(id: string, createdAt = `2026-01-${id.padStart(2, "0")}T00:00:00Z`): BookmarkPost { + return { + id, + text: `Post ${id}`, + authorId: "author-1", + authorName: "Jane Example", + authorHandle: "jane_example", + createdAt, + publicMetrics: {}, + links: [], + }; +} + +function tweet(id: string, nextText = `Post ${id}`): Record { + return { + id, + text: nextText, + author_id: "author-1", + created_at: `2026-01-${id.replace(/\D/g, "").padStart(2, "0").slice(-2)}T00:00:00Z`, + public_metrics: { + like_count: 1, + retweet_count: 2, + reply_count: 3, + quote_count: 4, + }, + }; +} + +function bookmarkResponse( + ids: string[], + nextToken: string | null = null +): Record { + return { + data: ids.map((id) => tweet(id)), + includes: { + users: [{ id: "author-1", name: "Jane Example", username: "jane_example" }], + }, + meta: nextToken ? 
{ next_token: nextToken } : {}, + }; +} + +function fakeBookmarkClient( + pages: Map | Error> +): { runner: XurlJsonRunner; requestedTokens: Array; requestedArgs: string[][] } { + const requestedTokens: Array = []; + const requestedArgs: string[][] = []; + const runner: XurlJsonRunner = async (args) => { + requestedArgs.push(args); + assert.deepEqual(args.slice(0, 2), ["--auth", "oauth2"]); + const requestPath = args.at(-1) ?? ""; + assert.match(requestPath, /^\/2\/users\/user-1\/bookmarks\?/); + const query = new URLSearchParams(requestPath.split("?")[1] ?? ""); + const token = query.get("pagination_token"); + requestedTokens.push(token); + const page = pages.get(token ?? ""); + if (page instanceof Error) { + throw page; + } + if (!page) { + throw new Error(`Unexpected bookmark request token: ${token ?? "head"}`); + } + return page; + }; + + return { runner, requestedTokens, requestedArgs }; +} + +async function withTempDir(callback: (dir: string) => Promise): Promise { + const dir = await mkdtemp(path.join(tmpdir(), "x-bookmarks-test-")); + try { + return await callback(dir); + } finally { + await rm(dir, { recursive: true, force: true }); + } +} + +test("parseArgs returns defaults and validates flags", () => { + assert.deepEqual(parseArgs([]), { + limit: 15, + maxPages: 10, + headPages: 2, + vaultRoot: process.cwd(), + }); + + assert.deepEqual(parseArgs(["--limit", "7", "--max-pages", "8", "--head-pages", "3"]), { + limit: 7, + maxPages: 8, + headPages: 3, + vaultRoot: process.cwd(), + }); + + assert.equal(parseArgs(["--path", "relative-vault"]).vaultRoot, path.resolve("relative-vault")); + assert.throws(() => parseArgs(["--limit", "0"]), /--limit must be an integer from 1 to 100/); + assert.throws(() => parseArgs(["--max-pages", "101"]), /--max-pages must be an integer from 1 to 100/); + assert.throws(() => parseArgs(["--head-pages"]), /--head-pages requires a value/); + assert.throws(() => parseArgs(["--user-id", "123"]), /Unknown argument: --user-id/); +}); + 
+test("selectOldestReachableUnreviewed chooses oldest reachable unreviewed posts first", () => { + const selected = selectOldestReachableUnreviewed( + [ + [post("5"), post("4")], + [post("3"), post("2")], + [post("1")], + ], + new Set(["3"]), + 3 + ); + + assert.deepEqual( + selected.map((item) => item.id), + ["1", "2", "4"] + ); +}); + +test("mergePagesWithoutDuplicatePosts keeps the first occurrence", () => { + const merged = mergePagesWithoutDuplicatePosts([ + [ + [post("a"), post("b")], + [post("c"), post("a")], + ], + [[post("b"), post("d")]], + ]); + + assert.deepEqual( + merged.map((page) => page.map((item) => item.id)), + [["a", "b"], ["c"], ["d"]] + ); +}); + +test("buildSourceMarkdown creates an external source record", () => { + const markdown = buildSourceMarkdown( + { + ...post("123", "2026-02-03T04:05:06Z"), + text: "Useful bookmark about durable notes", + authorName: "Jane Doe", + authorHandle: "@jane_doe", + links: ["https://example.com/article"], + }, + "2026-02-04T00:00:00Z" + ); + + assert.match(markdown, /tags:\n - external/); + assert.match(markdown, /source: x-bookmark/); + assert.match(markdown, /source_url: https:\/\/x\.com\/jane_doe\/status\/123/); + assert.match(markdown, /posted_at: "2026-02-03T04:05:06Z"/); + assert.doesNotMatch(markdown, /vault_candidate_reason/); + assert.match(markdown, /## Metadata/); +}); + +test("sourceFilename includes date, source, identity, and post ID", () => { + assert.equal( + sourceFilename( + { + ...post("123", "2026-02-03T04:05:06Z"), + authorHandle: "@Jane Doe", + }, + "2026-02-04T00:00:00Z" + ), + "2026-02-03-x-bookmark-jane-doe-123.md" + ); +}); + +test("readReviewedIds tolerates missing files and parses JSONL", async () => { + await withTempDir(async (dir) => { + const reviewedPath = path.join(dir, "reviewed.jsonl"); + + assert.deepEqual(await readReviewedIds(reviewedPath), new Set()); + + await writeFile( + reviewedPath, + [ + JSON.stringify({ post_id: "one" }), + "", + JSON.stringify({ post_id: 
"two", ignored: true }), + JSON.stringify({ post_id: 3 }), + ].join("\n"), + "utf8" + ); + + assert.deepEqual(await readReviewedIds(reviewedPath), new Set(["one", "two"])); + }); +}); + +test("readCheckpoint tolerates missing checkpoint files", async () => { + await withTempDir(async (dir) => { + const checkpointPath = path.join(dir, "checkpoint.json"); + + assert.deepEqual(await readCheckpoint(checkpointPath), {}); + + await writeFile(checkpointPath, JSON.stringify({ backlog_token: "next" }), "utf8"); + assert.deepEqual(await readCheckpoint(checkpointPath), { backlog_token: "next" }); + }); +}); + +test("uniquePath avoids overwriting existing files", async () => { + await withTempDir(async (dir) => { + const filePath = path.join(dir, "source.md"); + await writeFile(filePath, "first", "utf8"); + await writeFile(path.join(dir, "source-2.md"), "second", "utf8"); + + assert.equal(await uniquePath(filePath), path.join(dir, "source-3.md")); + }); +}); + +test("collectSelectedBookmarks scans head then backlog and preserves a mid-page backlog cursor", async () => { + const pages = new Map | Error>([ + ["", bookmarkResponse(["h2", "h1"], "head-next")], + ["head-next", bookmarkResponse(["b2", "b1"], "older")], + ]); + const { runner, requestedTokens, requestedArgs } = fakeBookmarkClient(pages); + + const result = await collectSelectedBookmarks( + runner, + "user-1", + { limit: 3, maxPages: 3, headPages: 1, vaultRoot: process.cwd() }, + {}, + new Set() + ); + + assert.deepEqual( + result.selected.map((item) => item.id), + ["b1", "h1", "h2"] + ); + assert.equal(result.pagesFetched, 2); + assert.equal(result.nextBacklogToken, "head-next"); + assert.equal(result.backlogExhausted, false); + assert.deepEqual(requestedTokens, [null, "head-next"]); + assert.deepEqual(requestedArgs[0].slice(0, 2), ["--auth", "oauth2"]); +}); + +test("collectSelectedBookmarks resets a stale saved backlog cursor", async () => { + const pages = new Map | Error>([ + ["", bookmarkResponse(["h1"], 
"head-next")], + ["stale", new Error("Invalid pagination token")], + ["head-next", bookmarkResponse(["b1"])], + ]); + const { runner, requestedTokens } = fakeBookmarkClient(pages); + + const result = await collectSelectedBookmarks( + runner, + "user-1", + { limit: 1, maxPages: 3, headPages: 1, vaultRoot: process.cwd() }, + { backlog_token: "stale" }, + new Set(["h1"]) + ); + + assert.deepEqual( + result.selected.map((item) => item.id), + ["b1"] + ); + assert.equal(result.cursorReset, true); + assert.match(result.errors.join("\n"), /Saved backlog cursor was rejected/); + assert.deepEqual(requestedTokens, [null, "stale", "head-next"]); +}); + +test("collectSelectedBookmarks does not select duplicate head-page posts twice", async () => { + const pages = new Map | Error>([ + ["", bookmarkResponse(["h2", "h1", "h1"])], + ]); + const { runner, requestedTokens } = fakeBookmarkClient(pages); + + const result = await collectSelectedBookmarks( + runner, + "user-1", + { limit: 3, maxPages: 1, headPages: 1, vaultRoot: process.cwd() }, + {}, + new Set() + ); + + assert.deepEqual( + result.selected.map((item) => item.id), + ["h1", "h2"] + ); + assert.deepEqual(requestedTokens, [null]); +}); + +test("run appends JSONL records with a separator when the file lacks a trailing newline", async () => { + await withTempDir(async (vaultRoot) => { + const reviewedPath = path.join(vaultRoot, "raw", "state", "x-bookmarks", "reviewed.jsonl"); + await mkdir(path.dirname(reviewedPath), { recursive: true }); + await writeFile( + reviewedPath, + JSON.stringify({ + post_id: "already-reviewed", + reviewed_at: "2026-01-01T00:00:00Z", + run_id: "previous", + decision: "captured", + source_record_path: "raw/sources/old.md", + source: "main", + }), + "utf8" + ); + + const originalConsoleLog = console.log; + console.log = () => undefined; + const runner: XurlJsonRunner = async (args) => { + if (args[0] === "whoami") { + return { id: "user-1" }; + } + const requestPath = args.at(-1) ?? 
""; + if (requestPath.includes("/bookmarks")) { + return bookmarkResponse(["fresh"]); + } + throw new Error(`Unexpected live-style request path: ${requestPath}`); + }; + + try { + await run({ limit: 1, maxPages: 1, headPages: 1, vaultRoot }, runner); + } finally { + console.log = originalConsoleLog; + } + + const reviewedContent = await readFile(reviewedPath, "utf8"); + assert.match(reviewedContent, /}\n\{/); + assert.equal(reviewedContent.endsWith("\n"), true); + assert.deepEqual( + reviewedContent + .trim() + .split("\n") + .map((line) => JSON.parse(line).post_id), + ["already-reviewed", "fresh"] + ); + + const sourceFiles = await readdir(path.join(vaultRoot, "raw", "sources")); + assert.equal(sourceFiles.length, 1); + const sourceContent = await readFile( + path.join(vaultRoot, "raw", "sources", sourceFiles[0]), + "utf8" + ); + assert.match(sourceContent, /post_id: "fresh"/); + }); +}); + +test("run captures full note_tweet text when X truncates long posts", async () => { + await withTempDir(async (vaultRoot) => { + const fullText = + "These 6 principles will make your coding agent write better code.\n\nVerify, don't assume — Run it. Test it. Prove it works."; + const originalConsoleLog = console.log; + console.log = () => undefined; + const runner: XurlJsonRunner = async (args) => { + if (args[0] === "whoami") { + return { id: "user-1" }; + } + const requestPath = args.at(-1) ?? 
""; + if (requestPath.includes("/bookmarks")) { + assert.match(requestPath, /tweet\.fields=.*note_tweet/); + return { + ...bookmarkResponse(["long-post"]), + data: [ + { + ...tweet("long-post", "These 6 principles will make your coding agent write better code."), + note_tweet: { + text: fullText, + entities: { + urls: [ + { + url: "https://t.co/example", + expanded_url: "https://agents.md/", + }, + ], + }, + }, + }, + ], + }; + } + throw new Error(`Unexpected live-style request path: ${requestPath}`); + }; + + try { + await run({ limit: 1, maxPages: 1, headPages: 1, vaultRoot }, runner); + } finally { + console.log = originalConsoleLog; + } + + const sourceFiles = await readdir(path.join(vaultRoot, "raw", "sources")); + assert.equal(sourceFiles.length, 1); + const sourceContent = await readFile( + path.join(vaultRoot, "raw", "sources", sourceFiles[0]), + "utf8" + ); + assert.match(sourceContent, /Verify, don't assume/); + assert.match(sourceContent, /https:\/\/agents\.md\//); + }); +}); + +test("run removes a source record when reviewed append fails", async () => { + await withTempDir(async (vaultRoot) => { + const stateDir = path.join(vaultRoot, "raw", "state", "x-bookmarks"); + const reviewedPath = path.join(stateDir, "reviewed.jsonl"); + const checkpointPath = path.join(stateDir, "checkpoint.json"); + const sourcesDir = path.join(vaultRoot, "raw", "sources"); + await mkdir(stateDir, { recursive: true }); + await writeFile(reviewedPath, "", "utf8"); + + const originalConsoleLog = console.log; + console.log = () => undefined; + const runner: XurlJsonRunner = async (args) => { + if (args[0] === "whoami") { + return { id: "user-1" }; + } + const requestPath = args.at(-1) ?? 
""; + if (requestPath.includes("/bookmarks")) { + await rm(reviewedPath, { force: true }); + await mkdir(reviewedPath); + return bookmarkResponse(["cleanup"]); + } + throw new Error(`Unexpected live-style request path: ${requestPath}`); + }; + + try { + await assert.rejects( + run({ limit: 1, maxPages: 1, headPages: 1, vaultRoot }, runner), + /Run completed with source\/state write errors; checkpoint was not advanced\./ + ); + } finally { + console.log = originalConsoleLog; + } + + assert.deepEqual(await readdir(sourcesDir), []); + await assert.rejects(readFile(checkpointPath, "utf8"), (error: unknown) => { + return error instanceof Error && "code" in error && error.code === "ENOENT"; + }); + }); +}); diff --git a/vault/skills/vault-x-bookmarks/scripts/x-bookmarks.ts b/vault/skills/vault-x-bookmarks/scripts/x-bookmarks.ts new file mode 100644 index 0000000..fd5025b --- /dev/null +++ b/vault/skills/vault-x-bookmarks/scripts/x-bookmarks.ts @@ -0,0 +1,930 @@ +import { + mkdir, + open, + readFile, + rename, + rm, + stat, + writeFile, +} from 'node:fs/promises' +import { execFile } from 'node:child_process' +import path from 'node:path' +import { promisify } from 'node:util' +import 'dotenv/config' + +export interface CliOptions { + limit: number + maxPages: number + headPages: number + vaultRoot: string +} + +export interface BookmarkPost { + id: string + text: string + authorId?: string + authorName?: string + authorHandle?: string + createdAt?: string + publicMetrics?: { + like_count?: number + retweet_count?: number + reply_count?: number + quote_count?: number + } + links: string[] +} + +export interface ReviewedEntry { + post_id: string + reviewed_at: string + run_id: string + decision: 'captured' + source_record_path: string + source: 'main' +} + +export interface RunEntry { + run_id: string + started_at: string + finished_at: string + limit: number + max_pages: number + pages_fetched: number + evaluated_count: number + captured_count: number + errors: string[] 
/**
 * Picks the oldest not-yet-reviewed bookmarks from a scan whose pages (and the
 * posts inside each page) are treated as ordered newest-first.
 *
 * Posts whose ID is in `reviewedIds` are dropped, and a post ID that appears
 * more than once in the scan is only considered the first time it is seen.
 *
 * @param pagesNewestFirst - Scanned pages, newest page first.
 * @param reviewedIds - IDs that must never be selected again.
 * @param limit - Maximum number of posts to return; fractional values are
 *   truncated and non-positive or NaN values select nothing.
 * @returns Up to `limit` posts, ordered oldest-to-newest.
 */
export function selectOldestReachableUnreviewed(
  pagesNewestFirst: BookmarkPost[][],
  reviewedIds: Set<string>,
  limit: number,
): BookmarkPost[] {
  const count = Math.trunc(limit)
  // `slice(-0)` and `slice(NaN)` both behave like `slice(0)` and would return
  // every candidate, while a negative limit would drop the oldest posts.
  // Treat all of those as "select nothing".
  if (!(count > 0)) {
    return []
  }

  const seen = new Set<string>()
  const candidates: BookmarkPost[] = []
  for (const post of pagesNewestFirst.flat()) {
    if (reviewedIds.has(post.id) || seen.has(post.id)) {
      continue
    }
    seen.add(post.id)
    candidates.push(post)
  }

  // The tail of a newest-first list holds the oldest posts; reversing that
  // tail yields oldest-to-newest processing order.
  return candidates.slice(-count).reverse()
}
/**
 * Loads the set of already-reviewed post IDs from a JSONL state file.
 *
 * Each non-blank line must be valid JSON. Only lines that are objects with a
 * string `post_id` contribute an ID; other JSON values are ignored.
 *
 * @param reviewedPath - Path of the `reviewed.jsonl` file.
 * @returns The reviewed IDs, or an empty set when the file does not exist.
 * @throws Error naming the file and 1-based line number when a line is not
 *   valid JSON; any read error other than ENOENT is rethrown unchanged.
 */
export async function readReviewedIds(
  reviewedPath: string,
): Promise<Set<string>> {
  let content: string
  try {
    // Read directly instead of checking for existence first, so the file
    // cannot disappear between the check and the read.
    content = await readFile(reviewedPath, 'utf8')
  } catch (error) {
    if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
      return new Set<string>()
    }
    throw error
  }

  const reviewedIds = new Set<string>()
  const lines = content.split(/\r?\n/)
  for (let index = 0; index < lines.length; index += 1) {
    const line = lines[index]
    if (!line.trim()) {
      continue
    }

    let entry: unknown
    try {
      entry = JSON.parse(line)
    } catch (error) {
      // Fail loudly rather than skip: silently dropping a reviewed entry
      // would let that bookmark be captured a second time.
      const reason = error instanceof Error ? error.message : String(error)
      throw new Error(
        `Invalid JSON on line ${index + 1} of ${reviewedPath}: ${reason}`,
      )
    }

    // `JSON.parse` can yield null or a primitive; only objects carry an ID.
    if (typeof entry === 'object' && entry !== null) {
      const postId = (entry as { post_id?: unknown }).post_id
      if (typeof postId === 'string') {
        reviewedIds.add(postId)
      }
    }
  }
  return reviewedIds
}
exists(candidate))) { + return candidate + } + } +} + +export async function collectSelectedBookmarks( + xurlJson: XurlJsonRunner, + userId: string, + options: CliOptions, + checkpoint: Checkpoint, + reviewedIds: Set, +): Promise { + const errors: string[] = [] + const selected: BookmarkPost[] = [] + const selectedIds = new Set(reviewedIds) + const postOrder = new Map() + let pagesFetched = 0 + let cursorReset = false + let backlogExhausted = false + let nextBacklogToken: string | null = checkpoint.backlog_token ?? null + let headCursor: string | null = null + let newestFirstOrdinal = 0 + + const headPages: BookmarkPost[][] = [] + const headPageLimit = Math.min(options.headPages, options.maxPages) + let token: string | null = null + + for (let pageIndex = 0; pageIndex < headPageLimit; pageIndex += 1) { + const page = await fetchBookmarkPage(xurlJson, userId, token) + pagesFetched += 1 + errors.push(...page.errors) + headPages.push(page.bookmarkPage.posts) + newestFirstOrdinal = recordNewestFirstOrder( + page.bookmarkPage.posts, + postOrder, + newestFirstOrdinal, + ) + headCursor = page.bookmarkPage.nextToken + token = page.bookmarkPage.nextToken + + if (!token) { + break + } + } + + for (const post of selectOldestReachableUnreviewed( + headPages, + reviewedIds, + options.limit, + )) { + selected.push(post) + selectedIds.add(post.id) + } + + if (selected.length >= options.limit) { + return { + selected: orderSelectedOldestToNewest(selected, postOrder), + pagesFetched, + nextBacklogToken: checkpoint.backlog_token ?? headCursor, + backlogExhausted: checkpoint.backlog_exhausted ?? false, + cursorReset, + errors, + } + } + + if (checkpoint.backlog_exhausted) { + return { + selected: orderSelectedOldestToNewest(selected, postOrder), + pagesFetched, + nextBacklogToken: null, + backlogExhausted: true, + cursorReset, + errors, + } + } + + token = checkpoint.backlog_token ?? 
headCursor + nextBacklogToken = token + + while (selected.length < options.limit && pagesFetched < options.maxPages) { + if (!token) { + backlogExhausted = true + nextBacklogToken = null + break + } + + let page: NormalizedPage + try { + page = await fetchBookmarkPage(xurlJson, userId, token) + } catch (error) { + if ( + checkpoint.backlog_token && + token === checkpoint.backlog_token && + !cursorReset && + isRejectedCursorError(error) + ) { + errors.push(SAVED_CURSOR_REJECTED) + cursorReset = true + token = headCursor + nextBacklogToken = token + if (!token) { + backlogExhausted = true + nextBacklogToken = null + break + } + continue + } + throw error + } + + pagesFetched += 1 + errors.push(...page.errors) + newestFirstOrdinal = recordNewestFirstOrder( + page.bookmarkPage.posts, + postOrder, + newestFirstOrdinal, + ) + + const oldestToNewest = [...page.bookmarkPage.posts].reverse() + for (let index = 0; index < oldestToNewest.length; index += 1) { + const post = oldestToNewest[index] + if (selectedIds.has(post.id)) { + continue + } + selected.push(post) + selectedIds.add(post.id) + if (selected.length >= options.limit) { + nextBacklogToken = + index < oldestToNewest.length - 1 + ? 
page.bookmarkPage.requestToken + : page.bookmarkPage.nextToken + break + } + } + + if (selected.length >= options.limit) { + break + } + + token = page.bookmarkPage.nextToken + nextBacklogToken = token + if (!token) { + backlogExhausted = true + nextBacklogToken = null + break + } + } + + return { + selected: orderSelectedOldestToNewest(selected, postOrder), + pagesFetched, + nextBacklogToken, + backlogExhausted, + cursorReset, + errors, + } +} + +export async function run( + options: CliOptions, + xurlJson: XurlJsonRunner = runXurlJsonCommand, +): Promise { + const runId = new Date().toISOString().replace(/[-:.TZ]/g, '') + const startedAt = new Date().toISOString() + const stateDir = path.join(options.vaultRoot, 'raw', 'state', 'x-bookmarks') + const sourcesDir = path.join(options.vaultRoot, 'raw', 'sources') + const reviewedPath = path.join(stateDir, 'reviewed.jsonl') + const runsPath = path.join(stateDir, 'runs.jsonl') + const checkpointPath = path.join(stateDir, 'checkpoint.json') + const sourceRecords: string[] = [] + const writeErrors: string[] = [] + + await mkdir(stateDir, { recursive: true }) + await mkdir(sourcesDir, { recursive: true }) + + const reviewedIds = await readReviewedIds(reviewedPath) + const checkpoint = await readCheckpoint(checkpointPath) + const me = await xurlJson(['whoami']) + const userId = getNestedString(asRecord(me), ['data', 'id']) + ?? 
stringValue(asRecord(me).id) + if (!userId) { + throw new Error('Failed to resolve authenticated user through xurl whoami.') + } + + const collection = await collectSelectedBookmarks( + xurlJson, + userId, + options, + checkpoint, + reviewedIds, + ) + + for (const post of collection.selected) { + const capturedAt = new Date().toISOString() + const desiredPath = path.join(sourcesDir, sourceFilename(post, capturedAt)) + + try { + const sourcePath = await uniquePath(desiredPath) + await writeFile(sourcePath, buildSourceMarkdown(post, capturedAt), { + encoding: 'utf8', + flag: 'wx', + }) + const reviewedEntry: ReviewedEntry = { + post_id: post.id, + reviewed_at: capturedAt, + run_id: runId, + decision: 'captured', + source_record_path: path.relative(options.vaultRoot, sourcePath), + source: 'main', + } + try { + await appendJsonLine(reviewedPath, reviewedEntry) + } catch (error) { + try { + await rm(sourcePath, { force: true }) + } catch (cleanupError) { + throw new Error( + `${errorMessage(error)}; failed to remove orphaned source record ${sourcePath}: ${errorMessage(cleanupError)}`, + ) + } + throw error + } + sourceRecords.push(reviewedEntry.source_record_path) + } catch (error) { + writeErrors.push(errorMessage(error)) + } + } + + const allErrors = [...collection.errors, ...writeErrors] + const finishedAt = new Date().toISOString() + const runEntry: RunEntry = { + run_id: runId, + started_at: startedAt, + finished_at: finishedAt, + limit: options.limit, + max_pages: options.maxPages, + pages_fetched: collection.pagesFetched, + evaluated_count: collection.selected.length, + captured_count: sourceRecords.length, + errors: allErrors, + } + + await appendJsonLine(runsPath, runEntry) + + if (writeErrors.length === 0) { + const nextCheckpoint: Checkpoint = { + last_successful_run_at: finishedAt, + last_pages_fetched: collection.pagesFetched, + head_pages: options.headPages, + backlog_token: collection.nextBacklogToken, + backlog_exhausted: collection.backlogExhausted, + 
/**
 * Builds a readable message from a failed `xurl` invocation.
 *
 * Starts from the error's own message and appends the captured `stdout` and
 * `stderr` when they are non-empty strings that the text collected so far does
 * not already contain.
 *
 * @param error - The value thrown while running or parsing `xurl` output.
 * @returns The combined message, or a generic fallback when `error` is not an
 *   `Error` or carries no text at all.
 */
function formatXurlError(error: unknown): string {
  const fallback = 'xurl execution failed'
  if (!(error instanceof Error)) {
    return fallback
  }

  const parts: string[] = []
  const detail = error as { stdout?: unknown; stderr?: unknown }
  for (const candidate of [error.message, detail.stdout, detail.stderr]) {
    if (typeof candidate !== 'string') {
      continue
    }
    const text = candidate.trim()
    // Node's execFile error message already embeds stderr, so skip any stream
    // whose text is already present instead of printing it twice.
    if (text.length > 0 && !parts.some((part) => part.includes(text))) {
      parts.push(text)
    }
  }

  return parts.length > 0 ? parts.join('\n') : fallback
}
publicMetrics?.retweetCount, + ), + reply_count: numberValue( + publicMetrics?.reply_count ?? publicMetrics?.replyCount, + ), + quote_count: numberValue( + publicMetrics?.quote_count ?? publicMetrics?.quoteCount, + ), + }, + links: extractLinks(tweet, noteTweet), + } +} + +function extractLinks( + tweet: JsonRecord, + noteTweet?: JsonRecord, +): string[] { + const entityGroups = [asRecord(tweet.entities), asRecord(noteTweet?.entities)] + const seen = new Set() + const links: string[] = [] + + for (const entities of entityGroups) { + for (const url of asArray(entities?.urls)) { + const urlRecord = asRecord(url) + const link = + stringValue(urlRecord.unwound_url) ?? + stringValue(urlRecord.unwoundUrl) ?? + stringValue(urlRecord.expanded_url) ?? + stringValue(urlRecord.expandedUrl) ?? + stringValue(urlRecord.url) + if (link && !seen.has(link)) { + seen.add(link) + links.push(link) + } + } + } + + return links +} + +async function appendJsonLine(filePath: string, entry: unknown): Promise { + const prefix = (await needsJsonLineSeparator(filePath)) ? 
/**
 * Derives a single-line heading from post text.
 *
 * Runs of whitespace (including newlines) collapse to one space, the result is
 * trimmed and then cut to at most 80 Unicode code points.
 *
 * @param text - Raw post text.
 * @returns The shortened title, or `'Untitled'` when nothing remains.
 */
function markdownTitle(text: string): string {
  const collapsed = text.replace(/\s+/g, ' ').trim()
  // Iterating a string yields whole code points, so the cut can never land
  // between the two halves of a surrogate pair (e.g. an emoji) the way a
  // UTF-16 based `slice(0, 80)` can. Multi-code-point sequences such as ZWJ
  // emoji may still be shortened, but the result is always well-formed.
  const title = Array.from(collapsed).slice(0, 80).join('')
  return title || 'Untitled'
}
/**
 * Turns a handle or display name into a lowercase, hyphen-separated slug that
 * contains only `a-z`, `0-9` and `-`.
 *
 * The value is first decomposed with NFKD and stripped of combining marks, so
 * accented and compatibility characters keep their base letter ("José" becomes
 * "jose") instead of being replaced by a hyphen. Leading `@` characters are
 * removed and every other run of unsupported characters becomes one hyphen.
 *
 * @param value - Handle or name to convert.
 * @returns The slug, or `'unknown'` when no supported character remains.
 */
function slugify(value: string): string {
  const slug = value
    .normalize('NFKD')
    // Drop the combining diacritical marks that NFKD split off.
    .replace(/[\u0300-\u036f]/g, '')
    .trim()
    .toLowerCase()
    .replace(/^@+/, '')
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
  return slug || 'unknown'
}
/**
 * Converts any thrown or reported value into a printable string.
 *
 * @param error - An `Error`, a string, or any other value.
 * @returns The error's message, the string itself, or a JSON or `String()`
 *   rendering of anything else. Always a string.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message
  }
  if (typeof error === 'string') {
    return error
  }
  try {
    // JSON.stringify returns undefined (not a string) for undefined, functions
    // and symbols, so fall back to String() to honour the return type.
    return JSON.stringify(error) ?? String(error)
  } catch {
    return String(error)
  }
}

/**
 * Decides whether a failed page fetch means the pagination cursor itself was
 * rejected, as opposed to an unrelated failure such as auth or rate limiting.
 *
 * The message must mention a pagination token or cursor and also contain a
 * rejection word. A bare "token" or "invalid" is not enough.
 *
 * NOTE(review): the accepted wordings are a best guess at what the X API and
 * xurl report for a bad `pagination_token` — confirm against a live response.
 *
 * @param error - The value thrown by the page fetch.
 * @returns `true` only when the message points at the cursor being rejected.
 */
function isRejectedCursorError(error: unknown): boolean {
  const message = errorMessage(error)
    .toLowerCase()
    // A failed command's message can echo the request URL, which carries
    // `pagination_token=<value>` on every cursor request. Remove that echo so
    // it cannot make an unrelated failure look like a cursor problem.
    .replace(/pagination_token=[^\s&"']*/g, '')

  const mentionsCursor = /pagination[_ ]?token|next[_ ]?token|cursor/.test(
    message,
  )
  const mentionsRejection = /invalid|not valid|expired|malformed|rejected/.test(
    message,
  )
  return mentionsCursor && mentionsRejection
}