diff --git a/dashboard/assets/scripts/dashboard.js b/dashboard/assets/scripts/dashboard.js index f570f438..6bdc7e64 100644 --- a/dashboard/assets/scripts/dashboard.js +++ b/dashboard/assets/scripts/dashboard.js @@ -5,6 +5,10 @@ "use strict"; +String.prototype.removePrefix = function (prefix) { + return this.startsWith(prefix) ? this.substr(prefix.length) : this.toString(); +}; + function assert(condition, message) { if (!condition) { throw message || "Assertion failed"; @@ -122,14 +126,6 @@ function regExpEscape(s) { return escaped; } -function addAnyChangeListener(elem, func) { - // DOM0 handler for convenient use by Clear button - elem.onchange = func; - elem.addEventListener("keydown", func, false); - elem.addEventListener("paste", func, false); - elem.addEventListener("input", func, false); -} - function scrollToBottom(elem) { // Scroll to the bottom. To avoid serious performance problems in Firefox, // use a big number instead of elem.scrollHeight. @@ -225,10 +221,11 @@ class JobsTracker { } class JobRenderInfo { - constructor(logWindow, logSegment, statsElements, jobNote, lineCountWindow, lineCountSegments) { + constructor(logWindow, logSegment, statsElements, jobUrl, jobNote, lineCountWindow, lineCountSegments) { this.logWindow = logWindow; this.logSegment = logSegment; this.statsElements = statsElements; + this.jobUrl = jobUrl; this.jobNote = jobNote; this.lineCountWindow = lineCountWindow; this.lineCountSegments = lineCountSegments; @@ -283,7 +280,25 @@ class JobsRenderer { constructor(container, filterBox, historyLines, showNicks, contextMenuRenderer) { this.container = container; this.filterBox = filterBox; - addAnyChangeListener(this.filterBox, () => this.applyFilter()); + this.filterTimeout = null; + this.filterBox.onchange = (e) => { + const repeats = [ + "insertText", + "deleteContent", + "deleteContentForward", + "deleteContentBackward", + ]; + let ms = e && e.inputType && repeats.includes(e.inputType) ? 100 : 0; + ms = !this.filterBox.value ? 
0 : ms; + clearTimeout(this.filterTimeout); + this.filterTimeout = setTimeout(() => { + if (this.filterBox.value !== this.filterBox.old) { + this.applyFilter(); + this.filterBox.old = this.filterBox.value; + } + }, ms); + }; + this.filterBox.oninput = this.filterBox.onchange; this.filterBox.onkeypress = (ev) => { // Don't let `j` or `k` in the filter box cause the job window to switch ev.stopPropagation(); @@ -298,6 +313,7 @@ class JobsRenderer { this.mouseInside = null; this.numCrawls = byId("num-crawls"); this._aligned = true; + this.pipelines = {}; } _getNextJobInSorted(ident) { @@ -314,6 +330,24 @@ class JobsRenderer { return h("div"); } + pipelineInfo(job) { + const pipeline_id = job.pipeline_id; + const pipeline_just_id = pipeline_id.removePrefix("pipeline:"); + const pipeline_nick = this.pipelines[pipeline_id]; + return [ + `pipeline ${pipeline_nick ?? "unknown"} ${pipeline_just_id}`, + pipeline_nick ?? pipeline_just_id, + ] + } + + updatePipelines(pipelines) { + this.pipelines = pipelines; + for (const job of this.jobs.sorted) { + const pipeline = this.renderInfo[job.ident].statsElements.pipeline; + [pipeline.title, pipeline.textContent] = this.pipelineInfo(job); + } + } + _createLogContainer(jobData) { const ident = jobData.ident; const beforeJob = this._getNextJobInSorted(ident); @@ -352,6 +386,9 @@ class JobsRenderer { return s; }; + + const [pipeline_title, pipeline_text] = this.pipelineInfo(jobData); + const statsElements = { mb: h("span", { className: `inline-stat ${maybeAligned("job-mb")}` }, "?"), responses: h("span", { className: `inline-stat ${maybeAligned("job-responses")}` }, "?"), @@ -359,7 +396,17 @@ class JobsRenderer { queueLength: h("span", { className: `inline-stat ${maybeAligned("job-in-queue")}` }, "? in q."), connections: h("span", { className: `inline-stat ${maybeAligned("job-connections")}` }, "?"), delay: h("span", { className: `inline-stat ${maybeAligned("job-delay")}` }, "? 
ms delay"), - ignores: h("span", { className: "job-ignores" }, "?"), + ignores: h("a", { + className: "job-ignores", + href: `//${ds.host}${ds.port}/ignores/${ident}?compact=true`, + onclick: (ev) => { ev.stopPropagation(); }, + }, "?" ), + pipeline: h("a", { + className: `inline-stat ${maybeAligned("job-pipeline")}`, + href: `//${ds.host}${ds.port}/pipelines?initialFilter=${jobData.pipeline_id}`, + title: pipeline_title, + onclick: (ev) => { ev.stopPropagation(); }, + }, pipeline_text), jobInfo: null /* set later */, }; @@ -408,16 +455,19 @@ class JobsRenderer { statsElements.delay, "; ", statsElements.ignores, + "; ", + statsElements.pipeline, ], ), ]); + const jobUrl = statsElements.jobInfo.querySelector(".job-url"); const logWindow = h("div", logWindowAttrs, logSegment); const div = h("div", { className: "log-container", id: `log-container-${ident}` }, [ h("div", { className: "job-header" }, [statsElements.jobInfo, h("span", { className: "job-ident" }, ident)]), logWindow, ]); - this.renderInfo[ident] = new JobRenderInfo(logWindow, logSegment, statsElements, jobNote, 0, [0]); + this.renderInfo[ident] = new JobRenderInfo(logWindow, logSegment, statsElements, jobUrl, jobNote, 0, [0]); this.container.insertBefore(div, beforeElement); // Filter hasn't changed, but we might need to filter out the new job, or // add/remove log-window-expanded class @@ -480,12 +530,17 @@ class JobsRenderer { logSegment.appendChild(h("div", Reusable.obj_className_line_stdout, line)); renderedLines += 1; - // Check for 'Finished RsyncUpload for Item' - // instead of 'Starting MarkItemAsDone for Item' - // because the latter is often missing - if (/^Finished RsyncUpload for Item/.test(line)) { + // Check for several completion messages + // because some of them are often missing + // Ignore error jobs as they get done messages. 
+ if (!info.statsElements.jobInfo.classList.contains("job-info-fatal") && + !info.statsElements.jobInfo.classList.contains("job-info-aborted") && + !info.statsElements.jobInfo.classList.contains("job-info-failed") && + /^ *[1-9][0-9]* bytes\.$|^Starting (RelabelIfAborted|MarkItemAsDone) for Item$|^Finished (WgetDownload|MoveFiles|StopHeartbeat) for Item$/.test(line)) { info.statsElements.jobInfo.classList.add("job-info-done"); this.jobs.markFinished(ident); + } else if (/^ *0 bytes\.$/.test(line)) { + info.statsElements.jobInfo.classList.add("job-info-failed"); } else if ( /^CRITICAL (Sorry|Please report)|^ERROR Fatal exception|No space left on device|^Fatal Python error:|^(Thread|Current thread) 0x/.test( line, @@ -506,6 +561,7 @@ class JobsRenderer { } else if (/^Received item /.test(line)) { // Clear other statuses if a job restarts with the same job ID info.statsElements.jobInfo.classList.remove("job-info-done"); + info.statsElements.jobInfo.classList.remove("job-info-failed"); info.statsElements.jobInfo.classList.remove("job-info-fatal"); info.statsElements.jobInfo.classList.remove("job-info-aborted"); this.jobs.markUnfinished(ident); @@ -578,8 +634,17 @@ class JobsRenderer { } } + // Update pipeline in case a job is restarted on another pipeline + const pipeline = info.statsElements.pipeline; + [pipeline.title, pipeline.textContent] = this.pipelineInfo(jobData); + // Update note info.jobNote.textContent = isBlank(jobData.note) ? 
"" : ` (${jobData.note})`; + if (isBlank(jobData.note)) { + info.jobUrl.removeAttribute("title"); + } else { + info.jobUrl.title = jobData.note; + } info.lineCountWindow += linesRendered; info.lineCountSegments[info.lineCountSegments.length - 1] += linesRendered; @@ -621,14 +686,20 @@ class JobsRenderer { } applyFilter() { - const query = this.filterBox.value; + const query = RegExp(this.filterBox.value); let matches = 0; const matchedWindows = []; const unmatchedWindows = []; this.firstFilterMatch = null; for (const job of this.jobs.sorted) { const w = this.renderInfo[job.ident].logWindow; - if (!RegExp(query).test(job.url)) { + const show = + (byId("filter-job-id").checked && query.test(job.ident)) || + (byId("filter-job-url").checked && query.test(job.url)) || + (byId("filter-job-note").checked && query.test(job.note)) || + (byId("filter-job-pipeline").checked && (query.test(job.pipeline_id) || query.test(this.pipelines[job.pipeline_id]))) || + (this.showNicks && byId("filter-job-nick").checked && query.test(job.started_by)); + if (!show) { w.classList.add("log-window-hidden"); unmatchedWindows.push(w); @@ -1020,7 +1091,18 @@ class Dashboard { const batchMaxItems = args.batchMaxItems ? Number(args.batchMaxItems) : 250; const showNicks = args.showNicks ? Boolean(Number(args.showNicks)) : false; const contextMenu = args.contextMenu ? Boolean(Number(args.contextMenu)) : true; - const initialFilter = args.initialFilter ?? "^$"; + this.initialFilter = args.initialFilter ?? "^$"; + const filterJobID = args.filterJobID ? Boolean(Number(args.filterJobID)) : true; + const filterJobURL = args.filterJobURL ? Boolean(Number(args.filterJobURL)) : true; + const filterJobNote = args.filterJobNote ? Boolean(Number(args.filterJobNote)) : true; + const filterJobPipe = args.filterJobPipe ? Boolean(Number(args.filterJobPipe)) : true; + const filterJobNick = args.filterJobNick ? Boolean(Number(args.filterJobNick)) : true; + const showAllHeaders = args.showAllHeaders ? 
Boolean(Number(args.showAllHeaders)) : true; + const showRunningJobs = args.showRunningJobs ? Boolean(Number(args.showRunningJobs)) : true; + const showFinishedJobs = args.showFinishedJobs ? Boolean(Number(args.showFinishedJobs)) : true; + const showFailedJobs = args.showFailedJobs ? Boolean(Number(args.showFailedJobs)) : true; + const showFatalJobs = args.showFatalJobs ? Boolean(Number(args.showFatalJobs)) : true; + const showAbortedJobs = args.showAbortedJobs ? Boolean(Number(args.showAbortedJobs)) : true; const loadRecent = args.loadRecent ? Boolean(Number(args.loadRecent)) : true; this.debug = args.debug ? Boolean(Number(args.debug)) : false; @@ -1030,6 +1112,10 @@ class Dashboard { } this.host = args.host ? args.host : location.hostname; + this.port = args.port ? `:${Number(args.port)}` : ''; + const wsproto = window.location.protocol === "https:" ? "wss:" : "ws:"; + this.websocketUrl = args.websocketUrl ?? `${wsproto}//${this.host}:4568/stream`; + this.dumpTraffic = args.dumpMax && Number(args.dumpMax) > 0; if (this.dumpTraffic) { this.dumpMax = Number(args.dumpMax); @@ -1056,6 +1142,8 @@ class Dashboard { this.contextMenuRenderer, ); + this.loadPipelines(); + document.onkeypress = (ev) => this.keyPress(ev); // Adjust help text based on URL @@ -1067,9 +1155,52 @@ class Dashboard { if (!showNicks) { addPageStyles(".job-nick-aligned { width: 0; }"); + } else { + byId("filter-types").lastChild.after( + h("input", { + type: "checkbox", + id: "filter-job-nick", + onclick: () => { ds.jobsRenderer.applyFilter(); }, + checked: true, + }) + ); + byId("filter-types").lastChild.after("\n\t\t\t"); + byId("filter-types").lastChild.after( + h("label", { className: "filter-job", htmlFor: "filter-job-nick", textContent: "Nick" }), + ); + byId("filter-types").lastChild.after(h("br")); + byId("filter-types").lastChild.after("\n"); + } + + byId("filter-job-id").checked = filterJobID; + byId("filter-job-url").checked = filterJobURL; + byId("filter-job-note").checked = 
filterJobNote; + byId("filter-job-pipeline").checked = filterJobPipe; + if (showNicks) { + byId("filter-job-nick").checked = filterJobNick; + } + + if (args.initialFilter != null) { + byId("set-filter-none").after( + h("input", { + className: "button", + type: "button", + id: "set-filter-initial", + onclick: () => { ds.setFilter(ds.initialFilter) }, + value: "Initial", + }) + ); + byId("set-filter-none").after("\n"); } + this.setFilter(this.initialFilter); - this.setFilter(initialFilter); + this.showAllHeaders(showAllHeaders); + + this.showRunningJobs(showRunningJobs); + this.showFinishedJobs(showFinishedJobs); + this.showFailedJobs(showFailedJobs); + this.showFatalJobs(showFatalJobs); + this.showAbortedJobs(showAbortedJobs); const finishSetup = () => { byId("meta-info").innerHTML = ""; @@ -1135,6 +1266,31 @@ ${String(kbPerSec).padStart(3, "0")} KB/s`; } } + loadPipelines() { + return new Promise((resolve, reject) => { + const xhr = new XMLHttpRequest(); + xhr.onload = () => { + try { + let pipelines = {}; + const json = JSON.parse(xhr.responseText).pipelines; + for (const pipeline of json) { + pipelines[pipeline.id] = pipeline.nickname; + } + this.jobsRenderer.updatePipelines(pipelines); + } catch (e) { + console.log("Failed to load /pipelines data: ", e); + } + resolve(); + }; + xhr.onerror = (ev) => { + reject(ev); + }; + xhr.open("GET", `//${this.host}${this.port}/pipelines`); + xhr.setRequestHeader("Accept", "application/json"); + xhr.send(""); + }); + } + loadRecent() { return new Promise((resolve, reject) => { byId("meta-info").textContent = "Requesting recent data"; @@ -1158,7 +1314,7 @@ ${String(kbPerSec).padStart(3, "0")} KB/s`; const size_mb = Math.round((100 * ev.total) / 1e6) / 100; byId("meta-info").textContent = `Recent data: ${percent}% (${size_mb}MB)`; }; - xhr.open("GET", `//${this.host}/logs/recent?cb=${Date.now()}${Math.random()}`); + xhr.open("GET", `//${this.host}${this.port}/logs/recent?cb=${Date.now()}${Math.random()}`); 
xhr.setRequestHeader("Accept", "application/json"); xhr.send(""); }); @@ -1191,8 +1347,28 @@ ${String(kbPerSec).padStart(3, "0")} KB/s`; ev.preventDefault(); byId("filter-box").focus(); byId("filter-box").select(); + } else if (ev.which === 105 /* i */) { + ds.setFilter(ds.initialFilter); } else if (ev.which === 118 /* v */) { window.open(this.jobsRenderer.firstFilterMatch.url); + } else if (ev.which === 104 /* h */) { + ds.showAllHeaders(!byId("show-all-headers").checked); + } else if (ev.which === 114 /* r */) { + ds.showRunningJobs(!byId("show-running-jobs").checked); + } else if (ev.which === 100 /* d */) { + ds.showFinishedJobs(!byId("show-finished-jobs").checked); + } else if (ev.which === 98 /* b */) { + ds.showFailedJobs(!byId("show-failed-jobs").checked); + } else if (ev.which === 99 /* c */) { + ds.showFatalJobs(!byId("show-fatal-jobs").checked); + } else if (ev.which === 115 /* s */) { + ds.showAbortedJobs(!byId("show-aborted-jobs").checked); + } else if (ev.which === 117 /* u */) { + byId("filter-job-url").click(); + } else if (ev.which === 101 /* e */) { + byId("filter-job-note").click(); + } else if (ev.which === 112 /* p */) { + byId("filter-job-pipeline").click(); } } @@ -1205,9 +1381,7 @@ ${String(kbPerSec).padStart(3, "0")} KB/s`; } connectWebSocket() { - const wsproto = window.location.protocol === "https:" ? 
"wss:" : "ws:"; - - this.ws = new WebSocket(`${wsproto}//${this.host}:4568/stream`); + this.ws = new WebSocket(this.websocketUrl); this.ws.onmessage = (ev) => { this.newItemsReceived += 1; @@ -1249,6 +1423,36 @@ ${String(kbPerSec).padStart(3, "0")} KB/s`; byId("filter-box").value = value; byId("filter-box").onchange(); } + + showAllHeaders(value) { + byId('show-all-headers').checked = value; + byId('hide-headers').sheet.disabled = value; + } + + showRunningJobs(value) { + byId('show-running-jobs').checked = value; + byId('hide-running').sheet.disabled = value; + } + + showFinishedJobs(value) { + byId('show-finished-jobs').checked = value; + byId('hide-done').sheet.disabled = value; + } + + showFailedJobs(value) { + byId('show-failed-jobs').checked = value; + byId('hide-failed').sheet.disabled = value; + } + + showFatalJobs(value) { + byId('show-fatal-jobs').checked = value; + byId('hide-fatal').sheet.disabled = value; + } + + showAbortedJobs(value) { + byId('show-aborted-jobs').checked = value; + byId('hide-aborted').sheet.disabled = value; + } } const ds = new Dashboard(); diff --git a/dashboard/dashboard.html b/dashboard/dashboard.html index a0304387..d98b4355 100644 --- a/dashboard/dashboard.html +++ b/dashboard/dashboard.html @@ -57,6 +57,11 @@ font-size: 18px; } +.drop-down { + display: inline flex; + flex-direction: column; +} + .padded-page { padding: 20px 27px 20px 27px; } @@ -128,6 +133,10 @@ color: #DD0000 !important; } +.job-info-failed { + color: #CC7676 !important; +} + .inline-stat { /* Needed for 'Align!' feature */ display: inline-block; @@ -391,6 +400,47 @@ top: 0; } + + + + + +
@@ -398,9 +448,38 @@ ArchiveBot tracking 0 crawls. See also pipeline or job reports. - Show: - - + + + + + + +
😊
@@ -418,16 +497,20 @@ This page shows all of the crawls that ArchiveBot is currently running.

- To show or hide a job, click anywhere on its stats line. + Each job has a header with the URL, note, request/queue stats, options and an identifier. +

+

+ To show or hide a job, click anywhere on its header. - The color coding for the job stats line is: + The color coding for the job header is: in progress, finished normally, + finished with failure, finished with abort, finished with fatal exception.

- Mouse over the job start date or the response count for additional information. + Mouse over the job URL, start date, response count, or queue count for additional information.

To pause scrolling, move your mouse inside a log window. @@ -439,12 +522,22 @@ Keyboard shortcuts:

@@ -461,12 +554,32 @@

- To use ArchiveBot, drop by #archivebot on hackint. Issue commands by typing them into the channel. You will need channel operator (@) or voice (+) status to issue archiving jobs; just ask for help or leave a message with the website you want to archive. + To use ArchiveBot, drop by #archivebot on hackint. Issue commands by typing them into the channel. You will need channel operator (@) or voice (+) status to issue archiving jobs; just ask for help or leave a message with the website you want to archive.

These ignore sets are available for crawls. The global ignore set automatically applies to all crawls. diff --git a/dashboard/dashboard3.html b/dashboard/dashboard3.html index a431ff01..8edc3352 100644 --- a/dashboard/dashboard3.html +++ b/dashboard/dashboard3.html @@ -156,7 +156,7 @@ If your adblocker is enabled for this domain, you will see slower performance, and some URLs will not be displayed.

- To use ArchiveBot, drop by #archivebot on hackint. Issue commands by typing them into the channel. You will need channel operator (@) or voice (+) status to issue archiving jobs; just ask for help or leave a message with the website you want to archive. + To use ArchiveBot, drop by #archivebot on hackint. Issue commands by typing them into the channel. You will need channel operator (@) or voice (+) status to issue archiving jobs; just ask for help or leave a message with the website you want to archive.

These ignore sets are available for crawls. The global ignore set automatically applies to all crawls. diff --git a/dashboard/finished.html b/dashboard/finished.html index 9b3ded20..7a7d3744 100644 --- a/dashboard/finished.html +++ b/dashboard/finished.html @@ -43,7 +43,6 @@