|
1 | | -import fetch from "node-fetch"; |
2 | | -import cheerio, { Cheerio, CheerioAPI, Node } from "cheerio"; |
3 | | -import { Format, output, Result } from "./utils"; |
| 1 | +import fetch from 'node-fetch' |
| 2 | +import cheerio, { Cheerio, CheerioAPI, Node } from 'cheerio' |
| 3 | +import { Format, output, Result } from './utils' |
4 | 4 |
|
/**
 * Options accepted by `jsonFromTable`.
 *
 * At least one of `url` or `html` must be provided; when both are given,
 * `html` is used and `url` is ignored.
 */
interface Options {
  /** Address of a page to download and parse. Only used when `html` is not given. */
  url?: string
  /** HTML markup to parse directly. */
  html?: string
  /** Selector that locates the table. Defaults to `table`. */
  selector?: string
  /**
   * Selector for the header cells, tried before the built-in fallbacks.
   * Defaults to `tr:first-child th`.
   */
  hSelector?: string
  /**
   * `[rowSelector, cellSelector]` pair for the body rows and the cells inside
   * each row, tried before the built-in fallbacks.
   * Defaults to `['tr:not(:first-child)', 'td']`.
   */
  bSelector?: [string, string]
  /** Output format handed to `output`. Defaults to `object`. */
  format?: Format
}
13 | 13 |
|
/**
 * Get JSON, Object, Array from html tables
 *
 * When `html` is provided it is parsed synchronously and the converted table
 * is returned directly. Otherwise `url` is downloaded and a promise of the
 * converted table is returned. `html` takes precedence when both are given.
 *
 * @param options Options
 * @returns The table converted by `output` into the requested `format`
 *   (wrapped in a promise on the `url` path).
 * @throws Error when neither `url` nor `html` is provided, and when no
 *   element matches `selector`. On the `url` path the latter surfaces as a
 *   rejected promise.
 */
function jsonFromTable<T extends Format = 'object'>(
  options: { url: string; format?: T } & Options
): Promise<Result<T>>

function jsonFromTable<T extends Format = 'object'>(
  options: { html: string; format?: T } & Options
): Result<T>

function jsonFromTable<T extends Format>(options: Options = {}) {
  const {
    html,
    url,
    selector = 'table',
    hSelector = 'tr:first-child th',
    bSelector = ['tr:not(:first-child)', 'td'],
    format = 'object',
  } = options
  // The caller's selectors are tried first, then progressively looser fallbacks.
  // prettier-ignore
  const hSelectors = [hSelector, "thead tr:first-child th", "tr:first-child th", "tr:first-child td"];
  // prettier-ignore
  const bSelectors = [bSelector, ["tbody tr", "td"], ["tr:not(:first-child)", "td"], ["tr", "td"]];

  if (html) {
    return htmlTableToJson(html)
  } else if (url) {
    return fetch(url)
      .then((res) => res.text())
      .then((fetchedHtml) => htmlTableToJson(fetchedHtml))
  } else {
    throw new Error(`You need to provide at least a url or html`)
  }

  // Function declarations are hoisted, so the calls above can reach this helper.
  function htmlTableToJson(html: string) {
    const $ = cheerio.load(html)

    const table = $(selector)

    if (table.html() === null)
      throw new Error(`Couldn't find table with selector "${selector}"`)

    const headers = getHeaders($, table, hSelectors)
    const body = getBody($, table, bSelectors)

    // `headers` and `body` are plain arrays, so the header count has to be
    // compared with the number of cells in a row. Reading `.values.length`
    // on an array yields the arity of Array.prototype.values (always 0),
    // which made the previous check unable to ever report a mismatch.
    const mismatchedRow = body.find((row) => row.length !== headers.length)

    if (mismatchedRow !== undefined) {
      console.warn(
        `Length of body and head is not same:\nHeader: ${headers.length}\nBody: ${mismatchedRow.length}`
      )
    }

    return output(headers, body, format) as Result<T>
  }
}
72 | 72 |
|
/**
 * Collect the header texts of a table.
 *
 * Each selector is evaluated, in order, against the table's inner HTML. The
 * first selector that matches at least one element wins, and the trimmed
 * text of every element it matched is returned.
 *
 * @param $ Cheerio instance used to run the queries
 * @param table Selection whose inner HTML is searched
 * @param selectors Candidate header-cell selectors, in priority order
 * @returns Trimmed header texts, or an empty array when no selector matches
 */
function getHeaders(
  $: CheerioAPI,
  table: Cheerio<Node>,
  selectors: string[]
): string[] {
  // The markup does not change between attempts, so serialize it only once.
  const tableHtml = table.html()

  for (const selector of selectors) {
    const cells = $(selector, tableHtml)

    if (cells.length > 0) {
      return cells.toArray().map((cell) => $(cell).text().trim())
    }
  }

  return []
}
85 | 85 |
|
/**
 * Collect the body cells of a table, row by row.
 *
 * Each entry of `selectors` is a `[rowSelector, cellSelector]` pair. Pairs
 * are tried in order; the first one whose row selector matches at least one
 * element inside the table's inner HTML wins. For every matched row, the
 * cell selector is evaluated against that row's inner HTML and the text of
 * each matched cell is collected. Unlike header texts, cell texts are not
 * trimmed.
 *
 * @param $ Cheerio instance used to run the queries
 * @param table Selection whose inner HTML is searched
 * @param selectors Candidate `[rowSelector, cellSelector]` pairs, in priority order
 * @returns One array of cell texts per row, or an empty array when no row
 *   selector matches
 */
function getBody(
  $: CheerioAPI,
  table: Cheerio<Node>,
  selectors: string[][]
): string[][] {
  // The markup does not change between attempts, so serialize it only once.
  const tableHtml = table.html()

  for (const [rowSelector, cellSelector] of selectors) {
    const rows = $(rowSelector, tableHtml).toArray()

    if (rows.length > 0) {
      return rows.map((row) =>
        $(cellSelector, $(row).html())
          .toArray()
          .map((cell) => $(cell).text())
      )
    }
  }

  return []
}
107 | 107 |
|
108 | | -export { jsonFromTable, Format, Result, Options, output }; |
| 108 | +export { jsonFromTable, Format, Result, Options, output } |
0 commit comments