From ba250e72389bda19cd7e11817b4edd1516a99572 Mon Sep 17 00:00:00 2001 From: MrKampla Date: Mon, 12 May 2025 13:55:53 +0200 Subject: [PATCH 1/3] fix: allow specifying encoding when reading as Text --- README.md | 7 +- package.json | 2 +- src/helpers/encodings.ts | 341 +++++++++++++++++++++++++++++++++++++++ src/interfaces.ts | 9 +- src/useFilePicker.ts | 2 +- 5 files changed, 351 insertions(+), 10 deletions(-) create mode 100644 src/helpers/encodings.ts diff --git a/README.md b/README.md index f827080..1e7abd5 100644 --- a/README.md +++ b/README.md @@ -352,6 +352,7 @@ const Imperative = () => { | readFilesContent | Ignores files content and omits reading process if set to false | true | true, false | | validators | Add validation logic. You can use some of the [built-in validators](#built-in-validators) like FileAmountLimitValidator or create your own [custom validation](#custom-validation) logic | [] | [MyValidator, MySecondValidator] | | initializeWithCustomParameters | allows for customization of the input element that is created by the file picker. It accepts a function that takes in the input element as a parameter and can be used to set any desired attributes or styles on the element. | n/a | (input) => input.setAttribute("disabled", "") | +| encoding | Specifies the encoding to use when reading text files. Only applicable when readAs is set to "Text". Available options include all standard encodings. | "utf-8" | "latin1", "utf-8", "windows-1252" | | onFilesSelected | A callback function that is called when files are successfully selected. The function is passed an array of objects with information about each successfully selected file | n/a | (data) => console.log(data) | | onFilesSuccessfullySelected | A callback function that is called when files are successfully selected. 
The function is passed an array of objects with information about each successfully selected file | n/a | (data) => console.log(data) | | onFilesRejected | A callback function that is called when files are rejected due to validation errors or other issues. The function is passed an array of objects with information about each rejected file | n/a | (data) => console.log(data) | @@ -462,12 +463,6 @@ class CustomValidator extends Validator { - Github: [@MrKampla](https://github.com/MrKampla) - LinkedIn: [@https://www.linkedin.com/in/kamil-planer/](https://www.linkedin.com/in/kamil-planer/) -👤 **Adam Dobrzeniewski** - -- Twitter: [@twitter.com/xForsect](https://twitter.com/xForsect) -- Github: [@Forsect](https://github.com/Forsect) -- LinkedIn: [@https://www.linkedin.com/in/adam-dobrzeniewski](https://www.linkedin.com/in/adam-dobrzeniewski) - ## [](https://github.com/Jaaneek/useFilePicker#-contributing)🤝 Contributing Contributions, issues and feature requests are welcome! diff --git a/package.json b/package.json index 9a4273c..8c7b2fb 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "use-file-picker", "description": "Simple react hook to open browser file selector.", - "version": "2.1.2", + "version": "2.1.3", "license": "MIT", "author": "Milosz Jankiewicz", "homepage": "https://github.com/Jaaneek/useFilePicker", diff --git a/src/helpers/encodings.ts b/src/helpers/encodings.ts new file mode 100644 index 0000000..9be6aaa --- /dev/null +++ b/src/helpers/encodings.ts @@ -0,0 +1,341 @@ +export const ENCODINGS = [ + { + encodings: [ + { + labels: ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf-8', 'utf8', 'x-unicode20utf8'], + name: 'UTF-8', + }, + ], + heading: 'The Default Encoding', + }, + { + encodings: [ + { + labels: ['866', 'cp866', 'csibm866', 'ibm866'], + name: 'IBM866', + }, + { + labels: [ + 'csisolatin2', + 'iso-8859-2', + 'iso-ir-101', + 'iso8859-2', + 'iso88592', + 'iso_8859-2', + 'iso_8859-2:1987', + 'l2', + 'latin2', + ], + 
name: 'ISO-8859-2', + }, + { + labels: [ + 'csisolatin3', + 'iso-8859-3', + 'iso-ir-109', + 'iso8859-3', + 'iso88593', + 'iso_8859-3', + 'iso_8859-3:1988', + 'l3', + 'latin3', + ], + name: 'ISO-8859-3', + }, + { + labels: [ + 'csisolatin4', + 'iso-8859-4', + 'iso-ir-110', + 'iso8859-4', + 'iso88594', + 'iso_8859-4', + 'iso_8859-4:1988', + 'l4', + 'latin4', + ], + name: 'ISO-8859-4', + }, + { + labels: [ + 'csisolatincyrillic', + 'cyrillic', + 'iso-8859-5', + 'iso-ir-144', + 'iso8859-5', + 'iso88595', + 'iso_8859-5', + 'iso_8859-5:1988', + ], + name: 'ISO-8859-5', + }, + { + labels: [ + 'arabic', + 'asmo-708', + 'csiso88596e', + 'csiso88596i', + 'csisolatinarabic', + 'ecma-114', + 'iso-8859-6', + 'iso-8859-6-e', + 'iso-8859-6-i', + 'iso-ir-127', + 'iso8859-6', + 'iso88596', + 'iso_8859-6', + 'iso_8859-6:1987', + ], + name: 'ISO-8859-6', + }, + { + labels: [ + 'csisolatingreek', + 'ecma-118', + 'elot_928', + 'greek', + 'greek8', + 'iso-8859-7', + 'iso-ir-126', + 'iso8859-7', + 'iso88597', + 'iso_8859-7', + 'iso_8859-7:1987', + 'sun_eu_greek', + ], + name: 'ISO-8859-7', + }, + { + labels: [ + 'csiso88598e', + 'csisolatinhebrew', + 'hebrew', + 'iso-8859-8', + 'iso-8859-8-e', + 'iso-ir-138', + 'iso8859-8', + 'iso88598', + 'iso_8859-8', + 'iso_8859-8:1988', + 'visual', + ], + name: 'ISO-8859-8', + }, + { + labels: ['csiso88598i', 'iso-8859-8-i', 'logical'], + name: 'ISO-8859-8-I', + }, + { + labels: ['csisolatin6', 'iso-8859-10', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'], + name: 'ISO-8859-10', + }, + { + labels: ['iso-8859-13', 'iso8859-13', 'iso885913'], + name: 'ISO-8859-13', + }, + { + labels: ['iso-8859-14', 'iso8859-14', 'iso885914'], + name: 'ISO-8859-14', + }, + { + labels: ['csisolatin9', 'iso-8859-15', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'], + name: 'ISO-8859-15', + }, + { + labels: ['iso-8859-16'], + name: 'ISO-8859-16', + }, + { + labels: ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r'], + name: 'KOI8-R', + }, + { + labels: ['koi8-ru', 
'koi8-u'], + name: 'KOI8-U', + }, + { + labels: ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman'], + name: 'macintosh', + }, + { + labels: ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620', 'windows-874'], + name: 'windows-874', + }, + { + labels: ['cp1250', 'windows-1250', 'x-cp1250'], + name: 'windows-1250', + }, + { + labels: ['cp1251', 'windows-1251', 'x-cp1251'], + name: 'windows-1251', + }, + { + labels: [ + 'ansi_x3.4-1968', + 'ascii', + 'cp1252', + 'cp819', + 'csisolatin1', + 'ibm819', + 'iso-8859-1', + 'iso-ir-100', + 'iso8859-1', + 'iso88591', + 'iso_8859-1', + 'iso_8859-1:1987', + 'l1', + 'latin1', + 'us-ascii', + 'windows-1252', + 'x-cp1252', + ], + name: 'windows-1252', + }, + { + labels: ['cp1253', 'windows-1253', 'x-cp1253'], + name: 'windows-1253', + }, + { + labels: [ + 'cp1254', + 'csisolatin5', + 'iso-8859-9', + 'iso-ir-148', + 'iso8859-9', + 'iso88599', + 'iso_8859-9', + 'iso_8859-9:1989', + 'l5', + 'latin5', + 'windows-1254', + 'x-cp1254', + ], + name: 'windows-1254', + }, + { + labels: ['cp1255', 'windows-1255', 'x-cp1255'], + name: 'windows-1255', + }, + { + labels: ['cp1256', 'windows-1256', 'x-cp1256'], + name: 'windows-1256', + }, + { + labels: ['cp1257', 'windows-1257', 'x-cp1257'], + name: 'windows-1257', + }, + { + labels: ['cp1258', 'windows-1258', 'x-cp1258'], + name: 'windows-1258', + }, + { + labels: ['x-mac-cyrillic', 'x-mac-ukrainian'], + name: 'x-mac-cyrillic', + }, + ], + heading: 'Legacy single-byte encodings', + }, + { + encodings: [ + { + labels: [ + 'chinese', + 'csgb2312', + 'csiso58gb231280', + 'gb2312', + 'gb_2312', + 'gb_2312-80', + 'gbk', + 'iso-ir-58', + 'x-gbk', + ], + name: 'GBK', + }, + { + labels: ['gb18030'], + name: 'gb18030', + }, + ], + heading: 'Legacy multi-byte Chinese (simplified) encodings', + }, + { + encodings: [ + { + labels: ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'], + name: 'Big5', + }, + ], + heading: 'Legacy multi-byte Chinese (traditional) encodings', + }, + { + 
encodings: [ + { + labels: ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp'], + name: 'EUC-JP', + }, + { + labels: ['csiso2022jp', 'iso-2022-jp'], + name: 'ISO-2022-JP', + }, + { + labels: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'shift_jis', 'sjis', 'windows-31j', 'x-sjis'], + name: 'Shift_JIS', + }, + ], + heading: 'Legacy multi-byte Japanese encodings', + }, + { + encodings: [ + { + labels: [ + 'cseuckr', + 'csksc56011987', + 'euc-kr', + 'iso-ir-149', + 'korean', + 'ks_c_5601-1987', + 'ks_c_5601-1989', + 'ksc5601', + 'ksc_5601', + 'windows-949', + ], + name: 'EUC-KR', + }, + ], + heading: 'Legacy multi-byte Korean encodings', + }, + { + encodings: [ + { + labels: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr', 'replacement'], + name: 'replacement', + }, + { + labels: ['unicodefffe', 'utf-16be'], + name: 'UTF-16BE', + }, + { + labels: ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16', 'utf-16le'], + name: 'UTF-16LE', + }, + ], + heading: 'Legacy miscellaneous encodings', + }, +] as const; + +type KnownEncoding = (typeof ENCODINGS)[number]['encodings'][number]['labels'][number]; + +/** + * Type that represents text encodings supported by the system. 
+ * + * The encoding standards are organized into the following categories: + * + * - **The Default Encoding**: UTF-8 + * - **Legacy single-byte encodings**: IBM866, ISO-8859-2 through ISO-8859-16, KOI8-R, KOI8-U, macintosh, windows-874 through windows-1258, x-mac-cyrillic + * - **Legacy multi-byte Chinese (simplified) encodings**: GBK, gb18030 + * - **Legacy multi-byte Chinese (traditional) encodings**: Big5 + * - **Legacy multi-byte Japanese encodings**: EUC-JP, ISO-2022-JP, Shift_JIS + * - **Legacy multi-byte Korean encodings**: EUC-KR + * - **Legacy miscellaneous encodings**: replacement, UTF-16BE, UTF-16LE + */ +export type Encoding = KnownEncoding | (string & {}); // this is a TS hack to allow any string to be used as an encoding, apart from the known encodings diff --git a/src/interfaces.ts b/src/interfaces.ts index f667f99..b3bf19a 100644 --- a/src/interfaces.ts +++ b/src/interfaces.ts @@ -96,7 +96,6 @@ type UseFilePickerConfigCommon = { onClear?: () => void; initializeWithCustomParameters?: (inputElement: HTMLInputElement) => void; }; - type ReadFileContentConfig = | ({ readFilesContent?: true | undefined | never; @@ -107,7 +106,13 @@ type ReadFileContentConfig = onFilesSuccessfullySelected?: (data: SelectedFiles) => void; } | { - readAs?: Exclude; + readAs?: 'Text'; + encoding?: string; + onFilesSelected?: (data: SelectedFilesOrErrors) => void; + onFilesSuccessfullySelected?: (data: SelectedFiles) => void; + } + | { + readAs?: Exclude; onFilesSelected?: (data: SelectedFilesOrErrors) => void; onFilesSuccessfullySelected?: (data: SelectedFiles) => void; } diff --git a/src/useFilePicker.ts b/src/useFilePicker.ts index 1c37b6d..579903a 100644 --- a/src/useFilePicker.ts +++ b/src/useFilePicker.ts @@ -59,7 +59,7 @@ function useFilePicker< //availible reader methods: readAsText, readAsBinaryString, readAsArrayBuffer, readAsDataURL const readStrategy = reader[`readAs${readAs}` as ReaderMethod] as typeof reader.readAsText; - readStrategy.call(reader, file); + 
readStrategy.call(reader, file, /* encoding only applies to readAsText; test the local readAs that selects the reader method, since props.readAs is undefined when the caller omits it */ readAs === 'Text' && 'encoding' in props ? props.encoding : undefined); const addError = ({ ...others }: UseFilePickerError) => { reject({ ...others }); From 355da8cdc2dca2306c316abcdae3d58c0692ec2a Mon Sep 17 00:00:00 2001 From: MrKampla Date: Mon, 12 May 2025 14:01:04 +0200 Subject: [PATCH 2/3] fix: move the encoding type to interfaces file --- src/helpers/encodings.ts | 17 ----------------- src/interfaces.ts | 20 +++++++++++++++++++- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/helpers/encodings.ts b/src/helpers/encodings.ts index 9be6aaa..609f611 100644 --- a/src/helpers/encodings.ts +++ b/src/helpers/encodings.ts @@ -322,20 +322,3 @@ export const ENCODINGS = [ heading: 'Legacy miscellaneous encodings', }, ] as const; - -type KnownEncoding = (typeof ENCODINGS)[number]['encodings'][number]['labels'][number]; - -/** - * Type that represents text encodings supported by the system. - * - * The encoding standards are organized into the following categories: - * - * - **The Default Encoding**: UTF-8 - * - **Legacy single-byte encodings**: IBM866, ISO-8859-2 through ISO-8859-16, KOI8-R, KOI8-U, macintosh, windows-874 through windows-1258, x-mac-cyrillic - * - **Legacy multi-byte Chinese (simplified) encodings**: GBK, gb18030 - * - **Legacy multi-byte Chinese (traditional) encodings**: Big5 - * - **Legacy multi-byte Japanese encodings**: EUC-JP, ISO-2022-JP, Shift_JIS - * - **Legacy multi-byte Korean encodings**: EUC-KR - * - **Legacy miscellaneous encodings**: replacement, UTF-16BE, UTF-16LE - */ -export type Encoding = KnownEncoding | (string & {}); // this is a TS hack to allow any string to be used as an encoding, apart from the known encodings diff --git a/src/interfaces.ts b/src/interfaces.ts index b3bf19a..f3e3870 100644 --- a/src/interfaces.ts +++ b/src/interfaces.ts @@ -1,6 +1,7 @@ import { FileWithPath as FileWithPathFromSelector } from 'file-selector'; import { Validator } from './validators/validatorBase'; import { XOR
} from 'ts-xor'; +import { ENCODINGS } from './helpers/encodings'; export type FileWithPath = FileWithPathFromSelector; @@ -88,6 +89,23 @@ export type SelectedFilesOrErrors = XOR< FileErrors >; +type KnownEncoding = (typeof ENCODINGS)[number]['encodings'][number]['labels'][number]; + +/** + * Type that represents text encodings supported by the system. + * + * The encoding standards are organized into the following categories: + * + * - **The Default Encoding**: UTF-8 + * - **Legacy single-byte encodings**: IBM866, ISO-8859-2 through ISO-8859-16, KOI8-R, KOI8-U, macintosh, windows-874 through windows-1258, x-mac-cyrillic + * - **Legacy multi-byte Chinese (simplified) encodings**: GBK, gb18030 + * - **Legacy multi-byte Chinese (traditional) encodings**: Big5 + * - **Legacy multi-byte Japanese encodings**: EUC-JP, ISO-2022-JP, Shift_JIS + * - **Legacy multi-byte Korean encodings**: EUC-KR + * - **Legacy miscellaneous encodings**: replacement, UTF-16BE, UTF-16LE + */ +export type Encoding = KnownEncoding | (string & {}); // this is a TS hack to allow any string to be used as an encoding, apart from the known encodings + type UseFilePickerConfigCommon = { multiple?: boolean; accept?: string | string[]; @@ -107,7 +125,7 @@ type ReadFileContentConfig = } | { readAs?: 'Text'; - encoding?: string; + encoding?: Encoding; onFilesSelected?: (data: SelectedFilesOrErrors) => void; onFilesSuccessfullySelected?: (data: SelectedFiles) => void; } From 33a32aa320fbd258f7e6316004d22e20970686c6 Mon Sep 17 00:00:00 2001 From: MrKampla Date: Thu, 29 May 2025 17:02:11 +0200 Subject: [PATCH 3/3] fix: make encodings import type only --- src/interfaces.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/interfaces.ts b/src/interfaces.ts index f3e3870..a1722b4 100644 --- a/src/interfaces.ts +++ b/src/interfaces.ts @@ -1,7 +1,7 @@ import { FileWithPath as FileWithPathFromSelector } from 'file-selector'; import { Validator } from './validators/validatorBase'; import { 
XOR } from 'ts-xor'; -import { ENCODINGS } from './helpers/encodings'; +import type { ENCODINGS } from './helpers/encodings'; export type FileWithPath = FileWithPathFromSelector; @@ -96,7 +96,7 @@ type KnownEncoding = (typeof ENCODINGS)[number]['encodings'][number]['labels'][n * * The encoding standards are organized into the following categories: * - * - **The Default Encoding**: UTF-8 + * - **The Default Encoding by W3C File API specification**: UTF-8 * - **Legacy single-byte encodings**: IBM866, ISO-8859-2 through ISO-8859-16, KOI8-R, KOI8-U, macintosh, windows-874 through windows-1258, x-mac-cyrillic * - **Legacy multi-byte Chinese (simplified) encodings**: GBK, gb18030 * - **Legacy multi-byte Chinese (traditional) encodings**: Big5