From f407cfc43a0af68f339d228cf07c71eaaa0cea10 Mon Sep 17 00:00:00 2001 From: Adro Morelos Date: Fri, 1 May 2026 12:44:24 -0500 Subject: [PATCH 1/7] fix(host): replace regex suffix removal with string operations `Host::tld()` built a PCRE pattern from `'.' . $tld` after escaping only dots. Every other PCRE metacharacter (`* + ? | ^ $ ( ) [ ] { } \`) passed through unescaped, and `$tld` is reachable from a public setter, so malicious or surprising input could change the regex shape, trigger PCRE compile errors, or open a small ReDoS surface. The regex was unnecessary: `$tld` is the rightmost suffix of `$host`, so `str_ends_with` + `substr` removes it cleanly without building any pattern. Behavior for every existing test case is unchanged. --- src/Host/Host.php | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Host/Host.php b/src/Host/Host.php index 5ba320d..83d1348 100644 --- a/src/Host/Host.php +++ b/src/Host/Host.php @@ -38,10 +38,17 @@ public function tld(?string $tld = null): string|self $this->tld = $tld; - $escaped = str_replace('.', '\.', '.' . strval($this->tld)); - - /** @var string $root */ - $root = preg_replace("/$escaped$/", '', strval($this->host)); + $tldValue = strval($this->tld); + $hostValue = strval($this->host); + $suffix = '.' . 
$tldValue; + + if ($hostValue === $tldValue) { + $root = ''; + } elseif (str_ends_with($hostValue, $suffix)) { + $root = substr($hostValue, 0, -strlen($suffix)); + } else { + $root = $hostValue; + } if (!validate_domain_root(trim($root, '.'))) { $this->isValid = false; From 1e774d6c97c37b9608984e1764de13ab8e2cbbb1 Mon Sep 17 00:00:00 2001 From: Adro Morelos Date: Fri, 1 May 2026 12:44:52 -0500 Subject: [PATCH 2/7] fix(parser): use str_starts_with for scheme detection `strpos($host, 'http://') !== false` matches the literal anywhere in the string, not at position 0, so an input like `evil.example.com/?u=http://x` was treated as already-schemed and the `http://` prefix was not prepended before `parse_url`. The intent was "does the input start with a scheme?", so use `str_starts_with`. --- src/Parse/HostParser.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parse/HostParser.php b/src/Parse/HostParser.php index ac92f82..281b57d 100644 --- a/src/Parse/HostParser.php +++ b/src/Parse/HostParser.php @@ -13,8 +13,8 @@ class HostParser */ public static function parse(string $host): array { - $isSchemePresent = strpos($host, 'https://') !== false || - strpos($host, 'http://') !== false; + $isSchemePresent = str_starts_with($host, 'https://') || + str_starts_with($host, 'http://'); if (!$isSchemePresent) { $host = 'http://' . $host; From 1e5f0bda0254ee694dbbe3ed67515e0b36724796 Mon Sep 17 00:00:00 2001 From: Adro Morelos Date: Fri, 1 May 2026 12:46:10 -0500 Subject: [PATCH 3/7] test: add regression tests for security fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `Host::tld()` accepts regex metacharacters without crashing. - `HostParser` only treats a scheme as present when the input *starts* with `http://`/`https://`, not when those literals appear elsewhere in a URL's path/query. - `HostParser` still rejects input without an extractable host. 
- Sanity-check the substring-TLD case (`a.b.c.compass.com`) survives the regex→string-op swap in `Host::tld()`. --- tests/Unit/SecurityTest.php | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 tests/Unit/SecurityTest.php diff --git a/tests/Unit/SecurityTest.php b/tests/Unit/SecurityTest.php new file mode 100644 index 0000000..92989d0 --- /dev/null +++ b/tests/Unit/SecurityTest.php @@ -0,0 +1,55 @@ +validate('example.com'); + + it('accepts a TLD containing PCRE metacharacters without raising', function () use ($host) { + $result = $host->tld('(*+?['); + + expect($result)->toBeInstanceOf(Host::class); + }); + + it('still allows downstream inspection after the unusual setter call', function () use ($host) { + $host->tld('(*+?['); + + expect($host->toString())->toBe('example.com'); + }); +}); + +describe('HostParser scheme detection only triggers on a leading scheme', function () { + $validator = getInstance(); + $url = 'evil.example.com/path?redirect=http://attacker.example'; + $host = $validator->validate($url); + + it('extracts the host as evil.example.com regardless of `http://` later in the URL', function () use ($host) { + expect($host->toString())->toBe('evil.example.com'); + }); + + it('still resolves a com TLD for the leading host', function () use ($host) { + expect($host->tld())->toBe('com'); + }); + + it('still resolves the domain as example.com', function () use ($host) { + expect($host->domain())->toBe('example.com'); + }); +}); + +describe('HostParser still rejects unparseable input', function () { + $validator = getInstance(); + + it('throws InvalidArgumentException when the input has no extractable host', function () use ($validator) { + expect(fn () => $validator->validate('http://'))->toThrow(InvalidArgumentException::class); + }); +}); + +describe('Regression: substring TLD match still resolves the right root', function () { + $validator = getInstance(); + $host = 
$validator->validate('a.b.c.compass.com'); + + it('keeps compass.com as the domain after the regex-to-string-op refactor', function () use ($host) { + expect($host->domain())->toBe('compass.com'); + }); +}); From 49aab3b5ddae9d1690b7eb0c19f4d54ee6685e63 Mon Sep 17 00:00:00 2001 From: Adro Morelos Date: Fri, 1 May 2026 12:47:13 -0500 Subject: [PATCH 4/7] docs(usage): add full usage guide and API reference Standalone guide covering installation, secure PSL fetch, caching, worked examples, full API surface, error-handling model, and security notes. README will be slimmed in a follow-up commit and link here. --- docs/USAGE.md | 301 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 301 insertions(+) create mode 100644 docs/USAGE.md diff --git a/docs/USAGE.md b/docs/USAGE.md new file mode 100644 index 0000000..68e037f --- /dev/null +++ b/docs/USAGE.md @@ -0,0 +1,301 @@ +# Doma(in)Validity — Usage Guide + +A complete walkthrough of how to install, configure, and use +`domainvalidity/php-domain-validator` (v3.0.x). + +--- + +## Contents + +- [Installation](#installation) +- [Quick start](#quick-start) +- [Working with the Public Suffix List](#working-with-the-public-suffix-list) + - [Fetching it securely](#fetching-it-securely) + - [Caching strategy](#caching-strategy) +- [API reference](#api-reference) + - [`Factory`](#factory) + - [`Validator`](#validator) + - [`Host`](#host) +- [Worked examples](#worked-examples) +- [Error handling](#error-handling) +- [Security notes](#security-notes) + +--- + +## Installation + +Requires **PHP 8.2 or newer** (v3.x). Install via Composer: + +```bash +composer require domainvalidity/php-domain-validator +``` + +The package has zero runtime dependencies. + +--- + +## Quick start + +```php +use DomainValidity\Factory; + +// Pass the raw contents of the Public Suffix List (PSL). +$psl = file_get_contents(__DIR__ . 
'/storage/public_suffix_list.dat'); +$validator = Factory::make($psl); + +$host = $validator->validate('https://www.example.co.uk/path?x=1'); + +$host->isValid(); // bool — true if a known TLD was matched +$host->tld(); // 'co.uk' +$host->domain(); // 'example.co.uk' +$host->toString(); // 'www.example.co.uk' +$host->isPrivate(); // false (true for entries in the PSL "PRIVATE DOMAINS" section) +``` + +`validate()` accepts plain hostnames *or* full URLs. The scheme, +userinfo, port, path, query, and fragment are all stripped — only the +host portion is validated. + +--- + +## Working with the Public Suffix List + +This package ships **no PSL data of its own** at runtime. You provide +the raw `public_suffix_list.dat` contents to `Factory::make()`. The +canonical source is [https://publicsuffix.org/list/public_suffix_list.dat](https://publicsuffix.org/list/public_suffix_list.dat). + +### Fetching it securely + +The README shows a one-liner for brevity; in production you should +fetch the PSL through a properly-configured HTTPS context, with +timeouts, error handling, and an explicit cache: + +```php +$cachePath = __DIR__ . '/storage/public_suffix_list.dat'; +$cacheAge = is_file($cachePath) ? time() - filemtime($cachePath) : PHP_INT_MAX; + +if ($cacheAge > 86_400) { + $context = stream_context_create([ + 'http' => [ + 'timeout' => 10, + 'follow_location' => 1, + 'header' => "User-Agent: my-app/1.0\r\n", + ], + 'ssl' => [ + 'verify_peer' => true, + 'verify_peer_name' => true, + ], + ]); + + $contents = @file_get_contents( + 'https://publicsuffix.org/list/public_suffix_list.dat', + false, + $context + ); + + if ($contents !== false) { + file_put_contents($cachePath, $contents, LOCK_EX); + } +} + +$psl = file_get_contents($cachePath); +if ($psl === false || $psl === '') { + throw new RuntimeException('Public Suffix List unavailable.'); +} + +$validator = \DomainValidity\Factory::make($psl); +``` + +For long-running services, prefer a real HTTP client (Guzzle, Symfony +HttpClient) and a job/cron that refreshes the PSL daily. 
+ +### Caching strategy + +The PSL is updated a few times per week, so refreshing it more than +once per day is wasteful. Two practical patterns: + +1. **Filesystem cache** (above) — refresh on read, gated by file + `mtime`. Good for small apps and CLI tools. +2. **Job-driven cache** — a daily cron writes the PSL to disk or a + shared store; application processes read from there only. Good for + web apps where you don't want a request to ever block on an external + download. + +You can also keep the parsed `Validator` in memory (e.g. a singleton) +to avoid re-parsing the PSL on every request: + +```php +final class ValidatorRegistry +{ + private static ?\DomainValidity\Validator $instance = null; + + public static function get(): \DomainValidity\Validator + { + return self::$instance ??= \DomainValidity\Factory::make( + (string) file_get_contents(__DIR__ . '/storage/public_suffix_list.dat') + ); + } +} +``` + +--- + +## API reference + +### `Factory` + +```php +namespace DomainValidity; + +final class Factory +{ + public static function make(string $dotDatContent): Validator; +} +``` + +- **`$dotDatContent`** — the raw bytes of `public_suffix_list.dat`. +- **Returns** — a configured `Validator`. +- **Throws** — nothing directly. Malformed PSL content yields a + `Validator` whose internal lookup tables are empty; `validate()` will + return `Host` instances flagged invalid. + +### `Validator` + +```php +namespace DomainValidity; + +class Validator +{ + public function validate(string $host): \DomainValidity\Host\Host; +} +``` + +- **`$host`** — a hostname (`example.com`), a host with port, or a full + URL with scheme/path/query. +- **Returns** — a `Host` instance describing the parsed input. +- **Throws** — `\InvalidArgumentException` if the input cannot be parsed + as a URL/host (e.g. an empty string or `http://`). 
+ +### `Host` + +```php +namespace DomainValidity\Host; + +class Host +{ + public string $original; // exactly what was passed in + public ?string $host; // extracted host portion (lowercase ASCII) + public ?string $domain; // root + TLD (e.g. example.co.uk) + public ?string $tld; // matched public suffix (e.g. co.uk) + public ?bool $isPrivate; // true for PSL "PRIVATE DOMAINS" matches + + public function original(?string $value = null): string|self; + public function tld(?string $value = null): string|self; + public function domain(?string $value = null): string|self; + public function isValid(): bool; + public function isPrivate(?bool $value = null): bool|self; + public function toString(): string; // alias of (string) $host + public function toArray(): array; // ['valid','original','host','domain','tld','private'] + public function __toString(): string; +} +``` + +The accessor/setter methods follow a fluent pattern: call with no +argument to read, call with an argument to mutate and chain. + +`isValid()` returns `true` if a TLD from the PSL was matched **and** +the remaining root passes a basic charset check +(`/^[a-zA-Z0-9.-]+$/`). + +`isPrivate()` is `true` for hosts that fall under a private suffix +(e.g. `*.amazonaws.com`, `*.github.io`). 
+ +--- + +## Worked examples + +### Standard domain + +```php +$h = $v->validate('https://www.example.com/'); +$h->tld(); // 'com' +$h->domain(); // 'example.com' +$h->toString(); // 'www.example.com' +$h->isValid(); // true +``` + +### Multi-level TLD + +```php +$h = $v->validate('https://www.example.co.uk/'); +$h->tld(); // 'co.uk' +$h->domain(); // 'example.co.uk' +``` + +### Private suffix + +```php +$h = $v->validate('d-abc123.execute-api.us-west-1.amazonaws.com'); +$h->tld(); // 'com' +$h->domain(); // 'amazonaws.com' +$h->isPrivate(); // true +``` + +### Invalid TLD + +```php +$h = $v->validate('https://adro.is.a.rocker.and/'); +$h->isValid(); // false +$h->tld(); // '' +$h->domain(); // '' +``` + +### URL with query/path containing `http://` + +```php +$h = $v->validate('evil.example.com/redirect?to=http://attacker.example'); +$h->toString(); // 'evil.example.com' +$h->domain(); // 'example.com' +``` + +--- + +## Error handling + +`validate()` follows a "return a result object, throw only on +unparseable input" model: + +| Input shape | Behavior | +| ---------------------------------------- | ---------------------------------------------------- | +| Valid hostname or URL | Returns `Host` with `isValid()` reflecting the TLD match | +| Hostname with no matching public suffix | Returns `Host` with `isValid() === false` | +| Empty string, `http://`, malformed input | Throws `\InvalidArgumentException` (from `HostParser`) | + +Defensive callers should wrap the call: + +```php +try { + $host = $validator->validate($userInput); +} catch (\InvalidArgumentException $e) { + // unparseable input — log, reject, etc. + return null; +} + +if (!$host->isValid()) { + // parsed but no public suffix matched + return null; +} +``` + +--- + +## Security notes + +- The validator does **not** perform DNS resolution. A `valid` result + means the hostname is well-formed and uses a known public suffix — + not that the domain is registered, reachable, or trustworthy. 
+- Always refresh the Public Suffix List (we recommend daily). Stale + PSL data leads to false negatives (new TLDs not recognized) and + false positives (deprecated entries treated as live). +- See [`SECURITY.md`](../SECURITY.md) for the package's supported + versions and how to report vulnerabilities responsibly. From 13b23b11fb12a74eb970f14b4ea7bd089dd3890d Mon Sep 17 00:00:00 2001 From: Adro Morelos Date: Fri, 1 May 2026 12:47:35 -0500 Subject: [PATCH 5/7] docs(readme): slim README and link to docs/USAGE.md Keep README as a landing page: tagline, requirements, installation, 10-line quick start, and links to USAGE/SECURITY/CHANGELOG. The full usage guide now lives in docs/USAGE.md. --- README.md | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 9f1ea43..c860486 100644 --- a/README.md +++ b/README.md @@ -2,35 +2,58 @@

-# Doma(in)Validity PHP package. +# Doma(in)Validity PHP package -Light PHP package to validate domains. +Light PHP package to validate domains using the Mozilla +[Public Suffix List](https://publicsuffix.org/). -[Doma(in)Validity](https://api.domainvalidity.dev/) was born because I found myself searching online about how to check if a domain was valid. I always ended up using regular expressions that were too complex to account for several scenarios (mainly the TLD having different formats), it was just a pain in the butt because I always had to go back to that code to fix the regex to account for an edge case that I didn't think about. +[Doma(in)Validity](https://api.domainvalidity.dev/) was born because the +usual "validate a domain" regex grows new edge cases every time you +look away — multi-level TLDs (`co.uk`, `com.mx`), IDN labels, private +suffixes (`*.amazonaws.com`). This package outsources the hard part to +the PSL. -### Requirements +## Requirements -- PHP >= 8.2.0 (for v3.x) -- PHP >= 8.1.0 (for v2.x) +- PHP **>= 8.2.0** (for v3.x) +- PHP **>= 8.1.0** (for v2.x) ## Installation -You can install the package via composer: - ```bash composer require domainvalidity/php-domain-validator ``` -## Usage +## Quick start ```php use DomainValidity\Factory; -$contents = file_get_contents('https://publicsuffix.org/list/public_suffix_list.dat'); - -$validator = Factory::make($contents); +$psl = file_get_contents('path/to/public_suffix_list.dat'); +$validator = Factory::make($psl); $host = $validator->validate('www.domainvalidity.dev'); + +$host->isValid(); // true +$host->tld(); // 'dev' +$host->domain(); // 'domainvalidity.dev' +$host->toString(); // 'www.domainvalidity.dev' ``` -> **Note:** You should cache the contents of the public suffix list and download them no more than once per day, as it is not updated more than a few times per week; more frequent downloading is pointless. 
\ No newline at end of file +> Cache the Public Suffix List and refresh at most once per day. It is +> updated only a few times per week, so more frequent fetching is +> wasteful. See the usage guide below for a secure, cached fetch +> pattern. + +## Documentation + +- **[`docs/USAGE.md`](docs/USAGE.md)** — full usage guide, secure PSL + fetching, caching strategy, complete API reference, worked examples, + and error-handling model. +- **[`SECURITY.md`](SECURITY.md)** — supported versions and how to + report vulnerabilities. +- **[`CHANGELOG.md`](CHANGELOG.md)** — release history. + +## License + +MIT — see [`LICENSE`](LICENSE). From 41a53263db1f7535c41dc8a283fec344f01b69a7 Mon Sep 17 00:00:00 2001 From: Adro Morelos Date: Fri, 1 May 2026 12:48:16 -0500 Subject: [PATCH 6/7] docs(security): document hardening fix and deprecate v3.0.0 - CHANGELOG: add 3.0.1 entry with Security/Deprecated/Docs subsections. - SECURITY: mark v3.0.0 as deprecated, add a "Known insecure versions" subsection pointing v3.0.0 users at v3.0.1. No exploit detail is published. See the CHANGELOG for the high-level description of the two fixes. --- CHANGELOG.md | 33 +++++++++++++++++++++++++++++++++ SECURITY.md | 13 ++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9704d1a..9116ba7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,39 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.0.1] - 2026-05-01 + +### Security + +- `Host::tld()` no longer builds a PCRE pattern from a publicly-settable + value. The previous implementation only escaped `.` characters, so any + other PCRE metacharacter in the supplied TLD (e.g. `* + ? 
| ^ $ ( ) + [ ] { } \`) was inserted into a dynamic regex unescaped, allowing + unintended match behavior, PCRE compile errors, and a small ReDoS + surface. The suffix is now removed with `str_ends_with` + `substr` — + no regex, nothing to escape. +- `HostParser` now uses `str_starts_with` for scheme detection. The + previous `strpos(...) !== false` check matched the literals + `http://` / `https://` anywhere in the input, so URLs containing + those substrings in their path or query (e.g. + `evil.example.com/?u=http://x`) were mis-classified as + already-schemed and skipped the `http://` prefix needed by + `parse_url`. + +### Deprecated + +- **v3.0.0 is deprecated** due to the issues above. All consumers should + upgrade to **v3.0.1**. See `SECURITY.md` for the supported-versions + matrix. + +### Docs + +- New `docs/USAGE.md` — full usage guide, secure PSL fetch pattern, + caching strategy, complete API reference, worked examples, error + handling. +- README slimmed down to a landing page that links to the usage guide, + security policy, and changelog. + ## [3.0.0] - 2026-01-31 ### Breaking Changes diff --git a/SECURITY.md b/SECURITY.md index f166e90..7f04202 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,10 +6,21 @@ | Version | Supported | | ------- | ------------------ | -| 3.x | :white_check_mark: | +| 3.0.1+ | :white_check_mark: | +| 3.0.0 | :x: (deprecated — see below) | | 2.x | :white_check_mark: | | 1.x | :x: | +## Known insecure versions + +- **v3.0.0** — contained two defects in the host/TLD parsing path: + a regex-construction issue in `Host::tld()` that escaped only `.` + characters, and a substring-match scheme detection in `HostParser` + that mis-classified URLs containing `http://` later in their + path/query. Both are fixed in **v3.0.1**. We recommend upgrading + immediately. No exploit details are published; see `CHANGELOG.md` + for the high-level description. 
+ ## Reporting a Vulnerability If you discover a security vulnerability within this package, please send an email to Alejandro Morelos at info@domainvalidity.dev. All security vulnerabilities will be promptly addressed. \ No newline at end of file From ca4ce2524fdf278592b6f09f5b13c31333601f23 Mon Sep 17 00:00:00 2001 From: Adro Morelos Date: Fri, 1 May 2026 12:48:29 -0500 Subject: [PATCH 7/7] chore(release): prepare v3.0.1 release Bump composer.json to 3.0.1. Final commit before tagging. --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 303f730..2ec7f9c 100644 --- a/composer.json +++ b/composer.json @@ -1,6 +1,6 @@ { "name": "domainvalidity/php-domain-validator", - "version": "3.0.0", + "version": "3.0.1", "description": "Light PHP package to validate domains.", "type": "library", "license": "MIT",