Skip to content

Commit 8bba891

Browse files
authored
Merge pull request #6 from utopia-php/feat-new-canonical-rules
Enhance email normalization providers by adding custom behavior for h…
2 parents aebfd79 + 00af9c7 commit 8bba891

File tree

18 files changed

+507
-417
lines changed

18 files changed

+507
-417
lines changed

composer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"require": {
2828
"php": ">=8.0",
29-
"utopia-php/framework": "0.33.*",
29+
"utopia-php/validators": "^0.0.1",
3030
"utopia-php/cli": "^0.15",
3131
"utopia-php/domains": "^0.8",
3232
"utopia-php/fetch": "^0.4"

src/Emails/Canonicals/Provider.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ protected function removePlusAddressing(string $local): string
4848

4949
/**
5050
* Remove all dots from local part
51+
* Can be overridden by providers for custom behavior
5152
*/
5253
protected function removeDots(string $local): string
5354
{
@@ -62,6 +63,17 @@ protected function removeHyphens(string $local): string
6263
return str_replace('-', '', $local);
6364
}
6465

66+
/**
 * Strip a hyphen-based subaddress (Yahoo style) from the local part.
 *
 * Everything from the last hyphen onward is discarded. A local part
 * that contains no hyphen is returned unchanged.
 *
 * @param string $local Local part of the address (the text before the @)
 * @return string The local part up to, but not including, its last hyphen
 */
protected function removeHyphenSubaddress(string $local): string
{
    $lastHyphen = strrpos($local, '-');

    // No hyphen at all: there is no subaddress to strip.
    if ($lastHyphen === false) {
        return $local;
    }

    // When the only hyphen is the first character this yields an empty string.
    return substr($local, 0, $lastHyphen);
}
76+
6577
/**
6678
* Convert local part to lowercase
6779
*/

src/Emails/Canonicals/Providers/Fastmail.php

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
* Fastmail
99
*
1010
* Handles Fastmail email normalization
11-
* - TODO: Plus addressing and dots removal commented out until manual confirmation
12-
* - Preserves dots and hyphens in local part
11+
* - Preserves all characters in local part (no subaddress or dot removal)
1312
* - Normalizes to fastmail.com domain
1413
*/
1514
class Fastmail extends Provider
@@ -28,17 +27,8 @@ public function getCanonical(string $local, string $domain): array
2827
// Convert to lowercase
2928
$normalizedLocal = $this->toLowerCase($local);
3029

31-
// TODO: Commented out until manual confirmation of Fastmail's plus addressing and dots support
32-
// Check if there's plus addressing
33-
// $hasPlus = strpos($normalizedLocal, '+') !== false && strpos($normalizedLocal, '+') > 0;
34-
35-
// Remove plus addressing (everything after +)
36-
// $normalizedLocal = $this->removePlusAddressing($normalizedLocal);
37-
38-
// Remove dots only if there was plus addressing (Fastmail treats dots as aliases only with plus)
39-
// if ($hasPlus) {
40-
// $normalizedLocal = $this->removeDots($normalizedLocal);
41-
// }
30+
// Fastmail doesn't remove subaddresses or dots
31+
// Just normalize case and domain
4232

4333
return [
4434
'local' => $normalizedLocal,

src/Emails/Canonicals/Providers/Generic.php

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
* Generic
99
*
1010
* Handles generic email normalization for unsupported providers
11-
* - TODO: Plus addressing, dots, and hyphens removal commented out until manual confirmation
12-
* - Preserves all other characters
11+
* - Preserves all characters in local part (no subaddress or dot removal)
12+
* - Only converts to lowercase
1313
*/
1414
class Generic extends Provider
1515
{
@@ -24,18 +24,8 @@ public function getCanonical(string $local, string $domain): array
2424
// Convert to lowercase
2525
$normalizedLocal = $this->toLowerCase($local);
2626

27-
// TODO: Commented out until manual confirmation of generic providers' plus addressing, dots, and hyphens support
28-
// Check if there's plus addressing
29-
// $hasPlus = strpos($normalizedLocal, '+') !== false && strpos($normalizedLocal, '+') > 0;
30-
31-
// Remove plus addressing (everything after +)
32-
// $normalizedLocal = $this->removePlusAddressing($normalizedLocal);
33-
34-
// Remove dots and hyphens only if there was plus addressing (generic providers treat these as aliases only with plus)
35-
// if ($hasPlus) {
36-
// $normalizedLocal = $this->removeDots($normalizedLocal);
37-
// $normalizedLocal = $this->removeHyphens($normalizedLocal);
38-
// }
27+
// Generic providers don't remove subaddresses or dots
28+
// Just normalize case
3929

4030
return [
4131
'local' => $normalizedLocal,

src/Emails/Canonicals/Providers/Gmail.php

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77
/**
88
* Gmail
99
*
10-
* Handles Gmail and Googlemail email normalization
10+
* Handles Gmail and Googlemail email normalization based on validator.js rules
1111
* - Removes all dots from local part
12-
* - Removes plus addressing
12+
* - Removes plus addressing (subaddress)
1313
* - Normalizes to gmail.com domain
14+
* - Converts googlemail.com to gmail.com
1415
*/
1516
class Gmail extends Provider
1617
{
@@ -28,11 +29,16 @@ public function getCanonical(string $local, string $domain): array
2829
// Convert to lowercase
2930
$normalizedLocal = $this->toLowerCase($local);
3031

31-
// Remove all dots from local part
32+
// Remove plus addressing (subaddress) - everything after +
33+
$normalizedLocal = $this->removePlusAddressing($normalizedLocal);
34+
35+
// Remove dots from local part
3236
$normalizedLocal = $this->removeDots($normalizedLocal);
3337

34-
// Remove plus addressing (everything after +)
35-
$normalizedLocal = $this->removePlusAddressing($normalizedLocal);
38+
// Ensure local part is not empty after normalization
39+
if (empty($normalizedLocal)) {
40+
throw new \InvalidArgumentException('Email local part cannot be empty after normalization');
41+
}
3642

3743
return [
3844
'local' => $normalizedLocal,

src/Emails/Canonicals/Providers/Icloud.php

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
/**
88
* iCloud
99
*
10-
* Handles Apple iCloud email normalization
11-
* - TODO: Plus addressing and dots removal commented out until manual confirmation
12-
* - Preserves dots and hyphens in local part
10+
* Handles Apple iCloud email normalization based on validator.js rules
11+
* - Removes plus addressing (subaddress)
12+
* - Preserves dots in local part
1313
* - Normalizes to icloud.com domain
1414
*/
1515
class Icloud extends Provider
@@ -28,17 +28,13 @@ public function getCanonical(string $local, string $domain): array
2828
// Convert to lowercase
2929
$normalizedLocal = $this->toLowerCase($local);
3030

31-
// TODO: Commented out until manual confirmation of iCloud's plus addressing and dots support
32-
// Check if there's plus addressing
33-
// $hasPlus = strpos($normalizedLocal, '+') !== false && strpos($normalizedLocal, '+') > 0;
31+
// Remove plus addressing (subaddress) - everything after +
32+
$normalizedLocal = $this->removePlusAddressing($normalizedLocal);
3433

35-
// Remove plus addressing (everything after +)
36-
// $normalizedLocal = $this->removePlusAddressing($normalizedLocal);
37-
38-
// Remove dots only if there was plus addressing (iCloud treats dots as aliases only with plus)
39-
// if ($hasPlus) {
40-
// $normalizedLocal = $this->removeDots($normalizedLocal);
41-
// }
34+
// Ensure local part is not empty after normalization
35+
if (empty($normalizedLocal)) {
36+
throw new \InvalidArgumentException('Email local part cannot be empty after normalization');
37+
}
4238

4339
return [
4440
'local' => $normalizedLocal,

src/Emails/Canonicals/Providers/Outlook.php

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,26 @@
77
/**
88
* Outlook
99
*
10-
* Handles Outlook, Hotmail, and Live email normalization
11-
* - TODO: Plus addressing removal commented out until manual confirmation
10+
* Handles Outlook, Hotmail, and Live email normalization based on validator.js rules
11+
* - Removes plus addressing (subaddress)
1212
* - Preserves dots in local part
1313
* - Normalizes to outlook.com domain
1414
*/
1515
class Outlook extends Provider
1616
{
1717
private const SUPPORTED_DOMAINS = [
18-
'outlook.com', 'hotmail.com', 'live.com',
19-
'outlook.co.uk', 'hotmail.co.uk', 'live.co.uk',
18+
'outlook.com', 'outlook.at', 'outlook.be', 'outlook.cl', 'outlook.co.il', 'outlook.co.nz', 'outlook.co.th', 'outlook.co.uk',
19+
'outlook.com.ar', 'outlook.com.au', 'outlook.com.br', 'outlook.com.gr', 'outlook.com.pe', 'outlook.com.tr', 'outlook.com.vn',
20+
'outlook.cz', 'outlook.de', 'outlook.dk', 'outlook.es', 'outlook.fr', 'outlook.hu', 'outlook.id', 'outlook.ie',
21+
'outlook.in', 'outlook.it', 'outlook.jp', 'outlook.kr', 'outlook.lv', 'outlook.my', 'outlook.ph', 'outlook.pt',
22+
'outlook.sa', 'outlook.sg', 'outlook.sk',
23+
'hotmail.com', 'hotmail.at', 'hotmail.be', 'hotmail.ca', 'hotmail.cl', 'hotmail.co.il', 'hotmail.co.nz', 'hotmail.co.th', 'hotmail.co.uk',
24+
'hotmail.com.ar', 'hotmail.com.au', 'hotmail.com.br', 'hotmail.com.gr', 'hotmail.com.mx', 'hotmail.com.pe', 'hotmail.com.tr', 'hotmail.com.vn',
25+
'hotmail.cz', 'hotmail.de', 'hotmail.dk', 'hotmail.es', 'hotmail.fr', 'hotmail.hu', 'hotmail.id', 'hotmail.ie',
26+
'hotmail.in', 'hotmail.it', 'hotmail.jp', 'hotmail.kr', 'hotmail.lv', 'hotmail.my', 'hotmail.ph', 'hotmail.pt',
27+
'hotmail.sa', 'hotmail.sg', 'hotmail.sk',
28+
'live.com', 'live.be', 'live.co.uk', 'live.com.ar', 'live.com.mx', 'live.de', 'live.es', 'live.eu', 'live.fr', 'live.it', 'live.nl',
29+
'msn.com', 'passport.com',
2030
];
2131

2232
private const CANONICAL_DOMAIN = 'outlook.com';
@@ -31,9 +41,13 @@ public function getCanonical(string $local, string $domain): array
3141
// Convert to lowercase
3242
$normalizedLocal = $this->toLowerCase($local);
3343

34-
// TODO: Commented out until manual confirmation of Outlook's plus addressing support
35-
// Remove plus addressing (everything after +)
36-
// $normalizedLocal = $this->removePlusAddressing($normalizedLocal);
44+
// Remove plus addressing (subaddress) - everything after +
45+
$normalizedLocal = $this->removePlusAddressing($normalizedLocal);
46+
47+
// Ensure local part is not empty after normalization
48+
if (empty($normalizedLocal)) {
49+
throw new \InvalidArgumentException('Email local part cannot be empty after normalization');
50+
}
3751

3852
return [
3953
'local' => $normalizedLocal,

src/Emails/Canonicals/Providers/Protonmail.php

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
* ProtonMail
99
*
1010
* Handles ProtonMail email normalization
11-
* - TODO: Plus addressing and dots removal commented out until manual confirmation
12-
* - Preserves dots and hyphens in local part
11+
* - Preserves all characters in local part (no subaddress or dot removal)
1312
* - Normalizes to protonmail.com domain
1413
*/
1514
class Protonmail extends Provider
@@ -28,17 +27,8 @@ public function getCanonical(string $local, string $domain): array
2827
// Convert to lowercase
2928
$normalizedLocal = $this->toLowerCase($local);
3029

31-
// TODO: Commented out until manual confirmation of ProtonMail's plus addressing and dots support
32-
// Check if there's plus addressing
33-
// $hasPlus = strpos($normalizedLocal, '+') !== false && strpos($normalizedLocal, '+') > 0;
34-
35-
// Remove plus addressing (everything after +)
36-
// $normalizedLocal = $this->removePlusAddressing($normalizedLocal);
37-
38-
// Remove dots only if there was plus addressing (ProtonMail treats dots as aliases only with plus)
39-
// if ($hasPlus) {
40-
// $normalizedLocal = $this->removeDots($normalizedLocal);
41-
// }
30+
// ProtonMail doesn't remove subaddresses or dots
31+
// Just normalize case and domain
4232

4333
return [
4434
'local' => $normalizedLocal,

src/Emails/Canonicals/Providers/Yahoo.php

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,15 @@
77
/**
88
* Yahoo
99
*
10-
* Handles Yahoo email normalization
11-
* - TODO: Plus addressing, dots, and hyphens removal commented out until manual confirmation
12-
* - Preserves dots and hyphens in local part
10+
* Handles Yahoo email normalization based on validator.js rules
11+
* - Removes hyphen-based subaddress (everything after last -)
12+
* - Preserves dots in local part
1313
* - Normalizes to yahoo.com domain
1414
*/
1515
class Yahoo extends Provider
1616
{
1717
private const SUPPORTED_DOMAINS = [
18-
'yahoo.com', 'yahoo.co.uk', 'yahoo.ca',
18+
'yahoo.com', 'yahoo.co.uk', 'yahoo.ca', 'yahoo.de', 'yahoo.fr', 'yahoo.in', 'yahoo.it',
1919
'ymail.com', 'rocketmail.com',
2020
];
2121

@@ -31,20 +31,13 @@ public function getCanonical(string $local, string $domain): array
3131
// Convert to lowercase
3232
$normalizedLocal = $this->toLowerCase($local);
3333

34-
// TODO: Commented out until manual confirmation of Yahoo's plus addressing, dots, and hyphens support
35-
// Check if there's plus addressing
36-
// $hasPlus = strpos($normalizedLocal, '+') !== false && strpos($normalizedLocal, '+') > 0;
34+
// Remove hyphen-based subaddress (everything after last -)
35+
$normalizedLocal = $this->removeHyphenSubaddress($normalizedLocal);
3736

38-
// Remove plus addressing (everything after +)
39-
// $normalizedLocal = $this->removePlusAddressing($normalizedLocal);
40-
41-
// Remove dots only if there was plus addressing (Yahoo treats dots as aliases only with plus)
42-
// if ($hasPlus) {
43-
// $normalizedLocal = $this->removeDots($normalizedLocal);
44-
// }
45-
46-
// Remove hyphens (Yahoo treats hyphens as aliases)
47-
// $normalizedLocal = $this->removeHyphens($normalizedLocal);
37+
// Ensure local part is not empty after normalization
38+
if (empty($normalizedLocal)) {
39+
throw new \InvalidArgumentException('Email local part cannot be empty after normalization');
40+
}
4841

4942
return [
5043
'local' => $normalizedLocal,
src/Emails/Canonicals/Providers/Yandex.php

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
<?php
2+
3+
namespace Utopia\Emails\Canonicals\Providers;
4+
5+
use Utopia\Emails\Canonicals\Provider;
6+
7+
/**
 * Yandex
 *
 * Canonicalizes Yandex email addresses based on validator.js rules
 * - Lowercases the local part and otherwise leaves it untouched
 *   (no subaddress or dot removal)
 * - Maps every supported domain to yandex.ru
 */
class Yandex extends Provider
{
    private const CANONICAL_DOMAIN = 'yandex.ru';

    private const SUPPORTED_DOMAINS = [
        'yandex.ru',
        'yandex.ua',
        'yandex.kz',
        'yandex.com',
        'yandex.by',
        'ya.ru',
    ];

    /**
     * List every domain this provider handles.
     */
    public function getSupportedDomains(): array
    {
        return self::SUPPORTED_DOMAINS;
    }

    /**
     * Domain that all supported domains are normalized to.
     */
    public function getCanonicalDomain(): string
    {
        return self::CANONICAL_DOMAIN;
    }

    /**
     * Tell whether the given domain is handled by this provider.
     * The comparison is strict and exact against the supported list.
     */
    public function supports(string $domain): bool
    {
        return in_array($domain, self::SUPPORTED_DOMAINS, true);
    }

    /**
     * Build the canonical form of an address.
     *
     * Only the case of the local part is changed; the incoming domain is
     * replaced by the canonical domain.
     *
     * @return array Canonical parts under the keys 'local' and 'domain'
     */
    public function getCanonical(string $local, string $domain): array
    {
        return [
            'local' => $this->toLowerCase($local),
            'domain' => self::CANONICAL_DOMAIN,
        ];
    }
}

0 commit comments

Comments
 (0)