Skip to content

Commit e5da180

Browse files
committed
Replace all parse_url use with Psr7 Uris
1 parent 5a4f8f4 commit e5da180

File tree

5 files changed: +34 −92 lines changed

src/Crawler.php

Lines changed: 11 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@
1111
use GuzzleHttp\Client;
1212
use GuzzleHttp\Psr7\Request;
1313
use GuzzleHttp\Psr7\Response;
14+
use GuzzleHttp\Psr7\Uri;
15+
use GuzzleHttp\Psr7\Utils as Psr7Utils;
1416
use Psr\Http\Message\ResponseInterface;
1517
use GuzzleHttp\Exception\RequestException;
1618
use GuzzleHttp\Exception\TooManyRedirectsException;
@@ -28,10 +30,7 @@ class Crawler {
2830
*/
2931
private $client;
3032

31-
/**
32-
* @var string
33-
*/
34-
private $site_path;
33+
private readonly Uri $site_uri;
3534

3635
/**
3736
* @var integer
@@ -51,21 +50,20 @@ class Crawler {
5150
public function __construct(
5251
public CrawlConfig $crawl_config,
5352
) {
54-
$this->site_path = rtrim( SiteInfo::getURL( 'site' ), '/' );
53+
$site_path = rtrim( SiteInfo::getURL( 'site' ), '/' );
54+
$this->site_uri = Psr7Utils::uriFor( $site_path );
5555

5656
$port_override = apply_filters(
5757
Controller::getHookName( 'curl_port' ),
5858
null
5959
);
60-
61-
$base_uri = $this->site_path;
62-
60+
$base_uri = $this->site_uri;
6361
if ( $port_override ) {
64-
$base_uri = "{$base_uri}:{$port_override}";
62+
$base_uri = $base_uri->withPort( $port_override );
6563
}
6664

6765
$opts = [
68-
'base_uri' => $base_uri,
66+
'base_uri' => (string) $base_uri,
6967
'verify' => false,
7068
'http_errors' => false,
7169
'allow_redirects' => false,
@@ -148,7 +146,7 @@ public function crawlComplete(): void {
148146
}
149147

150148
public function crawlPath( PathInfo $detected, array $site_urls ): PromiseInterface {
151-
$absolute_uri = URLHelper::normalize( $this->site_path . $detected->path );
149+
$absolute_uri = URLHelper::normalize( $this->site_uri . $detected->path );
152150
try {
153151
if ( $detected->filename ) {
154152
$request = new Request( 'HEAD', $absolute_uri );
@@ -234,8 +232,8 @@ public function crawlIter( \Iterator $path_iter ): \Iterator {
234232
$path_iter->rewind();
235233
}
236234

237-
$site_host = parse_url( $this->site_path, PHP_URL_HOST );
238-
$site_port = parse_url( $this->site_path, PHP_URL_PORT );
235+
$site_host = $this->site_uri->getHost();
236+
$site_port = $this->site_uri->getPort();
239237
$site_host = $site_port ? $site_host . ":$site_port" : $site_host;
240238
$site_urls = [ "http://$site_host", "https://$site_host" ];
241239

src/SiteInfo.php

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22

33
namespace StaticDeploy;
44

5+
use GuzzleHttp\Psr7\Utils as Psr7Utils;
6+
57
/*
68
Singleton instance to allow instantiating once and allow reading
79
static properties throughout plugin
@@ -195,9 +197,9 @@ public static function getSiteURLHost(): string {
195197
*/
196198
$site_url = self::$info['site_url'];
197199

198-
$url_host = parse_url( $site_url, PHP_URL_HOST );
200+
$url_host = Psr7Utils::uriFor( $site_url )->getHost();
199201

200-
if ( ! is_string( $url_host ) ) {
202+
if ( ! $url_host ) {
201203
$err = 'Failed to get hostname from Site URL';
202204
throw WsLog::ex( $err );
203205
}

src/SitemapParser.php

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
namespace StaticDeploy;
44

55
use GuzzleHttp;
6+
use GuzzleHttp\Psr7\Utils as Psr7Utils;
67
use SimpleXMLElement;
78

89
/**
@@ -415,14 +416,14 @@ protected function parseString( $str ) {
415416
* @return bool
416417
*/
417418
protected function isSitemapURL( $url ) {
418-
$path = parse_url( $this->urlEncode( $url ), PHP_URL_PATH );
419+
$path = Psr7Utils::uriFor( $this->urlEncode( $url ) )->getPath();
419420
return $this->urlValidate( $url ) && (
420421
substr(
421-
(string) $path,
422+
$path,
422423
-strlen( self::XML_EXTENSION ) - 1
423424
) === '.' . self::XML_EXTENSION ||
424425
substr(
425-
(string) $path,
426+
$path,
426427
-strlen( self::XML_EXTENSION_COMPRESSED ) - 1
427428
) === '.' . self::XML_EXTENSION_COMPRESSED
428429
);

src/URLHelper.php

Lines changed: 3 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -81,31 +81,9 @@ public static function modifyUrl( array $changes, string $url = '' ): string {
8181
$url = self::getCurrent();
8282
}
8383

84-
// Parse the url into pieces
85-
$url_array = (array) parse_url( $url );
86-
87-
// The original URL had a query string, modify it.
88-
if ( array_key_exists( 'query', $url_array ) ) {
89-
parse_str( $url_array['query'], $query_array );
90-
foreach ( $changes as $key => $value ) {
91-
$query_array[ $key ] = $value;
92-
}
93-
} else {
94-
// The original URL didn't have a query string, add it.
95-
$query_array = $changes;
96-
}
97-
98-
if (
99-
! isset( $url_array['scheme'] ) ||
100-
! isset( $url_array['host'] ) ||
101-
! isset( $url_array['path'] )
102-
) {
103-
throw WsLog::ex( 'Unable to parse URL' );
104-
}
84+
$uri = Psr7Utils::uriFor( $url );
10585

106-
return $url_array['scheme'] . '://' .
107-
$url_array['host'] . $url_array['path'] . '?' .
108-
http_build_query( $query_array );
86+
return Uri::withQueryValues( $uri, $changes );
10987
}
11088

11189
/**
@@ -193,7 +171,7 @@ public static function isInternalLink(
193171
}
194172
}
195173

196-
$url_host = parse_url( $url, PHP_URL_HOST );
174+
$url_host = Psr7Utils::uriFor( $url )->getHost();
197175

198176
if ( $url_host === $site_url_host ) {
199177
return true;

src/URLParser.php

Lines changed: 12 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22

33
namespace StaticDeploy;
44

5+
use GuzzleHttp\Psr7\Utils as Psr7Utils;
6+
57
trait URLParser {
68

79
/**
@@ -48,56 +50,17 @@ protected function urlEncode( $url ) {
4850

4951
/**
5052
* Validate URL
51-
*
52-
* @param string $url
53-
* @return bool
54-
*/
55-
protected function urlValidate( $url ) {
56-
return (
57-
filter_var( $url, FILTER_VALIDATE_URL ) &&
58-
( $parsed = parse_url( $url ) ) !== false &&
59-
isset( $parsed['host'] ) &&
60-
isset( $parsed['scheme'] ) &&
61-
$this->urlValidateHost( $parsed['host'] ) &&
62-
$this->urlValidateScheme( $parsed['scheme'] )
63-
);
64-
}
65-
66-
/**
67-
* Validate host name
68-
*
69-
* @link https://stackoverflow.com/q/1755144/1668057
70-
*
71-
* @param string $host
72-
* @return bool
7353
*/
74-
protected static function urlValidateHost( $host ) {
75-
return (
76-
// valid chars check
77-
preg_match(
78-
'/^([a-z\d](-*[a-z\d])*)(\.([a-z\d](-*[a-z\d])*))*$/i',
79-
$host
80-
)
81-
// overall length check
82-
&& preg_match( '/^.{1,253}$/', $host )
83-
// length of each label
84-
&& preg_match( '/^[^\.]{1,63}(\.[^\.]{1,63})*$/', $host )
85-
);
86-
}
54+
protected function urlValidate( string $url ): bool {
55+
if ( filter_var( $url, FILTER_VALIDATE_URL ) ) {
56+
try {
57+
Psr7Utils::uriFor( $url );
58+
return true;
59+
} catch ( \Exception ) {
60+
return false;
61+
}
62+
}
8763

88-
/**
89-
* Validate URL scheme
90-
*
91-
* @param string $scheme
92-
* @return bool
93-
*/
94-
protected static function urlValidateScheme( $scheme ) {
95-
return in_array(
96-
$scheme,
97-
[
98-
'http',
99-
'https',
100-
]
101-
);
64+
return false;
10265
}
10366
}

0 commit comments

Comments (0)