Skip to content

Commit 196bab8

Browse files
committed
Prevent exceptions from being thrown if the HTML is not parsed correctly #58
1 parent 45f7418 commit 196bab8

File tree

3 files changed

+8
-3
lines changed

3 files changed

+8
-3
lines changed

src/Providers/Html.php

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,9 @@ public function run()
2929
self::extractFromLink($html, $this->bag);
3030
self::extractFromMeta($html, $this->bag);
3131

32-
$main = self::getMainElement($html);
33-
34-
self::extractImages($main, $this->bag, $this->request->getDomain());
32+
if ($main = self::getMainElement($html)) {
33+
self::extractImages($main, $this->bag, $this->request->getDomain());
34+
}
3535

3636
//Title
3737
$title = $html->getElementsByTagName('title');

src/Request.php

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -190,6 +190,8 @@ public function getHtmlContent()
190190
}
191191

192192
$errors = libxml_use_internal_errors(true);
193+
$entities = libxml_disable_entity_loader(true);
194+
193195
$this->htmlContent = new \DOMDocument();
194196

195197
if ((mb_detect_encoding($content) === 'UTF-8') && mb_check_encoding($content, 'UTF-8')) {
@@ -198,7 +200,9 @@ public function getHtmlContent()
198200
}
199201

200202
$this->htmlContent->loadHTML($content);
203+
201204
libxml_use_internal_errors($errors);
205+
libxml_disable_entity_loader($entities);
202206
} catch (\Exception $E) {
203207
return $this->htmlContent = false;
204208
}

src/Url.php

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -516,6 +516,7 @@ private function setPath($path)
516516

517517
if ($this->getScheme() === 'data') {
518518
$this->info['content'] = $path;
519+
519520
return;
520521
}
521522

0 commit comments

Comments
 (0)