Skip to content

Commit 402f2e5

Browse files
authored
Merge pull request #7 from blaaat/master
Ignore ; or space after charset in meta tag
2 parents 7486765 + 4046e83 commit 402f2e5

File tree

5 files changed

+29
-4
lines changed

5 files changed

+29
-4
lines changed

src/PHPHtmlParser/Dom.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,7 @@ protected function detectCharset()
633633
return false;
634634
}
635635
$matches = [];
636-
if (preg_match('/charset=(.+)/', $content, $matches)) {
636+
if (preg_match('/charset=([^; ]+)/', $content, $matches)) {
637637
$encode->from(trim($matches[1]));
638638
$this->root->propagateEncoding($encode);
639639

src/PHPHtmlParser/Dom/AbstractNode.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ public function __construct()
7070
public function __get($key)
7171
{
7272
// check attribute first
73-
if ( ! is_null($this->getAttribute($key))) {
74-
return $this->getAttribute($key);
73+
if ( ! is_null($value = $this->getAttribute($key))) {
74+
return $value;
7575
}
7676
switch (strtolower($key)) {
7777
case 'outerhtml':

src/PHPHtmlParser/Dom/Tag.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ public function setAttribute($key, $value)
146146
$value = [
147147
'value' => $value,
148148
'doubleQuote' => true,
149+
'didConvertCharset' => true,
149150
];
150151
}
151152
$this->attr[$key] = $value;
@@ -217,9 +218,10 @@ public function getAttribute($key)
217218
return null;
218219
}
219220
$value = $this->attr[$key]['value'];
220-
if (is_string($value) && ! is_null($this->encode)) {
221+
if (is_string($value) && ! is_null($this->encode) && ! ($this->attr[$key]['didConvertCharset'] ?? false)) {
221222
// convert charset
222223
$this->attr[$key]['value'] = $this->encode->convert($value);
224+
$this->attr[$key]['didConvertCharset'] = true;
223225
}
224226

225227
return $this->attr[$key];

tests/DomTest.php

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,21 @@ public function testIncorrectAccess()
3333
$div = $dom->find('div', 0);
3434
$this->assertEquals(null, $div->foo);
3535
}
36+
public function testIncorrectContentType()
37+
{
38+
$dom = new Dom;
39+
$dom->load('<html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8;"></head><body><div class="all"><p>Hey bro, <a href="google.com">click here</a><br /> :)</p></div></body></html>');
40+
$a = $dom->find('a', 0);
41+
$this->assertEquals($a->text, 'click here');
42+
}
43+
44+
public function testCharsetConvertInAttribute()
45+
{
46+
$dom = new Dom;
47+
$dom->loadFromFile(__DIR__ . '/files/ISO-8859-7.html', ['preserveLineBreaks' => true]);
48+
$a = $dom->find('a', 0);
49+
$this->assertEquals('/testη', $a->href);
50+
}
3651

3752
public function testLoadSelfclosingAttr()
3853
{

tests/files/ISO-8859-7.html

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<html>
2+
<head>
3+
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-7">
4+
</head>
5+
<body>
6+
<a href="/testη">EEη<span>XX</span></a>
7+
</body>
8+
</html>

0 commit comments

Comments
 (0)