Skip to content

Commit 8e69411

Browse files
author
Cristi Constantin
committed
Basic ignore empty OpenGraph props
1 parent 50a0915 commit 8e69411

File tree

5 files changed

+11
-1
lines changed

5 files changed

+11
-1
lines changed

extruct/_extruct.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def extract(htmlstring,
5454
if errors not in ['log', 'ignore', 'strict']:
5555
raise ValueError('Invalid error command, valid values are either "log"'
5656
', "ignore" or "strict"')
57+
5758
try:
5859
tree = parse_xmldom_html(htmlstring, encoding=encoding)
5960
except Exception as e:
@@ -65,6 +66,7 @@ def extract(htmlstring,
6566
return {}
6667
if errors == 'strict':
6768
raise
69+
6870
processors = []
6971
if 'microdata' in syntaxes:
7072
processors.append(
@@ -95,6 +97,7 @@ def extract(htmlstring,
9597
('rdfa', RDFaExtractor().extract_items,
9698
tree,
9799
))
100+
98101
output = {}
99102
for syntax, extract, document in processors:
100103
try:
@@ -108,6 +111,7 @@ def extract(htmlstring,
108111
pass
109112
if errors == 'strict':
110113
raise
114+
111115
if uniform:
112116
uniform_processors = []
113117
if 'microdata' in syntaxes:
@@ -131,6 +135,7 @@ def extract(htmlstring,
131135
output['opengraph'],
132136
None,
133137
))
138+
134139
for syntax, uniform, raw, schema_context in uniform_processors:
135140
try:
136141
if syntax == 'opengraph':

extruct/opengraph.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ def extract_items(self, document, base_url=None):
3232
for el in head.xpath('meta[@property and @content]'):
3333
prop = el.attrib['property']
3434
val = el.attrib['content']
35+
if prop == '' or val == '':
36+
continue
3537
ns = prop.partition(':')[0]
3638
if ns in _OG_NAMESPACES:
3739
namespaces[ns] = _OG_NAMESPACES[ns]

tests/samples/songkick/elysianfields.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
<meta property="og:type" content="songkick-concerts:artist">
2929
<meta property="og:title" content="Elysian Fields">
3030
<meta property="og:description" content="Buy tickets for an upcoming Elysian Fields concert near you. List of all Elysian Fields tickets and tour dates for 2017.">
31+
<meta property="og:description" content="" />
3132
<meta property="og:url" content="http://www.songkick.com/artists/236156-elysian-fields">
3233
<meta property="og:image" content="http://images.sk-static.com/images/media/img/col4/20100330-103600-169450.jpg">
3334
<meta property="og:image" content="http://images.sk-static.com/SECONDARY_IMAGE.jpg">

tests/samples/songkick/elysianfields.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,9 @@
232232
"http://ogp.me/ns#description": [
233233
{
234234
"@value": "Buy tickets for an upcoming Elysian Fields concert near you. List of all Elysian Fields tickets and tour dates for 2017."
235+
},
236+
{
237+
"@value": ""
235238
}
236239
],
237240
"http://ogp.me/ns#image": [

tests/test_extruct.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import pytest
66

77
import extruct
8-
from extruct import SYNTAXES
98
from tests import get_testdata, jsonize_dict, replace_node_ref_with_node_id
109

1110

0 commit comments

Comments
 (0)