Skip to content

Commit cdbb1c0

Browse files
committed
Update to include support for basic Pandas types.
1 parent 7ed0e48 commit cdbb1c0

File tree

4 files changed

+96
-32
lines changed

4 files changed

+96
-32
lines changed

docs/source/index.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ RdfPandas
1515
Introduction
1616
============
1717

18-
RdfPandas is a module providing RDF support for Pandas. It consists of
19-
two simple functions for graph conversion, one is to create DataFrame from
20-
RDFLib Graph data, and another one to create Graph data from DataFrame.
18+
RdfPandas is a module providing RDF support for Pandas. It initially consists
19+
of a single function for graph conversion, which creates RDFLib Graph data
20+
from a Pandas DataFrame.
2121

2222
The graph data can then be serialized using the RDFLib serialize method on the
2323
graph.

rdfpandas/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .graph import to_graph, from_graph
1+
from .graph import to_graph

rdfpandas/graph.py

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# -*- coding: utf-8 -*-
22
import pandas as pd
33
import rdflib
4+
import logging
5+
46

57
def to_graph(df: pd.DataFrame) -> rdflib.Graph:
68
"""
@@ -22,26 +24,15 @@ def to_graph(df: pd.DataFrame) -> rdflib.Graph:
2224

2325
g = rdflib.Graph()
2426

25-
return g
26-
27-
def from_graph(g: rdflib.Graph) -> pd.DataFrame:
28-
"""
29-
Takes RDFLib Graph and returns Pandas DataFrame using subjects as row
30-
indices and predicates as column indices. Object types are inferred from
31-
the object types.
32-
33-
Parameters
34-
----------
35-
g : rdflib.Graph
36-
Graph to be converted into Pandas DataFrame
37-
38-
Returns
39-
-------
40-
pandas.DataFrame
41-
DataFrame created from Graph.
27+
for (index, series) in df.iterrows():
28+
for (column, value) in series.iteritems():
29+
if (type(value) == 'bytes'):
30+
g.add((rdflib.URIRef(index),
31+
rdflib.URIRef(column),
32+
rdflib.Literal(value.decode('utf-8'))))
33+
else:
34+
g.add((rdflib.URIRef(index),
35+
rdflib.URIRef(column),
36+
rdflib.Literal(value)))
4237

43-
"""
44-
45-
df = pd.DataFrame()
46-
47-
return df
38+
return g

tests/test_data_frame_to_graph.py

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
from .context import rdfpandas
44

55
import pandas as pd
6+
import numpy as np
67
import rdflib
78
import rdflib.compare
89

910
import unittest
11+
import logging
1012

1113

1214
class DataFrameToGraphConversionTestCase(unittest.TestCase):
@@ -18,14 +20,85 @@ def test_should_convert_empty_data_frame_to_emty_graph(self):
1820
g_expected = rdflib.Graph()
1921
g_result = rdfpandas.to_graph(df)
2022
self.assertEquals(rdflib.compare.isomorphic(g_expected, g_result), True)
23+
24+
def test_should_convert_data_frame_to_graph_with_fully_qualified_indices(self):
25+
"""Should return Graph with a single String literal.
26+
Assume that we rely on URIs for indices in the first release,
27+
that String is the only datatype supported, and that language handling
28+
of literals is not required.
29+
"""
30+
31+
idx1= pd.Index(data=['http://github.com/cadmiumkitty/rdfpandas/one'])
32+
33+
ds01 = pd.Series(data=['Bytes'], index=[idx1], dtype = np.string_, name = 'http://github.com/cadmiumkitty/rdfpandas/stringu')
34+
35+
ds02 = pd.Series(data=['String'], index=[idx1], dtype = np.unicode_, name = 'http://github.com/cadmiumkitty/rdfpandas/unicodeu')
36+
37+
ds03 = pd.Series(data=[0], index=[idx1], dtype = np.int64, name = 'http://github.com/cadmiumkitty/rdfpandas/int64_1')
38+
ds04 = pd.Series(data=[-9223372036854775808], index=[idx1], dtype = np.int64, name = 'http://github.com/cadmiumkitty/rdfpandas/int64_2')
39+
ds05 = pd.Series(data=[9223372036854775807], index=[idx1], dtype = np.int64, name = 'http://github.com/cadmiumkitty/rdfpandas/int64_3')
40+
41+
ds06 = pd.Series(data=[0], index=[idx1], dtype = np.uint64, name = 'http://github.com/cadmiumkitty/rdfpandas/uint64_1')
42+
ds07 = pd.Series(data=[18446744073709551615], index=[idx1], dtype = np.uint64, name = 'http://github.com/cadmiumkitty/rdfpandas/uint64_2')
2143

22-
def test_should_convert_empty_graph_to_empty_data_frame(self):
23-
"""Should return empty DataFrame for empty Graph"""
24-
g = rdflib.Graph()
25-
df_expected = pd.DataFrame()
26-
df_result = rdfpandas.from_graph(g)
27-
self.assertEquals(df_expected.equals(df_result), True)
44+
ds08 = pd.Series(data=[0.0], index=[idx1], dtype = np.float64, name = 'http://github.com/cadmiumkitty/rdfpandas/float64_1')
45+
ds09 = pd.Series(data=[-1.7976931348623157e+308], index=[idx1], dtype = np.float64, name = 'http://github.com/cadmiumkitty/rdfpandas/float64_2')
46+
ds10 = pd.Series(data=[1.7976931348623157e+308], index=[idx1], dtype = np.float64, name = 'http://github.com/cadmiumkitty/rdfpandas/float64_3')
47+
48+
ds11 = pd.Series(data=[True], index=[idx1], dtype = np.bool_, name = 'http://github.com/cadmiumkitty/rdfpandas/true')
49+
ds12 = pd.Series(data=[False], index=[idx1], dtype = np.bool_, name = 'http://github.com/cadmiumkitty/rdfpandas/false')
50+
51+
df = pd.DataFrame([ds01, ds02, ds03, ds04, ds05, ds06, ds07, ds08, ds09, ds10, ds11, ds12]).T
2852

53+
logging.debug('DF: %s', df)
54+
55+
g_expected = rdflib.Graph()
56+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
57+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/stringu'),
58+
rdflib.Literal('Bytes')))
59+
60+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
61+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/unicodeu'),
62+
rdflib.Literal('String')))
63+
64+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
65+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/int64_1'),
66+
rdflib.Literal(0)))
67+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
68+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/int64_2'),
69+
rdflib.Literal(-9223372036854775808)))
70+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
71+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/int64_3'),
72+
rdflib.Literal(9223372036854775807)))
73+
74+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
75+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/uint64_1'),
76+
rdflib.Literal(0)))
77+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
78+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/uint64_2'),
79+
rdflib.Literal(18446744073709551615)))
80+
81+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
82+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/float64_1'),
83+
rdflib.Literal(0.0)))
84+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
85+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/float64_2'),
86+
rdflib.Literal(-1.7976931348623157e+308)))
87+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
88+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/float64_3'),
89+
rdflib.Literal(1.7976931348623157e+308)))
90+
91+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
92+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/true'),
93+
rdflib.Literal(True)))
94+
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
95+
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/false'),
96+
rdflib.Literal(False)))
97+
98+
g_result = rdfpandas.to_graph(df)
99+
100+
self.assertEquals(rdflib.compare.isomorphic(g_expected, g_result), True)
101+
29102

30103
if __name__ == '__main__':
31104
unittest.main()

0 commit comments

Comments
 (0)