diff --git a/.gitignore b/.gitignore index a2c9c9e..9a919f4 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,5 @@ Examples/panini_stickers/upload/* Examples/panini_stickers/output_data/* Examples/adventureWorks/output_data/adventureworks.rdf.ttl + +.DS_Store diff --git a/custom/conversions.class.php b/custom/conversions.class.php new file mode 100644 index 0000000..99e703a --- /dev/null +++ b/custom/conversions.class.php @@ -0,0 +1,23 @@ + . + +This document assumes basic knowledge of RDF and the Turtle syntax. For more +information, visit the [W3C team submission on Turtle - Terse RDF Triple +Language](http://www.w3.org/TeamSubmission/turtle/) + +## Vertere Vocabulary + +All Vertere-specific instructions are defined in the Vertere vocabulary +namespace, which is introduced as the base namespace at the top of the +conversion document. The namespace is not yet formally defined, so the user is +free to fill in any URI they like. + + + @prefix : . + +Next, the user is able to declare extra prefixes to vocabulary (or ontology) +namespaces. This allows using the qualified name later in the spec, instead of +having to write the full URI to a resource. For example, if we want to map a +CSV file to concepts in the [TRANSIT +vocabulary](http://vocab.org/transit/terms/), we can declare the prefix +`transit`. This way, we can use `transit:Stop` instead of having to write +``. + + + @prefix transit: . + +## Entry Point + +The entry point to the mapping specification is a `:Spec` object (here +identified as the document itself), which gives basic information about the +data source. Such an object can be declared by stating that the root URI of +the type `:Spec` is. + + + <#> a :Spec . + +The spec resource expects the following metadata about the data source: + + * **Format** +Supplies the format of the datafile that needs to be mapped. possible values +(for now) are `:CSV` or `:TSV`, depending on whether your mapping a file using +commas or tabs to separate the values. 
+ + + <#> :format :CSV . + + * **Header rows** +Supplies a binary value that indicates if the file contains a header row (`1`) +or not (`0`). + + + <#> :header_rows 0 . + + * **Expected header** +When `:header_rows` is set to 1, you can supply the header row you expect in +the document. + + + <#> :expected_header ( "\"id\",\"code\",\"local_code\",\"name\",\"continent\" ). + + * **Resource** +With resource, you declare the resources that the mapping process will output. +The resources that are Each mapping for each resource that is defined here, + + + <#> :resource <#customer>, <#email>, <#phone>, <#affiliation>, <#sales_person> . + + * **Base URI** +The base URI define the namespace where the created resources will be based +under. + + + <#> :base_uri "" ; + +Turtle allows a shorter notation when several triples share the same subject. +Declaring all above triples can be written like such: + + + + <#> a :Spec; + :format :CSV; + :header_rows 1; + :expected_header ( "\"id\",\"code\",\"local_code\",\"name\",\"continent\",\"iso_country\",\"wikipedia_link\",\"keywords\"" ); + :resource <#continent>, <#country>, <#region>, <#ourairports_region_page>, <#wikipedia_page>, <#dbpedia_resource>; + :base_uri "http://data.kasabi.com/dataset/world_geography/". + + +# Declaring mappings + +## Declaring and identifying resources + +RDF uses unique URIs to identify resources. This means that these URIs need to +be created from the dataset. This is done by declaring the resources and +adding `:identify` property. + +Declaring a resource works the same way as creating the `:Spec` object. This +time, we choose a variable name, for example _Continent_, and say it is a +`:Resource`. + + + + <#continent> a :Resource . + +Next, we specify how the identifying URI should be created. The value of +`:identify` is a _blank node_, which is a resource that does not really exists +(and has no URI), and is added in Turtle between `[]`. 
Between these brackets, +you can add predicates and objects as you would with any other resource. We +use two predicates with our blank node: + + * **Source column** +With source column, we define from which column, the values will be used to +create the unique URI. This is typically a column with IDs (or primary keys), +where the values are surely unique. For this, the predicate `:source_column` +is used, the value is an _integer_, representing the column index (starting +from 1). + + * **Base URI** +Sometimes we want to use collections in our URIs. You can specify a different +base URI using `:base_uri`, the value is a _string_. + +This example will, for example, create the uri _http://data.kasabi.com/dataset +/world-geography/continents/Europe_ if a value in the fifth column would be +"Europe". + + + + <#continent> a :Resource; + :identity [ + :source_column 5; + :base_uri "http://data.kasabi.com/dataset/world-geography/continents/" + ] . + + +In some case, columns will need to be combined in order to obtain a unique +value (e.g., firstname and lastname). With the predicate `:source_columns` you +can refer to the indexes of the columns you want to combine. The value are two +or more integers between brackets, separated by a space. By adding +`:source_columns_glue`, you can define a string that will be used to glue the +pieces together (e.g., underscore). A slash `/` is used to create a deeper +hierarchy in the URI. + + + + <#ourairports_region_page> a :Resource; + :type bibo:Webpage; + :identity [ + :source_columns ( 6 3 ) ; + :source_column_glue "/" ; + :base_uri "http://www.ourairports.com/countries/" + ]. + +### Using URI templates + +Besides simple concatenation, the URI might be constructed in more complex +manner. For that reason, are URI templates supported. URI templates provide a +flexible way to specify a URI using variables, and asign values afterwards. +The spec can be found [here](http://tools.ietf.org/html/rfc6570). 
+ +A URI template is added to value of `:identify`, by using the following +predicates: + + * **Template** +You can specify a template using `:template`, the value is a _string_. + + * **Template variables** +The variables that need to fill the variables used in the template, are added +using `:template_vars`. The value is a list of blank nodes, each specifying a +variable name and a source column: + + * **Variable name** +This value represents the name of the variable used in the template. For this, +the predicate `:variable` is used. + + * **Source column** +With source column, we define from which column, the values will be used to +create the unique URI. This is typically a column with IDs (or primary keys), +where the values are surely unique. For this, the predicate `:source_column` +is used, the value is an _integer_, representing the column index (starting +from 1). + +The above example can also be written as: + + + + <#ourairports_region_page> a :Resource; + :type bibo:Webpage; + :identity [ + :template "http://www.ourairports.com/countries/{country_id}/{local_id}"; + :template_vars [ + :variable "country_id"; + :source_column 6 + ],[ + :variable "local_id"; + :source_column 3 + ] + ]. + +## Typing resources + +Typing is the most basic operation in mapping. In the output RDF, it will +create triples using the predicate `rdf:type` (or in Turtle the shorthand +`a`). The object of this triple, will typically be a class, defined in the +ontology that we are mapping to. The created resource, will therefore be an +instance of this type. In this mapping language, we type a resource by using +the `:type` predicate, followed by the URI of the concept. + + + <#wikipedia_page> :type bibo:Webpage . + +In this example, `bibo` represents the [Bibliographic Ontology +Specification](http://purl.org/ontology/bibo/) namespace. Therefore, +`bibo:Webpage` can also be written as +`` . 
+ +## Adding attributes and relations to resources + +Besides typing, you might want to add some related data to your resource. In +RDF (or typically in [Linked Data](http://linkeddata.org/)), this is done in +two ways. They both use a blank node as object. + +Firstly, by adding _links_ to other resources, created from your data set or +already existing on the Web. In this mapping language defined with the +`:relationship` predicate. The object, a blank node, typically has two +predicates: + + * The `:property` predicate, which refers to the property in the ontology you want to use. + * The `:object_from` predicate, which refers to another resource, declared elsewhere in the document. + + + <#region> :relationship [ + :property owl:sameAs; + :object_from <#dbpedia_resource> + ] . + + owl:sameAs . + +Secondly, by adding triples with _literals_ as object value. In many cases, +the values of a column in your CSV file just need to be added as a string or +integer. In this language defined with the `:attribute` predicate. The object, +a blank node, typically has these predicates: + + * The `:property` predicate, which refers to the property in the ontology you want to use to add the literal value. + * The `:source_column` predicate, which refers to a column using its index. + * The `:language` predicate, which can add a language code to a string value. + * The `:datatype` predicate, which adds a XSD datatype to the value + +Mapping Output example + + + + <#region> [ + :property geo:alt; + :source_column 7; + :datatype xsd:float; + ] . + + geo:alt "3.14"^^xsd:float . + + + <#region> [ + :property rdfs:label; + :source_column 7; + :language "en"; + ] . + + rdfs:label "Provence"@en . 
+ +A fully mapped resource typically consists of typing, identity, relations and +attributes, as shown in the combined example below: + + + + <#region> a :Resource; + :type places:Region; + :identity [ + :source_column 2; + :base_uri "http://data.kasabi.com/dataset/world-geography/regions/" + ]; + :relationship [ + :property owl:sameAs; + :object_from <#dbpedia_resource> + ], + [ + :property foaf:isPrimaryTopicOf; + :object_from <#wikipedia_page> + ], + :attribute [ + :property fly:iso_code; + :source_column 2 + ], + [ + :property foaf:name; + :source_column 4 + ]. + +## Processing column values + +Column values can be processed before using them. This is done by calling a +process function, which is specified by adding the predicate `:process`. Its +object is an ordered list of defined functions which will be executed +sequentially. We discuss some of them in detail. + +### Using conversions + +Conversions transform a value into another value and are specified as custom +PHP functions. These function are specified in the class `Conversions` located +at _custom/conversions.class.php_. + + + + + + +This class can be extended with extra functions. In the mapping file, a +function is called by adding `:functionname` to the object list. In the +following example, we add the altitude of a region. The values in the data +columns are in feet, so we use a process function to convert them. + + + + <#region> a :Resource; + + :attribute [ + :property geo:alt; + :source_column 7; + :datatype xsd:float; + :process (:feet_to_metres) + ] . + + +### Using regular expressions + +With the Vertere mapping language, you can also specify regular expression +patterns to transform the values in certain columns. + +In this example, we transform the Wikipedia URIs, defined in another resource +`<#wikipedia_page>`, to DBpedia URIs. We can use this `<#dbpedia_resource>` +elsewhere as an object. 
(e.g., to create _sameAs_ links) + + + + <#dbpedia_resource> a :Resource; + :identity [ + :source_resource <#wikipedia_page>; + :base_uri "" ; + :process ( :regex ); + :regex_match "http://[^/]*/wiki/(.*)"; + :regex_output "http://dbpedia.org/resource/${1}"; + ] . + +## Using lookup + +## Transforming values with process functions + +## Combining techniques + +Many of the above techniques can be combined. + +# Example + +The following example maps a CSV file with airports using the TRANSIT, PLACES, +GEO, GEORSS, NAPTAN, FOAF, FLY, SPACEREL and BIBO volcabulary. + + + + "id","ident","type","name","latitude_deg","longitude_deg","elevation_ft","continent","iso_country","iso_region","municipality","scheduled_service","gps_code","iata_code","local_code","home_link","wikipedia_link","keywords" + 6523,"00A","heliport","Total Rf Heliport",40.07080078125,-74.9336013793945,11,"NA","US","US-PA","Bensalem","no","00A",,"00A",,, + 6524,"00AK","small_airport","Lowell Field",59.94919968,-151.695999146,450,"NA","US","US-AK","Anchor Point","no","00AK",,"00AK",,, + 6525,"00AL","small_airport","Epps Airpark",34.8647994995117,-86.7703018188477,820,"NA","US","US-AL","Harvest","no","00AL",,"00AL",,, + 6526,"00AR","heliport","Newport Hospital & Clinic Heliport",35.608699798584,-91.2548980712891,237,"NA","US","US-AR","Newport","no","00AR",,"00AR",,, + 6527,"00AZ","small_airport","Cordes Airport",34.3055992126465,-112.165000915527,3810,"NA","US","US-AZ","Cordes","no","00AZ",,"00AZ",,, + 6528,"00CA","small_airport","Goldstone /Gts/ Airport",35.3504981995,-116.888000488,3038,"NA","US","US-CA","Barstow","no","00CA",,"00CA",,, + 6529,"00CO","small_airport","Cass Field",40.622200012207,-104.34400177002,4830,"NA","US","US-CO","Briggsdale","no","00CO",,"00CO",,, + 6531,"00FA","small_airport","Grass Patch Airport",28.6455001831055,-82.2190017700195,53,"NA","US","US-FL","Bushnell","no","00FA",,"00FA",,, + 6532,"00FD","heliport","Ringhaver 
Heliport",28.8465995788574,-82.3453979492188,25,"NA","US","US-FL","Riverview","no","00FD",,"00FD",,, + + + + @prefix : . + @prefix bibo: . + @prefix fly: . + @prefix foaf: . + @prefix geo: . + @prefix georss: . + @prefix naptan: . + @prefix owl: . + @prefix places: . + @prefix rdf: . + @prefix rdfs: . + @prefix spacerel: . + @prefix transit: . + @prefix xsd: . + + # 1 id + # 2 ident + # 3 type + # 4 name + # 5 latitude_deg + # 6 longitude_deg + # 7 elevation_ft + # 8 continent + # 9 iso_country + # 10 iso_region + # 11 municipality + # 12 scheduled_service + # 13 gps_code + # 14 iata_code + # 15 local_code + # 16 home_link + # 17 wikipedia_link + # 18 keywords + + <#> a :Spec; + :format :CSV; + :header_rows 1; + :expected_header ( "\"id\",\"ident\",\"type\",\"name\",\"latitude_deg\",\"longitude_deg\",\"elevation_ft\",\"continent\",\"iso_country\",\"iso_region\",\"municipality\",\"scheduled_service\",\"gps_code\",\"iata_code\",\"local_code\",\"home_link\",\"wikipedia_link\",\"keywords\"" ); + :resource <#airport>, <#airport_type>, <#country>, <#continent>, <#region>, <#municipality>, <#wikipedia_page>, <#dbpedia_resource>, <#ourairports_page>, <#naptan_resource>; + :base_uri "http://data.kasabi.com/dataset/airports/" . 
+ + <#airport> a :Resource + ; :identity [ :source_column 2; ] + ; :type fly:Airport, transit:Stop, naptan:Airport + ; :relationship + [ :property rdf:type; :object_from <#airport_type> ], + [ :property spacerel:within; :object_from <#municipality> ], + [ :property spacerel:within; :object_from <#region> ], + [ :property spacerel:within; :object_from <#country> ], + [ :property spacerel:within; :object_from <#continent> ], + [ :property foaf:isPrimaryTopicOf; :object_from <#wikipedia_page> ], + [ :property foaf:isPrimaryTopicOf; :object_from <#ourairports_page> ], + [ :property owl:sameAs; :object_from <#dbpedia_resource> ], + [ :property owl:sameAs; :object_from <#naptan_resource> ] + ; :attribute + [ :property geo:lat; :source_column 5; :datatype xsd:float ], + [ :property geo:long; :source_column 6; :datatype xsd:float ], + [ :property geo:alt; :source_column 7; :datatype xsd:float; :process ( :feet_to_metres ); ], + [ :property georss:point; :source_columns (5 6); :source_column_glue " " ], + [ :property foaf:name; :source_column 4; :language "en" ], + [ :property fly:icao_code; :source_column 2 ], + [ :property fly:scheduled_service; :source_column 12; :lookup <#boolean_lookup> ] + . + + <#airport_type> a :Resource + ; :identity [ :source_column 3; :process ( :normalise :title_case ); :base_uri "http://data.kasabi.com/dataset/airports/schema/" ] + ; :type rdfs:Class + ; :attribute + [ + :property rdfs:label; + :source_column 3; + :process ( :regex :title_case ); + :regex_match "_"; + :regex_output " "; + ] + . + + <#continent> a :Resource + ; :identity [ + :source_column 8; + :base_uri "http://data.kasabi.com/dataset/world-geography/continents/" + ] + . + + <#country> a :Resource; + :identity [ + :source_column 9; + :base_uri "http://data.kasabi.com/dataset/world-geography/countries/" + ] . 
+ + <#region> a :Resource; + :type places:Region; + :identity [ + :source_column 10; + :base_uri "http://data.kasabi.com/dataset/world-geography/regions/" + ]; + :relationship [ + :property spacerel:contains; + :object_from <#municipality> + ] . + + <#municipality> a :Resource; + :type places:Municipality; + :identity [ + :source_column 11; + :container "municipalities"; + :process ( :flatten_utf8 :normalise ); + ]; + :relationship [ + :property spacerel:within; + :object_from <#region> + ]; + :attribute [ + :property rdfs:label; + :source_column 11 + ] . + + <#wikipedia_page> a :Resource; + :type bibo:Webpage; + :identity [ + :source_column 17; + :base_uri ""; + :process ( :regex ); + :regex_match " "; + :regex_output ""; + ] . + + <#dbpedia_resource> a :Resource; + :identity [ + :source_resource <#wikipedia_page>; + :process ( :regex ); + :regex_match "http://[^/]*/wiki/(.*)"; + :regex_output "http://dbpedia.org/resource/${1}"; + ] . + + <#ourairports_page> a :Resource; + :type bibo:Webpage; + :identity [ + :source_column 2; + :base_uri ""; + :process ( :regex ); + :regex_match "^(.*)$"; + :regex_output "http://www.ourairports.com/airports/${1}/"; + ] . + + <#naptan_resource> a :Resource; + :identity [ + :source_column 14; + :base_uri "http://transport.data.gov.uk/id/airport/" + ]. + + <#boolean_lookup> a :Lookup; + :lookup_entry [ + :lookup_key "yes"; + :lookup_value "true"^^xsd:boolean + ]; + :lookup_entry [ + :lookup_key "true"; + :lookup_value "true"^^xsd:boolean + ]; + :lookup_entry [ + :lookup_key "no"; + :lookup_value "false"^^xsd:boolean + ]; + :lookup_entry [ + :lookup_key "false"; + :lookup_value "false"^^xsd:boolean + ] . + + diff --git a/documentation/index.html b/documentation/index.html index d07a9c2..97437b3 100644 --- a/documentation/index.html +++ b/documentation/index.html @@ -1,55 +1,507 @@ + "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> - - Vertere CSV2RDF Mapping Language - - + + Vertere CSV2RDF Mapping Language + + + +

Vertere CSV2RDF Mapping Language

+

+ Vertere is a tool for converting data in CSV format into RDF. + The Vertere mapping language described in this document uses + Turtle syntax, and is designed so as to resemble the target RDF + graph as much as possible. Another design choice is to ensure that + each line in the source CSV file can be processed individually, + ensuring that the conversion process can be parallelised easily. +

+

The Vertere mapping language is based on RDF. This means that everything is declared using triples, which consist of a subject, a predicate and an object. Each part of a triple is a resource, which is identified by a unique URI. Each URI typically has a namespace (e.g., http://example.com/namespace/), followed by the name of the resource (e.g., Concept). + In Turtle, triples are declared using the following notation: +

+
<http://example.com/Subject> <http://example.com/predicate> <http://example.com/Object> .
+

+ This document assumes basic knowledge of RDF and the Turtle syntax. For more information, visit the W3C team submission on Turtle - Terse RDF Triple Language +

+

Vertere Vocabulary

- +

+ All Vertere-specific instructions are defined in the Vertere vocabulary + namespace, which is introduced as the base namespace at the top of the + conversion document. The namespace is not yet formally defined, so the user is free to fill in any URI they like. +

-

Vertere CSV2RDF Mapping Language

+
@prefix : <http://example.com/schema/data_conversion#> .
-

- Vertere is a tool for converting data in CSV format into RDF. - The Vertere mapping language described in this document uses - Turtle syntax, and is designed so as to resemble to target RDF - graph as much as possible. Another design choice is to ensure that - each line in the source CSV file can be processed individually, - ensuring that the conversion process can be parallelised easily. -

+

+ Next, the user is able to declare extra prefixes to vocabulary (or ontology) namespaces. This allows using the qualified name later in the spec, instead of having to write the full URI to a resource. For example, if we want to map a CSV file to concepts in the TRANSIT vocabulary, we can declare the prefix transit. This way, we can use transit:Stop instead of having to write <http://vocab.org/transit/terms/Stop>. +

+
@prefix transit: <http://vocab.org/transit/terms/> .
-

This document assumes basic knowledge of RDF and the Turtle syntax.

+

Entry Point

-

Vertere Vocabulary

+

+ The entry point to the mapping specification is a :Spec object + (here identified as the document itself), which gives basic information about the data source. Such an object can be declared by stating that the root URI is of the type :Spec. +

+
<#> a :Spec .
-

- All Vertere-specific instructions are defined in the Vertere vocabulary - namespace, which is introduced as the base namespace at the top of the - conversion document: -

+

The spec resource expects the following metadata about the data source:

+
    +
  • + Format
    Supplies the format of the data file that needs to be mapped. Possible values (for now) are :CSV or :TSV, depending on whether you are mapping a file that uses commas or tabs to separate the values. +
    <#> :format :CSV .
    +
  • +
  • + Header rows
    Supplies a binary value that indicates if the file contains a header row (1) or not (0). +
    <#> :header_rows 0 .
    +
  • +
  • + Expected header
    When :header_rows is set to 1, you can supply the header row you expect in the document. +
    <#> :expected_header ( "\"id\",\"code\",\"local_code\",\"name\",\"continent\" ).
    +
  • +
  • + Resource
    With resource, you declare the resources that the mapping process will output. Each resource listed here needs its own mapping, declared elsewhere in the document. +
    <#> :resource <#customer>, <#email>, <#phone>, <#affiliation>, <#sales_person> .
    +
  • +
  • + Base URI
    The base URI defines the namespace under which the created resources will be placed. +
    <#> :base_uri "<http://my.data.source/test>" ;
    +
  • +
+

+ Turtle allows a shorter notation when several triples share the same subject. Declaring all above triples can be written like such: +

+ +
+<#> a :Spec;
+  :format :CSV;
+  :header_rows 1;
+  :expected_header ( "\"id\",\"code\",\"local_code\",\"name\",\"continent\",\"iso_country\",\"wikipedia_link\",\"keywords\"" );
+  :resource <#continent>, <#country>, <#region>, <#ourairports_region_page>, <#wikipedia_page>, <#dbpedia_resource>;
+  :base_uri "http://data.kasabi.com/dataset/world_geography/".
+    
+ +

Declaring mappings

+

Declaring and identifying resources

+

+ RDF uses unique URIs to identify resources. This means that these URIs need to be created from the dataset. This is done by declaring the resources and adding an :identity property. +

+ Declaring a resource works the same way as creating the :Spec object. This time, we choose a variable name, for example Continent, and say it is a :Resource. +

+
+<#continent> a :Resource .
+

+ Next, we specify how the identifying URI should be created. The value of :identity is a blank node, which is a resource that does not really exist (and has no URI), and is written in Turtle between []. Between these brackets, you can add predicates and objects as you would with any other resource. We use two predicates with our blank node: + +

+
    +
  • Source column
    + With source column, we define from which column, the values will be used to create the unique URI. This is typically a column with IDs (or primary keys), where the values are surely unique. For this, the predicate :source_column is used, the value is an integer, representing the column index (starting from 1). +
  • +
  • Base URI
    + Sometimes we want to use collections in our URIs. You can specify a different base URI using :base_uri, the value is a string. +
  • +
+

This example will, for instance, create the URI http://data.kasabi.com/dataset/world-geography/continents/Europe if the value in the fifth column is "Europe".

+
+<#continent> a :Resource;
+  :identity [
+    :source_column 5;
+    :base_uri "http://data.kasabi.com/dataset/world-geography/continents/"
+  ] .
+    
+

+ In some cases, columns will need to be combined in order to obtain a unique value (e.g., firstname and lastname). With the predicate :source_columns you can refer to the indexes of the columns you want to combine. The value is a list of two or more integers between parentheses, separated by a space. By adding :source_column_glue, you can define a string that will be used to glue the pieces together (e.g., underscore). A slash / is used to create a deeper hierarchy in the URI. +

+
+<#ourairports_region_page> a :Resource;
+  :type bibo:Webpage;
+  :identity [
+	:source_columns ( 6 3 ) ;
+	:source_column_glue "/" ;
+	:base_uri "http://www.ourairports.com/countries/"
+  ].
+

Using URI templates

+

+ Besides simple concatenation, the URI might be constructed in a more complex manner. For that reason, URI templates are supported. URI templates provide a flexible way to specify a URI using variables, and assign values afterwards. The spec can be found here. +

+

+ A URI template is added to the value of :identity by using the following predicates: +

+
    +
  • Template
    + You can specify a template using :template, the value is a string. +
  • +
  • Template variables
    + The values that fill the variables used in the template are added using :template_vars. The value is a list of blank nodes, each specifying a variable name and a source column: +
      +
    • Variable name
      + This value represents the name of the variable used in the template. For this, the predicate :variable is used. +
    • +
    • Source column
      + With source column, we define from which column, the values will be used to create the unique URI. This is typically a column with IDs (or primary keys), where the values are surely unique. For this, the predicate :source_column is used, the value is an integer, representing the column index (starting from 1). +
    • +
    +
  • + + +
+

The above example can also be written as:

+
+<#ourairports_region_page> a :Resource;
+  :type bibo:Webpage;
+  :identity [
+    :template "http://www.ourairports.com/countries/{country_id}/{local_id}";
+    :template_vars [
+      :variable "country_id";
+      :source_column 6
+    ],[
+      :variable "local_id";
+      :source_column 3
+    ]
+  ].
+

Typing resources

+

Typing is the most basic operation in mapping. In the output RDF, it will create triples using the predicate rdf:type (or in Turtle the shorthand a). The object of this triple, will typically be a class, defined in the ontology that we are mapping to. The created resource, will therefore be an instance of this type. In this mapping language, we type a resource by using the :type predicate, followed by the URI of the concept.

+
<#wikipedia_page> :type bibo:Webpage .
+

In this example, bibo represents the Bibliographic Ontology Specification namespace. Therefore, bibo:Webpage can also be written as <http://purl.org/ontology/bibo/Webpage> .

+

Adding attributes and relations to resources

+

Besides typing, you might want to add some related data to your resource. In RDF (or typically in Linked Data), this is done in two ways. They both use a blank node as object.

+ +

First, you can add links to other resources, either created from your data set or already existing on the Web. In this mapping language, such links are defined with the :relationship predicate. The object, a blank node, typically has two predicates:

+
    +
  • The :property predicate, which refers to the property in the ontology you want to use.
  • +
  • The :object_from predicate, which refers to another resource, declared elsewhere in the document.
  • +
+ + + + + +
+
+<#region> :relationship [
+  :property owl:sameAs;
+  :object_from <#dbpedia_resource>
+] .
+
<http://test.com/REGION11> owl:sameAs <http://dbpedia.org/resource/Aix-en-Provence> .
+

Second, you can add triples with literals as the object value. In many cases, the values of a column in your CSV file just need to be added as a string or integer. In this language, such triples are defined with the :attribute predicate. The object, a blank node, typically has these predicates:

+
    +
  • The :property predicate, which refers to the property in the ontology you want to use to add the literal value.
  • +
  • The :source_column predicate, which refers to a column using its index.
  • +
  • The :language predicate, which can add a language code to a string value.
  • +
  • The :datatype predicate, which adds a XSD datatype to the value
  • +
+ + + + + + + + + + + + + +
+ Mapping + + Output example +
+
+<#region> [
+  :property geo:alt;
+  :source_column 7;
+  :datatype xsd:float;
+] .
+
<http://test.com/REGION11> geo:alt "3.14"^^xsd:float .
+
+<#region> [
+  :property rdfs:label;
+  :source_column 7;
+  :language "en";
+] .
+
<http://test.com/REGION11> rdfs:label "Provence"@en .
+

Attributes and relationships can also be used to add constant literals or URIs that are hard-coded in the mapping file (e.g., adding unit information for values). The predicates :value (attributes) and :object (relationships) are used for this.

+

An example with attributes:

+
<#region> :attribute [ :property ex:prop; :value "something"]
+

An example with relationships:

+
<#region> :relationship [ :property ex:prop; :object ]
+

+You can also use the current subject as object, in order to create inverse relations:

-@prefix : <http://example.com/schema/data_conversion#> .
+:relationship [ :property ex:parent; :subject  ] 
 
+

A fully mapped resource typically consists of typing, identity, relations and attributes, as shown in the combined example below:

-

Entry Point

+
+<#region> a :Resource;
+  :type places:Region;
+  :identity [
+	:source_column 2;
+	:base_uri "http://data.kasabi.com/dataset/world-geography/regions/"
+  ];
+  :relationship [
+    :property owl:sameAs;
+    :object_from <#dbpedia_resource>
+  ],
+  [
+    :property foaf:isPrimaryTopicOf;
+    :object_from <#wikipedia_page>
+  ],
+  :attribute [
+    :property fly:iso_code;
+    :source_column 2
+  ],
+  [
+    :property foaf:name;
+    :source_column 4
+  ].
+ +

Processing column values

+

Column values can be processed before using them. This is done by calling a process function, which is specified by adding the predicate :process. Its object is an ordered list of defined functions which will be executed sequentially. We discuss some of them in detail.

+

Using conversions

+

Conversions transform a value into another value and are specified as custom PHP functions. These functions are defined in the class Conversions, located at custom/conversions.class.php.

+
+<?php
+/*
+ * Class for custom conversion methods
+ */
+class Conversions {
+    /*
+     * Converts a value from feet to metres
+     */
+	public static function feet_to_metres($value) {
+		return ($value * 0.3048);
+	}
 
-

- The entry point to the mapping specification is a :Spec object - (here identified as the document itself), which -

+ public static function metres_to_feet($value) { + return ($value * 3.2808); + } -
-<#> a :Spec ; 
-	:format :TSV ;
-	:header_rows 0 ;
-	:resource <#customer>, <#email>, <#phone>, <#affiliation>, <#sales_person> ;
-	:base_uri "http://data.kasabi.com/dataset/adventureworks-2008r2lt/" ;
+}
+?>
+    
+

This class can be extended with extra functions. In the mapping file, a function is called by adding :functionname to the object list. In the following example, we add the altitude of a region. The values in the data columns are in feet, so we use a process function to convert them.

+
+<#region> a :Resource;
+
+:attribute [
+  :property geo:alt;
+  :source_column 7;
+  :datatype xsd:float;
+  :process (:feet_to_metres)
+] .
+    
+

Using regular expressions

+

With the Vertere mapping language, you can also specify regular expression patterns to transform the values in certain columns. +

+

+ In this example, we transform the Wikipedia URIs, defined in another resource <#wikipedia_page>, to DBpedia URIs. We can use this <#dbpedia_resource> elsewhere as an object (e.g., to create sameAs links). +

+
+<#dbpedia_resource> a :Resource; 
+  :identity [
+    :source_resource <#wikipedia_page>;
+    :base_uri "" ;
+    :process ( :regex );
+    :regex_match "http://[^/]*/wiki/(.*)";
+    :regex_output "http://dbpedia.org/resource/${1}";
+  ] .
+

Using lookup

+

Transforming values with process functions

+

Combining techniques

+

Many of the above techniques can be combined.

+ +

Example

+

The following example maps a CSV file with airports using the TRANSIT, PLACES, GEO, GEORSS, NAPTAN, FOAF, FLY, SPACEREL and BIBO vocabularies.

+
+"id","ident","type","name","latitude_deg","longitude_deg","elevation_ft","continent","iso_country","iso_region","municipality","scheduled_service","gps_code","iata_code","local_code","home_link","wikipedia_link","keywords"
+6523,"00A","heliport","Total Rf Heliport",40.07080078125,-74.9336013793945,11,"NA","US","US-PA","Bensalem","no","00A",,"00A",,,
+6524,"00AK","small_airport","Lowell Field",59.94919968,-151.695999146,450,"NA","US","US-AK","Anchor Point","no","00AK",,"00AK",,,
+6525,"00AL","small_airport","Epps Airpark",34.8647994995117,-86.7703018188477,820,"NA","US","US-AL","Harvest","no","00AL",,"00AL",,,
+6526,"00AR","heliport","Newport Hospital & Clinic Heliport",35.608699798584,-91.2548980712891,237,"NA","US","US-AR","Newport","no","00AR",,"00AR",,,
+6527,"00AZ","small_airport","Cordes Airport",34.3055992126465,-112.165000915527,3810,"NA","US","US-AZ","Cordes","no","00AZ",,"00AZ",,,
+6528,"00CA","small_airport","Goldstone /Gts/ Airport",35.3504981995,-116.888000488,3038,"NA","US","US-CA","Barstow","no","00CA",,"00CA",,,
+6529,"00CO","small_airport","Cass Field",40.622200012207,-104.34400177002,4830,"NA","US","US-CO","Briggsdale","no","00CO",,"00CO",,,
+6531,"00FA","small_airport","Grass Patch Airport",28.6455001831055,-82.2190017700195,53,"NA","US","US-FL","Bushnell","no","00FA",,"00FA",,,
+6532,"00FD","heliport","Ringhaver Heliport",28.8465995788574,-82.3453979492188,25,"NA","US","US-FL","Riverview","no","00FD",,"00FD",,,
+    
+
+@prefix : <http://example.com/schema/data_conversion#> .
+@prefix bibo: <http://purl.org/ontology/bibo/> .
+@prefix fly: <http://vocab.org/fly/schema/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix geo: <http://www.w3.org/2003/01/geo/wgs84_pos#> .
+@prefix georss: <http://www.georss.org/georss/> .
+@prefix naptan: <http://transport.data.gov.uk/def/naptan/> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix places: <http://purl.org/ontology/places#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix spacerel: <http://data.ordnancesurvey.co.uk/ontology/spatialrelations/> .
+@prefix transit: <http://vocab.org/transit/terms/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+# 1    id
+# 2    ident
+# 3    type
+# 4    name
+# 5    latitude_deg
+# 6    longitude_deg
+# 7    elevation_ft
+# 8    continent
+# 9    iso_country
+# 10   iso_region
+# 11   municipality
+# 12   scheduled_service
+# 13   gps_code
+# 14   iata_code
+# 15   local_code
+# 16   home_link
+# 17   wikipedia_link
+# 18   keywords
+
+<#> a :Spec;
+:format :CSV;
+:header_rows 1;
+:expected_header ( "\"id\",\"ident\",\"type\",\"name\",\"latitude_deg\",\"longitude_deg\",\"elevation_ft\",\"continent\",\"iso_country\",\"iso_region\",\"municipality\",\"scheduled_service\",\"gps_code\",\"iata_code\",\"local_code\",\"home_link\",\"wikipedia_link\",\"keywords\"" );
+:resource <#airport>, <#airport_type>, <#country>, <#continent>, <#region>, <#municipality>, <#wikipedia_page>, <#dbpedia_resource>, <#ourairports_page>, <#naptan_resource>;
+:base_uri "http://data.kasabi.com/dataset/airports/" .
+
+<#airport> a :Resource
+; :identity [ :source_column 2; ]
+; :type fly:Airport, transit:Stop, naptan:Airport
+; :relationship
+	[ :property rdf:type; :object_from <#airport_type> ],
+	[ :property spacerel:within; :object_from <#municipality> ],
+	[ :property spacerel:within; :object_from <#region> ],
+	[ :property spacerel:within; :object_from <#country> ],
+	[ :property spacerel:within; :object_from <#continent> ],
+	[ :property foaf:isPrimaryTopicOf; :object_from <#wikipedia_page> ],
+	[ :property foaf:isPrimaryTopicOf; :object_from <#ourairports_page> ],
+	[ :property owl:sameAs; :object_from <#dbpedia_resource> ],
+	[ :property owl:sameAs; :object_from <#naptan_resource> ]
+; :attribute
+	[ :property geo:lat; :source_column 5; :datatype xsd:float ],
+	[ :property geo:long; :source_column 6; :datatype xsd:float ],
+	[ :property geo:alt; :source_column 7; :datatype xsd:float; :process ( :feet_to_metres ); ],
+	[ :property georss:point; :source_columns (5 6); :source_column_glue " " ],
+	[ :property foaf:name; :source_column 4; :language "en" ],
+	[ :property fly:icao_code; :source_column 2 ],
+	[ :property fly:scheduled_service; :source_column 12; :lookup <#boolean_lookup> ]
 .
-
- +<#airport_type> a :Resource +; :identity [ :source_column 3; :process ( :normalise :title_case ); :base_uri "http://data.kasabi.com/dataset/airports/schema/" ] +; :type rdfs:Class +; :attribute + [ + :property rdfs:label; + :source_column 3; + :process ( :regex :title_case ); + :regex_match "_"; + :regex_output " "; + ] +. + +<#continent> a :Resource +; :identity [ + :source_column 8; + :base_uri "http://data.kasabi.com/dataset/world-geography/continents/" +] +. + +<#country> a :Resource; + :identity [ + :source_column 9; + :base_uri "http://data.kasabi.com/dataset/world-geography/countries/" + ] . + +<#region> a :Resource; + :type places:Region; + :identity [ + :source_column 10; + :base_uri "http://data.kasabi.com/dataset/world-geography/regions/" + ]; + :relationship [ + :property spacerel:contains; + :object_from <#municipality> + ] . + +<#municipality> a :Resource; + :type places:Municipality; + :identity [ + :source_column 11; + :container "municipalities"; + :process ( :flatten_utf8 :normalise ); + ]; + :relationship [ + :property spacerel:within; + :object_from <#region> + ]; + :attribute [ + :property rdfs:label; + :source_column 11 + ] . + +<#wikipedia_page> a :Resource; + :type bibo:Webpage; + :identity [ + :source_column 17; + :base_uri ""; + :process ( :regex ); + :regex_match " "; + :regex_output ""; + ] . + +<#dbpedia_resource> a :Resource; + :identity [ + :source_resource <#wikipedia_page>; + :process ( :regex ); + :regex_match "http://[^/]*/wiki/(.*)"; + :regex_output "http://dbpedia.org/resource/${1}"; + ] . + +<#ourairports_page> a :Resource; + :type bibo:Webpage; + :identity [ + :source_column 2; + :base_uri ""; + :process ( :regex ); + :regex_match "^(.*)$"; + :regex_output "http://www.ourairports.com/airports/${1}/"; + ] . + +<#naptan_resource> a :Resource; + :identity [ + :source_column 14; + :base_uri "http://transport.data.gov.uk/id/airport/" + ]. 
+ +<#boolean_lookup> a :Lookup; + :lookup_entry [ + :lookup_key "yes"; + :lookup_value "true"^^xsd:boolean + ]; + :lookup_entry [ + :lookup_key "true"; + :lookup_value "true"^^xsd:boolean + ]; + :lookup_entry [ + :lookup_key "no"; + :lookup_value "false"^^xsd:boolean + ]; + :lookup_entry [ + :lookup_key "false"; + :lookup_value "false"^^xsd:boolean + ] . + +
/*
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
*/

namespace Guzzle\Parser\UriTemplate;

/**
 * Expands URI templates using an array of variables
 *
 * @link http://tools.ietf.org/html/rfc6570
 */
interface UriTemplateInterface
{
    /**
     * Expand the URI template using the supplied variables
     *
     * @param string $template  URI Template to expand
     * @param array  $variables Variables to use with the expansion
     *
     * @return string Returns the expanded template
     */
    public function expand($template, array $variables);
}

/**
 * Expands URI templates using an array of variables
 *
 * Every `{...}` expression found in the template is parsed into an optional
 * operator plus a comma separated list of variable specs, and replaced by the
 * percent-encoded values of those variables. Variables that are missing or
 * null are skipped.
 *
 * @link http://tools.ietf.org/html/draft-gregorio-uritemplate-08
 */
class UriTemplate implements UriTemplateInterface
{
    /**
     * @var string URI template
     */
    private $template;

    /**
     * @var array Variables to use in the template expansion
     */
    private $variables;

    /**
     * @var string Regex used to parse expressions
     */
    private static $regex = '/\{([^\}]+)\}/';

    /**
     * @var array Hash for quick operator lookups
     */
    private static $operatorHash = array(
        '+' => true, '#' => true, '.' => true, '/' => true, ';' => true, '?' => true, '&' => true
    );

    /**
     * @var array Delimiters
     */
    private static $delims = array(
        ':', '/', '?', '#', '[', ']', '@', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='
    );

    /**
     * @var array Percent encoded delimiters
     */
    private static $delimsPct = array(
        '%3A', '%2F', '%3F', '%23', '%5B', '%5D', '%40', '%21', '%24', '%26', '%27', '%28', '%29', '%2A', '%2B', '%2C',
        '%3B', '%3D'
    );

    /**
     * {@inheritdoc}
     */
    public function expand($template, array $variables)
    {
        $this->template = $template;
        $this->variables = $variables;

        // Skip the regex engine entirely when the template holds no expression
        if (false === strpos($this->template, '{')) {
            return $this->template;
        }

        return preg_replace_callback(self::$regex, array($this, 'expandMatch'), $this->template);
    }

    /**
     * Parse an expression into parts
     *
     * @param string $expression Expression to parse (the text between the braces)
     *
     * @return array Returns an associative array with the keys 'operator' (string, may be empty)
     *               and 'values' (list of arrays with 'value', 'modifier' and, for the ':' modifier, 'position')
     */
    private function parseExpression($expression)
    {
        // Check for URI operators
        $operator = '';

        if (isset(self::$operatorHash[$expression[0]])) {
            $operator = $expression[0];
            $expression = (string) substr($expression, 1);
        }

        // The list is built by appending rather than by mutating through a reference,
        // so no dangling reference is left behind.
        $values = array();
        foreach (explode(',', $expression) as $value) {
            $value = trim($value);
            $varspec = array();
            $substrPos = strpos($value, ':');
            if ($substrPos) {
                // Prefix modifier, e.g. "name:3"
                $varspec['value'] = substr($value, 0, $substrPos);
                $varspec['modifier'] = ':';
                $varspec['position'] = (int) substr($value, $substrPos + 1);
            } elseif (substr($value, -1) == '*') {
                // Explode modifier, e.g. "list*"
                $varspec['modifier'] = '*';
                $varspec['value'] = substr($value, 0, -1);
            } else {
                $varspec['value'] = (string) $value;
                $varspec['modifier'] = '';
            }
            $values[] = $varspec;
        }

        return array(
            'operator' => $operator,
            'values'   => $values
        );
    }

    /**
     * Process an expansion
     *
     * @param array $matches Matches met in the preg_replace_callback
     *
     * @return string Returns the replacement string
     */
    private function expandMatch(array $matches)
    {
        static $rfc1738to3986 = array(
            '+'   => '%20',
            '%7e' => '~'
        );

        $parsed = $this->parseExpression($matches[1]);
        $operator = $parsed['operator'];
        $replacements = array();

        $prefix = $operator;
        $joiner = $operator;
        $useQueryString = false;
        if ($operator === '?') {
            $joiner = '&';
            $useQueryString = true;
        } elseif ($operator === '&') {
            $useQueryString = true;
        } elseif ($operator === '#') {
            $joiner = ',';
        } elseif ($operator === ';') {
            $useQueryString = true;
        } elseif ($operator === '' || $operator === '+') {
            $joiner = ',';
            $prefix = '';
        }

        // The '+' and '#' operators leave reserved characters unencoded
        $keepReserved = ($operator === '+' || $operator === '#');

        foreach ($parsed['values'] as $value) {

            if (!array_key_exists($value['value'], $this->variables) || $this->variables[$value['value']] === null) {
                continue;
            }

            $variable = $this->variables[$value['value']];
            $actuallyUseQueryString = $useQueryString;
            $expanded = '';

            if (is_array($variable)) {

                $isAssoc = $this->isAssoc($variable);
                $kvp = array();
                foreach ($variable as $key => $var) {

                    if ($isAssoc) {
                        $key = rawurlencode((string) $key);
                        $isNestedArray = is_array($var);
                    } else {
                        $isNestedArray = false;
                    }

                    if (!$isNestedArray) {
                        $var = rawurlencode((string) $var);
                        if ($keepReserved) {
                            $var = $this->decodeReserved($var);
                        }
                    }

                    if ($value['modifier'] == '*') {
                        if ($isAssoc) {
                            if ($isNestedArray) {
                                // Nested arrays must allow for deeply nested structures
                                $var = strtr(http_build_query(array($key => $var)), $rfc1738to3986);
                            } else {
                                $var = $key . '=' . $var;
                            }
                        } elseif ($key > 0 && $actuallyUseQueryString) {
                            $var = $value['value'] . '=' . $var;
                        }
                    }

                    $kvp[$key] = $var;
                }

                if (empty($variable)) {
                    $actuallyUseQueryString = false;
                } elseif ($value['modifier'] == '*') {
                    $expanded = implode($joiner, $kvp);
                    if ($isAssoc) {
                        // Don't prepend the value name when using the explode modifier with an associative array
                        $actuallyUseQueryString = false;
                    }
                } else {
                    if ($isAssoc) {
                        // When an associative array is encountered and the explode modifier is not set, then the
                        // result must be a comma separated list of keys followed by their respective values.
                        foreach ($kvp as $k => $v) {
                            $kvp[$k] = $k . ',' . $v;
                        }
                    }
                    $expanded = implode(',', $kvp);
                }

            } else {
                if ($value['modifier'] == ':') {
                    $variable = substr($variable, 0, $value['position']);
                }
                $expanded = rawurlencode((string) $variable);
                if ($keepReserved) {
                    $expanded = $this->decodeReserved($expanded);
                }
            }

            if ($actuallyUseQueryString) {
                // Only a truly empty expansion collapses to the bare name; "0" is a real value
                if ($expanded === '' && $joiner != '&') {
                    $expanded = $value['value'];
                } else {
                    $expanded = $value['value'] . '=' . $expanded;
                }
            }

            $replacements[] = $expanded;
        }

        $ret = implode($joiner, $replacements);
        // Compare against the empty string so that an expansion of "0" still receives its prefix
        if ($ret !== '' && $prefix) {
            return $prefix . $ret;
        }

        return $ret;
    }

    /**
     * Determines if an array is associative
     *
     * @param array $array Array to check
     *
     * @return bool True when at least one key is a string
     */
    private function isAssoc(array $array)
    {
        return (bool) count(array_filter(array_keys($array), 'is_string'));
    }

    /**
     * Removes percent encoding on reserved characters (used with + and # modifiers)
     *
     * @param string $string String to fix
     *
     * @return string
     */
    private function decodeReserved($string)
    {
        return str_replace(self::$delimsPct, self::$delims, $string);
    }
}
/**
 * Converts source records (arrays of column values) into RDF graphs, driven by
 * a conversion spec graph.
 *
 * The spec lists the resources to create; every resource describes how its URI
 * is built (identity), which literal properties it gets (attributes) and how it
 * links to other resources (relationships).
 */
class Vertere {

    private $spec, $spec_uri, $resources, $base_uri, $lookups = array(), $null_values = array(), $header = array();

    /**
     * @param object $spec     Spec graph offering get_first_literal(), get_first_resource(),
     *                         get_resource_triple_values(), get_list_values() and get_subject_property_values()
     * @param string $spec_uri URI of the spec resource inside that graph
     *
     * @throws Exception When the spec does not declare any resource
     */
    public function __construct($spec, $spec_uri) {
        $this->spec = $spec;
        $this->spec_uri = $spec_uri;

        // Find resource specs
        $this->resources = $spec->get_resource_triple_values($this->spec_uri, NS_CONV . 'resource');
        if (empty($this->resources)) {
            throw new Exception('Unable to find any resource specs to work from');
        }

        $this->base_uri = $spec->get_first_literal($this->spec_uri, NS_CONV . 'base_uri');

        // :null_values is a list of strings that indicate NULL in the source data
        $null_value_list = $spec->get_first_resource($this->spec_uri, NS_CONV . 'null_values');
        if ($null_value_list) {
            foreach ($spec->get_list_values($null_value_list) as $null_value_resource) {
                if ($null_value_resource["type"] == "literal") {
                    $this->null_values[] = $null_value_resource["value"];
                }
            }
        } else {
            // Without an explicit list only the empty string counts as NULL
            $this->null_values[] = "";
        }
    }

    /**
     * Resolve a column reference from the spec to an index into a record.
     *
     * A reference that matches a header cell resolves to that cell's position;
     * otherwise a numeric reference is treated as a 1-based column number and
     * any other string is used as the key as-is.
     *
     * @param mixed $source_column Column reference taken from the spec
     *
     * @return array array($key, $from_header)
     *
     * @throws Exception When the reference is neither a header name, numeric nor a string
     */
    private function resolve_column($source_column) {
        $key = array_search($source_column, $this->header);
        if ($key !== false) {
            return array($key, true);
        }
        if (is_numeric($source_column)) {
            return array($source_column - 1, false);
        }
        if (!is_string($source_column)) {
            throw new Exception("Source column value is not valid: string or numeric");
        }
        return array($source_column, false);
    }

    /**
     * Tells whether a value is absent. Unlike empty(), "0" and 0 count as real values.
     *
     * @param mixed $value
     *
     * @return bool
     */
    private function is_blank($value) {
        return $value === null || $value === false || $value === '';
    }

    /**
     * Bring the different lookup result shapes to one form.
     *
     * Config-entry lookups return an array with 'type' and 'value', CSV lookups a
     * plain string (or false when nothing matched) and a missing entry gives null.
     *
     * @param mixed $result Whatever lookup() returned
     *
     * @return array array($type, $value); both null when nothing was found
     */
    private function split_lookup_result($result) {
        if (is_array($result)) {
            return array(
                isset($result['type']) ? $result['type'] : null,
                isset($result['value']) ? $result['value'] : null
            );
        }
        if ($result === null || $result === false) {
            return array(null, null);
        }
        return array('literal', $result);
    }

    /**
     * Fetch the trimmed value of a column, referenced by header name or 1-based number (MVS)
     *
     * @param array $record        Source record
     * @param mixed $source_column Column reference from the spec
     *
     * @return string|null The trimmed value, or null (after printing a notice) when a
     *                     non-header reference does not exist in the record
     *
     * @throws Exception When the reference is invalid, or matched the header but the record lacks that column
     */
    public function get_record_value($record, $source_column) {
        list($key, $from_header) = $this->resolve_column($source_column);

        if (array_key_exists($key, $record)) {
            return trim((string) $record[$key]);
        }

        if ($from_header) {
            throw new Exception("Source column value is not valid");
        }

        echo "Column reference $source_column is not found in source\n";
        return null;
    }

    /**
     * Tells whether the referenced column is present in the record.
     *
     * @param mixed $source_column Column reference from the spec
     * @param array $record        Source record
     *
     * @return bool
     *
     * @throws Exception When the reference is neither a header name, numeric nor a string
     */
    public function record_key_exists($source_column, $record) {
        list($key) = $this->resolve_column($source_column);
        return array_key_exists($key, $record);
    }

    /**
     * Convert one source record into a graph.
     *
     * @param array $record Column values of the record
     * @param array $header Optional header row, enabling column references by name
     *
     * @return SimpleGraph
     *
     * @throws Exception When $header is not an array
     */
    public function convert_array_to_graph($record, $header = array()) {
        if (!is_array($header)) {
            throw new Exception("Supplied header is no array!");
        }

        $this->header = $header;

        $uris = $this->create_uris($record);
        $graph = new SimpleGraph();
        $this->add_default_types($graph, $uris);
        $this->create_relationships($graph, $uris, $record);
        $this->create_attributes($graph, $uris, $record);
        return $graph;
    }

    /**
     * Add an rdf:type triple for every :type declared on a resource that received a URI.
     */
    private function add_default_types($graph, $uris) {
        foreach ($this->resources as $resource) {
            if (!isset($uris[$resource])) {
                continue;
            }
            $types = $this->spec->get_resource_triple_values($resource, NS_CONV . 'type');
            foreach ($types as $type) {
                if (!empty($type)) {
                    $graph->add_resource_triple($uris[$resource], NS_RDF . 'type', $type);
                }
            }
        }
    }

    /**
     * Create the triples for every :attribute of every resource.
     */
    private function create_attributes(&$graph, $uris, $record) {
        foreach ($this->resources as $resource) {
            $attributes = $this->spec->get_resource_triple_values($resource, NS_CONV . 'attribute');
            foreach ($attributes as $attribute) {
                $this->create_attribute($graph, $uris, $record, $resource, $attribute);
            }
        }
    }

    /**
     * Create the triple for a single attribute.
     *
     * The value comes from a fixed :value, a single :source_column or several
     * glued :source_columns, is optionally replaced through a :lookup and run
     * through the :process steps. A lookup that yields a URI produces a resource
     * triple; everything else produces a literal triple. Nothing is added when
     * the resource has no URI or no value is left.
     */
    private function create_attribute(&$graph, $uris, $record, $resource, $attribute) {
        if (!isset($uris[$resource])) {
            return;
        }
        $subject = $uris[$resource];
        $property = $this->spec->get_first_resource($attribute, NS_CONV . 'property');
        $language = $this->spec->get_first_literal($attribute, NS_CONV . 'language');
        $datatype = $this->spec->get_first_resource($attribute, NS_CONV . 'datatype');

        $value = $this->spec->get_first_literal($attribute, NS_CONV . 'value');
        $source_column = $this->spec->get_first_literal($attribute, NS_CONV . 'source_column');
        $source_columns = $this->spec->get_first_resource($attribute, NS_CONV . 'source_columns');

        if ($value) {
            $source_value = $value;
        } else if ($source_column) {
            $source_value = $this->get_record_value($record, $source_column);
        } else if ($source_columns) {
            $source_columns = $this->spec->get_list_values($source_columns);
            $glue = $this->spec->get_first_literal($attribute, NS_CONV . 'source_column_glue');
            $filter = $this->spec->get_first_literal($attribute, NS_CONV . 'source_column_filter');
            if (!isset($filter)) {
                // default: accept anything
                $filter = "//";
            }
            $source_values = array();
            foreach ($source_columns as $column) {
                $column_value = (string) $this->get_record_value($record, $column['value']);
                if (preg_match($filter, $column_value) != 0 && !in_array($column_value, $this->null_values, true)) {
                    $source_values[] = $column_value;
                }
            }
            $source_value = implode((string) $glue, $source_values);
        } else {
            return;
        }

        $lookup = $this->spec->get_first_resource($attribute, NS_CONV . 'lookup');
        if ($lookup != null) {
            list($lookup_type, $lookup_value) = $this->split_lookup_result($this->lookup($lookup, $source_value));
            if ($lookup_type == 'uri') {
                $graph->add_resource_triple($subject, $property, $lookup_value);
                return;
            }
            $source_value = $lookup_value;
        }

        if ($this->is_blank($source_value)) {
            return;
        }

        $source_value = $this->process($attribute, $source_value);

        $graph->add_literal_triple($subject, $property, $source_value, $language, $datatype);
    }

    /**
     * Create the triples for every :relationship of every resource.
     */
    private function create_relationships(&$graph, $uris, $record) {
        foreach ($this->resources as $resource) {
            $relationships = $this->spec->get_resource_triple_values($resource, NS_CONV . 'relationship');
            foreach ($relationships as $relationship) {
                $this->create_relationship($graph, $uris, $resource, $relationship, $record);
            }
        }
    }

    /**
     * Create the triple for a single relationship.
     *
     * The object is, in order of precedence, the URI of another resource
     * (:object_from), a URI built in place from a column (:identity) or a fixed
     * :object. With :subject the direction is reversed: the resource becomes the
     * object of the given subject. No triple is added unless subject, property
     * and object are all known.
     */
    private function create_relationship(&$graph, $uris, $resource, $relationship, $record) {
        $subject = null;
        if (array_key_exists($resource, $uris)) {
            $subject = $uris[$resource];
        }

        $property = $this->spec->get_first_resource($relationship, NS_CONV . 'property');

        $object_from = $this->spec->get_first_resource($relationship, NS_CONV . 'object_from');
        $identity = $this->spec->get_first_resource($relationship, NS_CONV . 'identity');
        $object = $this->spec->get_first_resource($relationship, NS_CONV . 'object');
        $new_subject = $this->spec->get_first_resource($relationship, NS_CONV . 'subject');

        if ($object_from) {
            // The referenced resource may not have received a URI for this record
            if (isset($uris[$object_from])) {
                $object = $uris[$object_from];
            }
        } else if ($identity) {
            // we create a link in situ, from a column value
            // TODO: this should be merged with the create_uri() code
            $source_column = $this->spec->get_first_literal($identity, NS_CONV . 'source_column');
            $source_value = $this->get_record_value($record, $source_column);
            if ($this->is_blank($source_value)) {
                return;
            }
            $base_uri = $this->spec->get_first_literal($identity, NS_CONV . 'base_uri');
            if ($base_uri === null) {
                $base_uri = $this->base_uri;
            }
            $source_value = $this->process($identity, $source_value);
            $object = $base_uri . $source_value;
        } else if ($new_subject) {
            $object = $subject;
            $subject = $new_subject;
        }

        if ($subject && $property && $object) {
            $graph->add_resource_triple($subject, $property, $object);
        }
    }

    /**
     * Build the URIs of all resources for one record.
     *
     * @return array Map of resource spec URI => generated URI; resources without a usable identity are absent
     */
    private function create_uris($record) {
        $uris = array();
        foreach ($this->resources as $resource) {
            // A resource may already have been created as a dependency of an earlier one
            if (!isset($uris[$resource])) {
                $this->create_uri($record, $uris, $resource);
            }
        }
        return $uris;
    }

    /**
     * Expand a URI template with values taken from the record.
     *
     * @param array  $record   Source record
     * @param string $template URI template
     * @param array  $vars     Variable declarations, each naming a :variable and its :source_column
     *
     * @return string The expanded URI
     */
    private function create_template_uri($record, $template, $vars) {
        $var_arr = array();
        foreach ($vars as $var) {
            $name = $this->spec->get_first_literal($var, NS_CONV . 'variable');
            $source_column = $this->spec->get_first_literal($var, NS_CONV . 'source_column');
            $var_arr[$name] = $this->get_record_value($record, $source_column);
        }

        $processor = new \Guzzle\Parser\UriTemplate\UriTemplate();
        return $processor->expand($template, $var_arr);
    }

    /**
     * Build the URI of one resource and store it in $uris.
     *
     * The identifying value comes from a URI :template, a :source_column, glued
     * :source_columns or the URI of a :source_resource. It can be replaced by a
     * :lookup, is run through the :process steps and appended to the base URI
     * (own, spec-wide, or the URI of the :nest_under resource) and :container.
     * When no value is left, the resource's :alternative_identity is tried.
     *
     * @param array       $record   Source record
     * @param array       $uris     Map of resource => URI, extended in place
     * @param string      $resource Resource spec URI
     * @param string|null $identity Identity to use instead of the resource's own :identity
     */
    private function create_uri($record, &$uris, $resource, $identity = null) {
        if (!$identity) {
            $identity = $this->spec->get_first_resource($resource, NS_CONV . 'identity');
        }
        $source_column = $this->spec->get_first_literal($identity, NS_CONV . 'source_column');
        $source_columns = $this->spec->get_first_resource($identity, NS_CONV . 'source_columns');
        $source_resource = $this->spec->get_first_resource($identity, NS_CONV . 'source_resource');
        //Support for URI templates
        $template = $this->spec->get_first_literal($identity, NS_CONV . 'template');

        $source_value = null;

        if ($template) {
            //Retrieve all declared variables and expand template
            //For now, only an unprocessed single column value is supported as a template variable
            //Future: support source_columns, source_resource, lookup and process as well => refactor whole method
            $vars = $this->spec->get_resource_triple_values($identity, NS_CONV . 'template_vars');
            $uris[$resource] = $this->create_template_uri($record, $template, $vars);
            return;
        } else if ($source_column) {
            $source_value = $this->get_record_value($record, $source_column);
        } else if ($source_columns) {
            $source_columns = $this->spec->get_list_values($source_columns);
            $glue = $this->spec->get_first_literal($identity, NS_CONV . 'source_column_glue');
            $source_values = array();

            foreach ($source_columns as $column) {
                $column = $column['value'];
                // Resolve through the header as well, so named columns are not silently skipped
                if (!$this->record_key_exists($column, $record)) {
                    continue;
                }
                $column_value = $this->get_record_value($record, $column);
                // NULL markers keep their position as an empty part
                $source_values[] = in_array($column_value, $this->null_values, true) ? "" : $column_value;
            }

            // Only glue the parts when at least one of them carries a value
            $source_value = implode('', $source_values);
            if ($source_value !== '') {
                $source_value = implode((string) $glue, $source_values);
            }
        } else if ($source_resource) {
            if (!isset($uris[$source_resource])) {
                $this->create_uri($record, $uris, $source_resource);
            }
            // The source resource may still be without a URI
            if (isset($uris[$source_resource])) {
                $source_value = $uris[$source_resource];
            }
        } else {
            return;
        }

        //Check for lookups
        $lookup = $this->spec->get_first_resource($identity, NS_CONV . 'lookup');
        if ($lookup != null) {
            list($lookup_type, $lookup_value) = $this->split_lookup_result($this->lookup($lookup, $source_value));
            if ($lookup_type == 'uri') {
                $uris[$resource] = $lookup_value;
                return;
            }
            $source_value = $lookup_value;
        }

        //Decide on base_uri
        $base_uri = $this->spec->get_first_literal($identity, NS_CONV . 'base_uri');
        if ($base_uri === null) {
            $base_uri = $this->base_uri;
        }

        //Decide if the resource should be nested (overrides the base_uri)
        $nest_under = $this->spec->get_first_resource($identity, NS_CONV . 'nest_under');
        if ($nest_under != null) {
            if (!isset($uris[$nest_under])) {
                $this->create_uri($record, $uris, $nest_under);
            }
            $base_uri = isset($uris[$nest_under]) ? $uris[$nest_under] : '';
            if (!preg_match('%[/#]$%', $base_uri)) {
                $base_uri .= '/';
            }
        }

        $container = $this->spec->get_first_literal($identity, NS_CONV . 'container');
        if (!empty($container) && !preg_match('%[/#]$%', $container)) {
            $container .= '/';
        }

        $source_value = $this->process($identity, $source_value);

        if (!$this->is_blank($source_value)) {
            $uris[$resource] = $base_uri . $container . $source_value;
            return;
        }

        // Fall back to the alternative identity, but never retry the identity that just
        // failed: that would recurse without end.
        $alternative = $this->spec->get_first_resource($resource, NS_CONV . 'alternative_identity');
        if ($alternative && $alternative !== $identity) {
            $this->create_uri($record, $uris, $resource, $alternative);
        }
    }

    /**
     * Run the :process steps declared on a spec resource over a value.
     *
     * Built-in steps are normalise, trim_quotes, flatten_utf8, title_case, regex,
     * round and substr; any other step name is dispatched to the static method of
     * that name on the Conversions class.
     *
     * @param string $resource Spec resource carrying the :process list
     * @param mixed  $value    Value to transform
     *
     * @return mixed The transformed value
     *
     * @throws Exception On an unknown step, or a regex pattern no delimiter can wrap
     */
    public function process($resource, $value) {
        $processes = $this->spec->get_first_resource($resource, NS_CONV . 'process');
        if ($processes == null) {
            return $value;
        }

        foreach ($this->spec->get_list_values($processes) as $step) {
            $function = str_replace(NS_CONV, "", $step['value']);
            switch ($function) {
                case 'normalise':
                    $value = strtolower(str_replace(' ', '_', trim($value)));
                    break;

                case 'trim_quotes':
                    $value = trim($value, '"');
                    break;

                case 'flatten_utf8':
                    $value = preg_replace('/[^-\w]+/', '', iconv('UTF-8', 'ascii//TRANSLIT', $value));
                    break;

                case 'title_case':
                    $value = ucwords($value);
                    break;

                case 'regex':
                    $regex_pattern = (string) $this->spec->get_first_literal($resource, NS_CONV . 'regex_match');
                    // Pick the first delimiter that does not occur inside the pattern itself
                    $delimiter = null;
                    foreach (array('%', '/', '@', '!', '^', ',', '.', '-') as $candidate_delimiter) {
                        if (strpos($regex_pattern, $candidate_delimiter) === false) {
                            $delimiter = $candidate_delimiter;
                            break;
                        }
                    }
                    if ($delimiter === null) {
                        throw new Exception("No usable regex delimiter for pattern: {$regex_pattern}");
                    }
                    $regex_output = $this->spec->get_first_literal($resource, NS_CONV . 'regex_output');
                    $value = preg_replace($delimiter . $regex_pattern . $delimiter, $regex_output, $value);
                    break;

                case 'round':
                    $value = round($value);
                    break;

                case 'substr':
                    $substr_start = $this->spec->get_first_literal($resource, NS_CONV . 'substr_start');
                    $substr_length = $this->spec->get_first_literal($resource, NS_CONV . 'substr_length');
                    $value = substr($value, $substr_start, $substr_length);
                    break;

                default:
                    //When no built in function matches, a custom process function is called
                    //(this is how feet_to_metres is reached)
                    if (method_exists("Conversions", $function)) {
                        $value = Conversions::$function($value);
                    } else {
                        throw new Exception("Unknown process requested: $function\n");
                    }
            }
        }

        return $value;
    }

    /**
     * Look a key up in a lookup declared in the spec.
     *
     * @param string $lookup Lookup resource URI
     * @param mixed  $key    Key to find
     *
     * @return mixed The result of lookup_config_entries() or lookup_csv_file(), or null
     *               when the lookup declares neither entries nor a CSV file
     */
    public function lookup($lookup, $key) {
        if ($this->spec->get_subject_property_values($lookup, NS_CONV . 'lookup_entry')) {
            return $this->lookup_config_entries($lookup, $key);
        } else if ($this->spec->get_subject_property_values($lookup, NS_CONV . 'lookup_csv_file')) {
            return $this->lookup_csv_file($lookup, $key);
        }
        return null;
    }

    /**
     * Look a key up in the :lookup_entry list of a lookup. The entries are read
     * from the spec once and cached.
     *
     * @return array|null The value node ('type', 'value') mapped to the key, or null when unknown
     *
     * @throws Exception When the lookup has no entries, repeats a key, or an entry lacks exactly one value
     */
    function lookup_config_entries($lookup, $key) {
        if (!isset($this->lookups[$lookup])) {
            $entries = $this->spec->get_subject_property_values($lookup, NS_CONV . 'lookup_entry');
            if (empty($entries)) {
                throw new Exception("Lookup {$lookup} had no lookup entries");
            }
            // Collected locally first, so a failure halfway does not leave a partial cache behind
            $table = array();
            foreach ($entries as $entry) {
                //Accept lookups with several keys mapped to a single value
                $lookup_keys = $this->spec->get_subject_property_values($entry['value'], NS_CONV . 'lookup_key');
                $lookup_values = $this->spec->get_subject_property_values($entry['value'], NS_CONV . 'lookup_value');
                foreach ($lookup_keys as $lookup_key_array) {
                    $lookup_key = $lookup_key_array['value'];
                    if (isset($table[$lookup_key])) {
                        throw new Exception("Lookup <{$lookup}> contained a duplicate key");
                    }
                    if (count($lookup_values) != 1) {
                        throw new Exception("Lookup {$lookup} has an entry {$entry['value']} that does not have exactly one lookup value assigned.");
                    }
                    $table[$lookup_key] = $lookup_values[0];
                }
            }
            $this->lookups[$lookup] = $table;
        }
        return isset($this->lookups[$lookup][$key]) ? $this->lookups[$lookup][$key] : null;
    }

    /**
     * Look a key up in the CSV file of a lookup.
     *
     * The file is read front to back exactly once: every row passed on the way
     * to a match is cached, so keys that sit earlier in the file than a previous
     * match are still found later on.
     *
     * @return string|false|null The value found in :lookup_value_column, or false when the key is not in the file
     *
     * @throws Exception When the file cannot be opened
     */
    function lookup_csv_file($lookup, $key) {
        if (isset($this->lookups[$lookup]['keys'][$key])) {
            return $this->lookups[$lookup]['keys'][$key];
        }
        if (!empty($this->lookups[$lookup]['exhausted'])) {
            return false;
        }

        $filename = $this->spec->get_first_literal($lookup, NS_CONV . 'lookup_csv_file');
        $key_column = $this->spec->get_first_literal($lookup, NS_CONV . 'lookup_key_column');
        $value_column = $this->spec->get_first_literal($lookup, NS_CONV . 'lookup_value_column');

        //retain file handle
        if (!isset($this->lookups[$lookup]['filehandle'])) {
            $handle = fopen($filename, 'r');
            if ($handle === false) {
                throw new Exception("Unable to open lookup file {$filename}");
            }
            $this->lookups[$lookup]['filehandle'] = $handle;
        }
        $handle = $this->lookups[$lookup]['filehandle'];

        while (($row = fgetcsv($handle)) !== false) {
            if (!isset($row[$key_column])) {
                // Blank or short row
                continue;
            }
            $row_key = $row[$key_column];
            $row_value = isset($row[$value_column]) ? $row[$value_column] : null;

            // Remember every row passed; the first occurrence of a key wins
            if (!isset($this->lookups[$lookup]['keys'][$row_key])) {
                $this->lookups[$lookup]['keys'][$row_key] = $row_value;
            }

            if ($row_key == $key) {
                $this->lookups[$lookup]['keys'][$key] = $row_value;
                return $row_value;
            }
        }

        // End of file: release the handle and answer later misses without touching the file again
        fclose($handle);
        $this->lookups[$lookup]['filehandle'] = null;
        $this->lookups[$lookup]['exhausted'] = true;

        return false;
    }

}