diff --git a/README.md b/README.md index 1519603..c8af0a2 100644 --- a/README.md +++ b/README.md @@ -349,6 +349,12 @@ Even more queries can be found [here](https://colab.research.google.com/github/R # Latest updates +## Version 0.2.0 alpha 7 +- rumble.lastResult now returns a pyspark/pandas DataFrame, an RDD, or a tuple, and no longer the sequence object. +- Enhanced schema detection. When the detected static type of the overall query is DataFrame-compatible, it is now possible to obtain the output as a DataFrame automatically, without explicitly giving a schema. +- It is now possible to access a table previously registered as a view via a table() function call. This is an alternative to binding variables. +- Enhancements to the JSONiq Update Facility support for updating Delta files and Hive metastore tables. + ## Version 0.2.0 alpha 6 - Fix a bug with the config() call of the builder. - add withDelta() to configure Delta Lake tables and files, for use with the JSONiq Update Facility. diff --git a/pyproject.toml b/pyproject.toml index 772f7cc..354ade6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "jsoniq" -version = "0.2.0a6" +version = "0.2.0a7" description = "Python edition of RumbleDB, a JSONiq engine" requires-python = ">=3.11" dependencies = [ diff --git a/src/jsoniq/jars/rumbledb-1.24.0.jar b/src/jsoniq/jars/rumbledb-1.24.0.jar index 4d064ed..5b1a5e3 100644 Binary files a/src/jsoniq/jars/rumbledb-1.24.0.jar and b/src/jsoniq/jars/rumbledb-1.24.0.jar differ diff --git a/tests/test_sample.py b/tests/test_sample.py index 4232ac1..6644b7f 100644 --- a/tests/test_sample.py +++ b/tests/test_sample.py @@ -71,7 +71,11 @@ def test1(self): return [$join] """); + self.assertIn("DataFrame", seq.availableOutputs()) + self.assertIn("RDD", seq.availableOutputs()) + self.assertIn("Local", seq.availableOutputs()) print(seq.json()); + self.assertTrue(json.dumps(seq.json()) == json.dumps(([{'nb': 1, 'state': 
'MA', 'sold': 'broiler'}, {'nb': 1, 'state': 'MA', 'sold': 'socks'}, {'nb': 2, 'state': 'MA', 'sold': 'toaster'}, {'nb': 2, 'state': 'MA', 'sold': 'toaster'}, {'nb': 2, 'state': 'MA', 'sold': 'socks'}, {'nb': 3, 'state': 'CA', 'sold': 'toaster'}, {'nb': 3, 'state': 'CA', 'sold': 'blender'}, {'nb': 3, 'state': 'CA', 'sold': 'blender'}, {'nb': 3, 'state': 'CA', 'sold': 'shirt'}],))) seq = rumble.jsoniq(""" for $product in json-lines("http://rumbledb.org/samples/products-small.json", 10) diff --git a/tests/test_test1.py b/tests/test_test1.py index 966cf57..8de677b 100644 --- a/tests/test_test1.py +++ b/tests/test_test1.py @@ -42,3 +42,9 @@ def test1(self): expected = [[{'nb': 1, 'state': 'MA', 'sold': 'broiler'}, {'nb': 1, 'state': 'MA', 'sold': 'socks'}, {'nb': 2, 'state': 'MA', 'sold': 'toaster'}, {'nb': 2, 'state': 'MA', 'sold': 'toaster'}, {'nb': 2, 'state': 'MA', 'sold': 'socks'}, {'nb': 3, 'state': 'CA', 'sold': 'toaster'}, {'nb': 3, 'state': 'CA', 'sold': 'blender'}, {'nb': 3, 'state': 'CA', 'sold': 'blender'}, {'nb': 3, 'state': 'CA', 'sold': 'shirt'}]] self.assertTrue(json.dumps(seq.json()) == json.dumps(expected)) + + self.assertIn("DataFrame", seq.availableOutputs()) + + seq.df().show() + + self.assertEqual(seq.df().count(), 1) diff --git a/tests/test_test2.py b/tests/test_test2.py new file mode 100644 index 0000000..48bc938 --- /dev/null +++ b/tests/test_test2.py @@ -0,0 +1,50 @@ +from jsoniq import RumbleSession +from unittest import TestCase +import json +class TryTesting(TestCase): + def test1(self): + # The syntax to start a session is similar to that of Spark. + # A RumbleSession is a SparkSession that additionally knows about RumbleDB. + # All attributes and methods of SparkSession are also available on RumbleSession. 
+ rumble = RumbleSession.builder.appName("PyRumbleExample").getOrCreate(); + # A more complex, standalone query + + seq = rumble.jsoniq(""" + let $stores := + [ + { "store number" : 1, "state" : "MA" }, + { "store number" : 2, "state" : "MA" }, + { "store number" : 3, "state" : "CA" }, + { "store number" : 4, "state" : "CA" } + ] + let $sales := [ + { "product" : "broiler", "store number" : 1, "quantity" : 20 }, + { "product" : "toaster", "store number" : 2, "quantity" : 100 }, + { "product" : "toaster", "store number" : 2, "quantity" : 50 }, + { "product" : "toaster", "store number" : 3, "quantity" : 50 }, + { "product" : "blender", "store number" : 3, "quantity" : 100 }, + { "product" : "blender", "store number" : 3, "quantity" : 150 }, + { "product" : "socks", "store number" : 1, "quantity" : 500 }, + { "product" : "socks", "store number" : 2, "quantity" : 10 }, + { "product" : "shirt", "store number" : 3, "quantity" : 10 } + ] + let $join := + for $store in $stores[], $sale in $sales[] + where $store."store number" = $sale."store number" + return { + "nb" : $store."store number", + "state" : $store.state, + "sold" : $sale.product + } + return $join + """); + + expected = ({'nb': 1, 'state': 'MA', 'sold': 'broiler'}, {'nb': 1, 'state': 'MA', 'sold': 'socks'}, {'nb': 2, 'state': 'MA', 'sold': 'toaster'}, {'nb': 2, 'state': 'MA', 'sold': 'toaster'}, {'nb': 2, 'state': 'MA', 'sold': 'socks'}, {'nb': 3, 'state': 'CA', 'sold': 'toaster'}, {'nb': 3, 'state': 'CA', 'sold': 'blender'}, {'nb': 3, 'state': 'CA', 'sold': 'blender'}, {'nb': 3, 'state': 'CA', 'sold': 'shirt'}) + + self.assertTrue(json.dumps(seq.json()) == json.dumps(expected)) + + self.assertIn("DataFrame", seq.availableOutputs()) + + seq.df().show() + + self.assertEqual(seq.df().count(), 9)