Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,12 @@ Even more queries can be found [here](https://colab.research.google.com/github/R

# Latest updates

## Version 0.2.0 alpha 7
- rumble.lastResult now returns a pyspark/pandas DataFrame, an RDD, or a tuple, instead of the sequence object.
- Enhanced schema detection: when the detected static type of the overall query is DataFrame-compatible, the output can now be obtained as a DataFrame automatically, without explicitly providing a schema.
- It is now possible to access a table previously registered as a view via a table() function call. This is an alternative to binding variables.
- Enhanced JSONiq Update Facility support for updating Delta files and Hive metastore tables.

## Version 0.2.0 alpha 6
- Fix a bug with the config() call of the builder.
- Add withDelta() to configure Delta Lake tables and files, for use with the JSONiq Update Facility.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "jsoniq"
version = "0.2.0a6"
version = "0.2.0a7"
description = "Python edition of RumbleDB, a JSONiq engine"
requires-python = ">=3.11"
dependencies = [
Expand Down
Binary file modified src/jsoniq/jars/rumbledb-1.24.0.jar
Binary file not shown.
4 changes: 4 additions & 0 deletions tests/test_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def test1(self):
return [$join]
""");

self.assertIn("DataFrame", seq.availableOutputs())
self.assertIn("RDD", seq.availableOutputs())
self.assertIn("Local", seq.availableOutputs())
print(seq.json());
self.assertTrue(json.dumps(seq.json()) == json.dumps(([{'nb': 1, 'state': 'MA', 'sold': 'broiler'}, {'nb': 1, 'state': 'MA', 'sold': 'socks'}, {'nb': 2, 'state': 'MA', 'sold': 'toaster'}, {'nb': 2, 'state': 'MA', 'sold': 'toaster'}, {'nb': 2, 'state': 'MA', 'sold': 'socks'}, {'nb': 3, 'state': 'CA', 'sold': 'toaster'}, {'nb': 3, 'state': 'CA', 'sold': 'blender'}, {'nb': 3, 'state': 'CA', 'sold': 'blender'}, {'nb': 3, 'state': 'CA', 'sold': 'shirt'}],)))

seq = rumble.jsoniq("""
for $product in json-lines("http://rumbledb.org/samples/products-small.json", 10)
Expand Down
6 changes: 6 additions & 0 deletions tests/test_test1.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,9 @@ def test1(self):
expected = [[{'nb': 1, 'state': 'MA', 'sold': 'broiler'}, {'nb': 1, 'state': 'MA', 'sold': 'socks'}, {'nb': 2, 'state': 'MA', 'sold': 'toaster'}, {'nb': 2, 'state': 'MA', 'sold': 'toaster'}, {'nb': 2, 'state': 'MA', 'sold': 'socks'}, {'nb': 3, 'state': 'CA', 'sold': 'toaster'}, {'nb': 3, 'state': 'CA', 'sold': 'blender'}, {'nb': 3, 'state': 'CA', 'sold': 'blender'}, {'nb': 3, 'state': 'CA', 'sold': 'shirt'}]]

self.assertTrue(json.dumps(seq.json()) == json.dumps(expected))

self.assertIn("DataFrame", seq.availableOutputs())

seq.df().show()

self.assertEqual(seq.df().count(), 1)
50 changes: 50 additions & 0 deletions tests/test_test2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from jsoniq import RumbleSession
from unittest import TestCase
import json
class TryTesting(TestCase):
    """Checks the outputs of a JSONiq join query that returns a flat sequence.

    The query joins an inline array of stores with an inline array of sales
    on "store number" and returns the joined objects directly (not wrapped
    in an array), so nine separate items are expected.
    """

    def test1(self):
        """Run the join query and verify its JSON and DataFrame outputs."""
        # The syntax to start a session is similar to that of Spark.
        # A RumbleSession is a SparkSession that additionally knows about RumbleDB.
        # All attributes and methods of SparkSession are also available on RumbleSession.
        rumble = RumbleSession.builder.appName("PyRumbleExample").getOrCreate()

        # A more complex, standalone query. The query text is passed to the
        # engine verbatim, so it is kept exactly as written.
        seq = rumble.jsoniq("""
let $stores :=
[
{ "store number" : 1, "state" : "MA" },
{ "store number" : 2, "state" : "MA" },
{ "store number" : 3, "state" : "CA" },
{ "store number" : 4, "state" : "CA" }
]
let $sales := [
{ "product" : "broiler", "store number" : 1, "quantity" : 20 },
{ "product" : "toaster", "store number" : 2, "quantity" : 100 },
{ "product" : "toaster", "store number" : 2, "quantity" : 50 },
{ "product" : "toaster", "store number" : 3, "quantity" : 50 },
{ "product" : "blender", "store number" : 3, "quantity" : 100 },
{ "product" : "blender", "store number" : 3, "quantity" : 150 },
{ "product" : "socks", "store number" : 1, "quantity" : 500 },
{ "product" : "socks", "store number" : 2, "quantity" : 10 },
{ "product" : "shirt", "store number" : 3, "quantity" : 10 }
]
let $join :=
for $store in $stores[], $sale in $sales[]
where $store."store number" = $sale."store number"
return {
"nb" : $store."store number",
"state" : $store.state,
"sold" : $sale.product
}
return $join
""")

        expected = (
            {'nb': 1, 'state': 'MA', 'sold': 'broiler'},
            {'nb': 1, 'state': 'MA', 'sold': 'socks'},
            {'nb': 2, 'state': 'MA', 'sold': 'toaster'},
            {'nb': 2, 'state': 'MA', 'sold': 'toaster'},
            {'nb': 2, 'state': 'MA', 'sold': 'socks'},
            {'nb': 3, 'state': 'CA', 'sold': 'toaster'},
            {'nb': 3, 'state': 'CA', 'sold': 'blender'},
            {'nb': 3, 'state': 'CA', 'sold': 'blender'},
            {'nb': 3, 'state': 'CA', 'sold': 'shirt'},
        )

        # Compare the serialized forms so the check does not depend on the
        # container type returned by json(); assertEqual reports both
        # strings on failure, unlike assertTrue(a == b).
        self.assertEqual(json.dumps(seq.json()), json.dumps(expected))

        self.assertIn("DataFrame", seq.availableOutputs())

        seq.df().show()

        # One DataFrame row per joined object.
        self.assertEqual(seq.df().count(), 9)