Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions common/benchmarking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Simple benchmarking utility.

from IPython.core.magic import register_cell_magic
from IPython import get_ipython
import time
from logica.common import sqlite3_logica
import pandas
import signal

# Problem name -> elapsed wall-clock seconds, or a 'TIMEOUT (> N)' string.
# Accumulated across all %%loop invocations; reset via Clear().
timing = {}
# Formatted ASCII timing tables, one appended per %%loop invocation.
reports = []
# Per-item time budget in seconds for a single %%loop iteration.
timeout = 200

def Clear():
  """Discard all accumulated timing data and report history.

  Rebinds the module-level `timing` dict and `reports` list to fresh
  empty containers so a new benchmarking session starts clean.
  """
  global timing, reports
  timing, reports = {}, []


@register_cell_magic
def loop(line, cell):
  """Cell magic: run the cell once per item of an iterator, timing each run.

  Usage:
    %%loop problem_name, iterator_expression
    ...cell body, where '{loop_parameter}' is replaced by the current item...

  `line` must evaluate in the user namespace to a (problem_name, iterator)
  pair. Each item is exposed as `loop_parameter` in the user namespace and
  also substituted textually for '{loop_parameter}' in the cell source.
  Per-item wall times are recorded in the module-level `timing` dict, and a
  formatted report is appended to `reports` and printed. If an item exceeds
  `timeout` seconds it is marked TIMEOUT and all remaining items are skipped.

  NOTE(review): relies on SIGALRM, so this works only on Unix and only when
  called from the main thread. Items are assumed to be strings (they are
  passed to str.replace) — confirm against callers.
  """
  global timing
  local_timing = {}
  ip = get_ipython()
  problem_name, iterator = ip.ev(line)
  stop = False  # Set after the first timeout; all later items are skipped.

  for item in iterator:
    if stop:
      print('Skipping %s (previous timeout).' % item)
      timing[item] = local_timing[item] = 'TIMEOUT (> %d)' % timeout
      continue

    print('Running %s.' % item)
    ip.user_ns['loop_parameter'] = item

    # The alarm handler aborts the running cell by raising KeyboardInterrupt.
    # IPython catches exceptions raised inside run_cell, so the timed_out
    # flag (a one-element list mutated by the closure) is how we detect it.
    timed_out = [False]
    def handler(signum, frame):
      timed_out[0] = True
      raise KeyboardInterrupt("Timeout")

    old_handler = signal.getsignal(signal.SIGALRM)
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(timeout)

    start_time = time.perf_counter()
    try:
      ip.run_cell(cell.replace('{loop_parameter}', item))
    finally:
      # Always cancel the pending alarm and restore the previous handler,
      # even if run_cell itself raises.
      signal.alarm(0)
      signal.signal(signal.SIGALRM, old_handler)
    elapsed = time.perf_counter() - start_time

    if timed_out[0]:
      print('*** TIMEOUT on %s ***' % item)
      stop = True
      elapsed = 'TIMEOUT (> %d)' % timeout

    timing[item] = elapsed
    local_timing[item] = elapsed

  report = (' === Timing for %s ===\n' % problem_name) + (
      sqlite3_logica.DataframeAsArtisticTable(
          pandas.DataFrame({'problem': list(local_timing.keys()),
                            'time': list(local_timing.values())})))
  reports.append(report)
  print(report)

21 changes: 18 additions & 3 deletions common/concertina_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,11 @@ def __init__(self, config, engine, display_mode='colab', iterations=None):
self.all_actions = {a["name"] for a in self.config}
self.complete_actions = set()
self.running_actions = set()
assert display_mode in ('colab', 'terminal', 'colab-text', 'silent'), (
self.show_only_running = False
if os.getenv('LOGICA_TERMINAL_ONELINE', 'no') == 'yes':
self.show_only_running = True
assert display_mode in ('colab', 'terminal',
'colab-text', 'silent'), (
'Unrecognized display mode: %s' % display_mode)
self.display_mode = display_mode
self.display_id = self.GetDisplayId()
Expand Down Expand Up @@ -293,6 +297,13 @@ def AsArtGraph():
extra_lines = self.ProgressBar().split('\n')
return AsArtGraph().GetPicture(updating=updating,
extra_lines=extra_lines)
def ShowRunning(self, updating):
  """Return a one-line progress string: '[done / total] <running action>'.

  Used in place of the full ASCII graph when one-line terminal output is
  enabled. Returns '*' when no action is currently running.

  NOTE(review): `updating` is unused here — presumably kept for signature
  parity with the other rendering methods; confirm before removing.
  """
  nodes, edges = self.AsNodesAndEdges()  # edges unused; only names needed
  # Running actions carry an ANSI bold prefix (applied by AsNodesAndEdges),
  # so detect them by that escape-sequence marker.
  running = [n for n in nodes if n.startswith('\033[1m')]
  if not running:
    return '*'
  return '[%d / %d] ' % (len(self.complete_actions),
                         len(self.all_actions)) + running[0]

def AsNodesAndEdges(self):
"""Nodes and edges to display in terminal."""
Expand Down Expand Up @@ -405,14 +416,18 @@ def UpdateDisplay(self, final=False):
self.display_update_period = min(0.5, self.display_update_period * 1.2)
if (now - self.recent_display_update_seconds <
self.display_update_period and
not final):
not final and
not self.show_only_running):
# Avoid frequent display updates slowing down execution.
return
self.recent_display_update_seconds = now
if self.display_mode == 'colab':
update_display(self.AsGraphViz(), display_id=self.display_id)
elif self.display_mode == 'terminal':
print(self.AsTextPicture(updating=True))
if self.show_only_running:
print(self.ShowRunning(updating=True))
else:
print(self.AsTextPicture(updating=True))
elif self.display_mode == 'colab-text':
update_display(
self.StateAsSimpleHTML(),
Expand Down
15 changes: 14 additions & 1 deletion common/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,4 +217,17 @@ def Convert(l):

def HierarchicalOptions():
  """Return vis.js layout options for a top-down hierarchical graph."""
  hierarchical_settings = {'direction': 'UD', 'sortMethod': 'directed'}
  return {'layout': {'hierarchical': hierarchical_settings}}

def InstallRequire():
  """Inject RequireJS into the notebook output so logica/vis.js can load it.

  NOTE(review): fetches require.js 2.3.6 from cdnjs, so rendering needs
  network access in the browser.
  """
  from IPython.display import display, HTML
  # This script manually loads RequireJS so that logica/vis.js can work
  display(HTML('''
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script>
<script>
// This ensures that even if require is loaded late,
// the logica script can still find it.
window.require = require;
</script>
'''))

96 changes: 96 additions & 0 deletions examples/graph/tgdk/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Benchmark Artifacts for "Logica-TGD: Transforming Graph Databases Logically"

This directory contains reproducible benchmark notebooks for the paper:

> **Logica-TGD: Transforming Graph Databases Logically**
> Evgeny Skvortsov, Yilin Xia, Bertram Ludäscher, Shawn Bowers
> *TGDK, 2026*

## Benchmarks

We compare four systems on graph computation problems (transitive closure,
pairwise distances, same generation):

- **Logica** — compiling to DuckDB SQL
- **Soufflé** — Datalog engine with parallel evaluation
- **DuckPGQ** — DuckDB extension implementing SQL/PGQ (Cypher-style queries)
- **Nemo** — single-threaded Rust rule engine (for the Nemo column only)

All benchmarks were run on a Google Cloud **c2d-standard-32** instance
(32 vCPUs, 128 GB RAM) using Logica 1.3.1415926535897, DuckDB 1.3.2,
Soufflé 2.4, and Nemo 0.10.0.

### Main notebooks

| Notebook | Description |
|----------|-------------|
| `benchmark_logica.ipynb` | Logica benchmarks (all problems). **Run this first** — it generates input data (CSV files and `graphs.db`) used by the other notebooks. |
| `benchmark_souffle.ipynb` | Soufflé benchmarks (compiled mode) |
| `benchmark_cypher.ipynb` | DuckPGQ / Cypher benchmarks |

### Auxiliary materials

| File | Description |
|------|-------------|
| `auxiliary/benchmark_souffle_interpreted.ipynb` | Soufflé benchmarks in interpreted mode (used in the original submission) |
| `auxiliary/benchmark_logica_with_output_sizes.ipynb` | Logica notebook computing output sizes for the table in the paper |
| `auxiliary/souffle_compiled_vs_interpreted.md` | Comparison of Soufflé compiled vs. interpreted modes |

## Reproducing the results

1. Install Jupyter Notebook:
```
python3 -m pip install notebook
```

2. Install DuckDB:
```
python3 -m pip install duckdb
```

3. Install Soufflé (v2.4 was used) by following the instructions at
[souffle-lang.github.io](https://souffle-lang.github.io/install).

4. Clone this repository:
```
git clone https://github.com/EvgSkv/logica
```

5. Start the notebook server from the repository root, so that Logica
is importable:
```
cd logica
python3 -m notebook examples/graph/tgdk
```
Alternatively, install Logica with `python3 -m pip install logica` and start
the notebook from anywhere.

6. Run the notebooks starting with `benchmark_logica.ipynb` — it
generates the input data (CSV files and `graphs.db`) used by the
Soufflé and DuckPGQ notebooks. Then proceed to `benchmark_souffle.ipynb`
and `benchmark_cypher.ipynb`.

For the Nemo comparison, see the [Nemo section](#nemo-comparison) below.

## Nemo comparison

| File | Description |
|------|-------------|
| `benchmark_and_collect.py` | Runs all TC and SG benchmarks on both Logica and Nemo, collects wall/CPU times and fact counts into `benchmark_results.txt` (ASCII table) and `benchmark_results.csv`. Generates the `.l` and `.nemo` programs from templates. |
| `tc_g1k.l`, `tc_g1k.nemo` | Example Logica and Nemo programs for transitive closure (shown for reference — the script regenerates all sizes). |
| `sg_tree7.l`, `sg_tree7.nemo` | Example Logica and Nemo programs for same generation. |
| `benchmark_results.txt` | Output of `benchmark_and_collect.py` from our run. |

To run the Nemo comparison:

1. Install Nemo 0.10.0 (see [nemo rule engine](https://github.com/knowsys/nemo)).
   The Nemo binary must be on `PATH`. The script invokes it as `nemo`;
   if your installed binary is named `nmo`, adjust the command in the script.
2. Make sure the CSV inputs (`g1k.csv`..`g5k.csv`, `tree7.csv`..`tree12.csv`)
are present in the same directory. They are produced by running
`benchmark_logica.ipynb`.
3. Run the script from this directory:
```
python3 benchmark_and_collect.py
```
It writes `benchmark_results.txt` and `benchmark_results.csv`.
Binary file added examples/graph/tgdk/all_programs.pdf
Binary file not shown.
Loading
Loading