diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index 571a5e58393b..ba0f239e3bd1 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -18,6 +18,7 @@ import glob import json import os +import random import re from .core import BenchmarkSuite @@ -26,8 +27,22 @@ from ..lang.cpp import CppCMakeDefinition, CppConfiguration from ..lang.java import JavaMavenDefinition, JavaConfiguration from ..utils.cmake import CMakeBuild +from ..utils.git import git from ..utils.maven import MavenBuild from ..utils.logger import logger +from ..utils.source import ArrowSources + + +def _rev_or_path_dirname(src, rev_or_path): + if rev_or_path == ArrowSources.WORKSPACE: + return rev_or_path + try: + sha = git.rev_parse(rev_or_path, git_dir=src.path) + if isinstance(sha, bytes): + sha = sha.decode("ascii") + return sha + except Exception: + return rev_or_path.replace("/", "_") def regex_filter(re_expr): @@ -47,6 +62,7 @@ def __init__(self, suite_filter=None, benchmark_filter=None, self.benchmark_filter = benchmark_filter self.repetitions = repetitions self.repetition_min_time = repetition_min_time + self.results_dir = None @property def suites(self): @@ -108,11 +124,14 @@ def __repr__(self): class CppBenchmarkRunner(BenchmarkRunner): """ Run suites from a CMakeBuild. """ - def __init__(self, build, benchmark_extras, **kwargs): + def __init__(self, build, benchmark_extras, run_id=None, + results_dir=None, **kwargs): """ Initialize a CppBenchmarkRunner. """ + super().__init__(**kwargs) self.build = build self.benchmark_extras = benchmark_extras - super().__init__(**kwargs) + self.run_id = run_id + self.results_dir = results_dir @staticmethod def default_configuration(**kwargs): @@ -217,19 +236,25 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs): build = CMakeBuild.from_path(rev_or_path) return CppBenchmarkRunner(build, **kwargs) else: - # Revisions can references remote via the `/` character, ensure - # that the revision is path friendly - path_rev = rev_or_path.replace("/", "_") + path_rev = _rev_or_path_dirname(src, rev_or_path) root_rev = os.path.join(root, path_rev) - os.mkdir(root_rev) + os.makedirs(root_rev, exist_ok=True) clone_dir = os.path.join(root_rev, "arrow") - # Possibly checkout the sources at given revision, no need to - # perform cleanup on cloned repository as root_rev is reclaimed. - src_rev, _ = src.at_revision(rev_or_path, clone_dir) + if os.path.isdir(clone_dir): + src_rev = ArrowSources(clone_dir) + else: + src_rev, _ = src.at_revision(rev_or_path, clone_dir) cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf) - build_dir = os.path.join(root_rev, "build") - return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs) + run_root = os.path.join(root_rev, "build", "run") + os.makedirs(run_root, exist_ok=True) + run_id = f"{random.randrange(16**8):08x}" + build_dir = os.path.join(run_root, run_id) + build = cmake_def.build(build_dir) + results_dir = os.path.join(root_rev, "bench", "run", run_id) + os.makedirs(results_dir, exist_ok=True) + return CppBenchmarkRunner(build, run_id=run_id, + results_dir=results_dir, **kwargs) class JavaBenchmarkRunner(BenchmarkRunner): @@ -310,16 +335,15 @@ def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs): maven_def = JavaMavenDefinition(rev_or_path, maven_conf) return JavaBenchmarkRunner(maven_def.build(rev_or_path), **kwargs) else: - # Revisions can references remote via the `/` character, ensure - # that the revision is path friendly - path_rev = rev_or_path.replace("/", "_") + path_rev = _rev_or_path_dirname(src, rev_or_path) root_rev = os.path.join(root, path_rev) - os.mkdir(root_rev) + os.makedirs(root_rev, exist_ok=True) clone_dir = os.path.join(root_rev, "arrow") - # Possibly checkout the sources at given revision, no need to - # perform cleanup on cloned repository as root_rev is reclaimed. - src_rev, _ = src.at_revision(rev_or_path, clone_dir) + if os.path.isdir(clone_dir): + src_rev = ArrowSources(clone_dir) + else: + src_rev, _ = src.at_revision(rev_or_path, clone_dir) maven_def = JavaMavenDefinition(src_rev.java, maven_conf) build_dir = os.path.join(root_rev, "arrow/java") return JavaBenchmarkRunner(maven_def.build(build_dir), **kwargs) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index e70cc3874f22..5a3e2b347f72 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -17,10 +17,12 @@ from io import StringIO import click +import datetime import json import logging import os import pathlib +import platform import sys from .benchmark.codec import JsonEncoder @@ -41,6 +43,27 @@ BOOL = ArrowBool() +def _run_metadata(ctx): + uname = platform.uname() + return { + "time": datetime.datetime.now(datetime.timezone.utc).isoformat(), + "cmd": { + "full": " ".join(sys.argv), + "params": ctx.params, + }, + "machine_info": { + "platform": platform.platform(), + "system": uname.system, + "release": uname.release, + "version": uname.version, + "machine": platform.machine(), + "processor": platform.processor(), + "architecture": platform.architecture()[0], + "logical_cores": os.cpu_count(), + }, + } + + @click.group(context_settings={"help_option_names": ["-h", "--help"]}) @click.option("--debug", type=BOOL, is_flag=True, default=False, envvar='ARCHERY_DEBUG', @@ -305,6 +328,11 @@ def check_language(ctx, param, value): click.option("--preserve", type=BOOL, default=False, show_default=True, is_flag=True, help="Preserve workspace for investigation."), + click.option("--preserve-dir", metavar="", + type=click.Path(file_okay=False, resolve_path=True), + default=None, + help="Parent directory in which to create the preserved " + "workspace. Has no effect without --preserve."), click.option("--output", metavar="", type=click.File("w", encoding="utf8"), default=None, help="Capture output result into file."), @@ -347,12 +375,12 @@ def benchmark_filter_options(cmd): default="WORKSPACE", required=False) @benchmark_common_options @click.pass_context -def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras, - java_home, java_options, build_extras, benchmark_extras, - cpp_benchmark_extras, language, **kwargs): +def benchmark_list(ctx, rev_or_path, src, preserve, preserve_dir, output, + cmake_extras, java_home, java_options, build_extras, + benchmark_extras, cpp_benchmark_extras, language, **kwargs): """ List benchmark suite. """ - with tmpdir(preserve=preserve) as root: + with tmpdir(preserve=preserve, preserve_dir=preserve_dir) as root: logger.debug(f"Running benchmark {rev_or_path}") if language == "cpp": @@ -392,10 +420,11 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras, "Currently only supported for language=cpp. " "[default: use runner-specific defaults]")) @click.pass_context -def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, - java_home, java_options, build_extras, benchmark_extras, - language, suite_filter, benchmark_filter, repetitions, - repetition_min_time, cpp_benchmark_extras, **kwargs): +def benchmark_run(ctx, rev_or_path, src, preserve, preserve_dir, output, + cmake_extras, java_home, java_options, build_extras, + benchmark_extras, language, suite_filter, benchmark_filter, + repetitions, repetition_min_time, cpp_benchmark_extras, + **kwargs): """ Run benchmark suite. This command will run the benchmark suite for a single build. This is @@ -433,7 +462,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, \b archery benchmark run --output=run.json """ - with tmpdir(preserve=preserve) as root: + with tmpdir(preserve=preserve, preserve_dir=preserve_dir) as root: logger.debug(f"Running benchmark {rev_or_path}") if language == "cpp": @@ -465,6 +494,15 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, # when asked to JSON-serialize the results, so produce a JSON # output even when none is requested. json_out = json.dumps(runner_base, cls=JsonEncoder) + if runner_base.results_dir is not None: + results_path = os.path.join(runner_base.results_dir, + "benchmark.json") + with open(results_path, "w") as f: + f.write(json_out) + metadata_path = os.path.join(runner_base.results_dir, + "metadata.json") + with open(metadata_path, "w") as f: + json.dump(_run_metadata(ctx), f, indent=2, default=str) if output is not None: output.write(json_out) @@ -486,11 +524,11 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, @click.argument("baseline", metavar="[]]", default="origin/HEAD", required=False) @click.pass_context -def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, - suite_filter, benchmark_filter, repetitions, no_counters, - java_home, java_options, build_extras, benchmark_extras, - cpp_benchmark_extras, threshold, contender, baseline, - **kwargs): +def benchmark_diff(ctx, src, preserve, preserve_dir, output, language, + cmake_extras, suite_filter, benchmark_filter, repetitions, + no_counters, java_home, java_options, build_extras, + benchmark_extras, cpp_benchmark_extras, threshold, + contender, baseline, **kwargs): """Compare (diff) benchmark runs. This command acts like git-diff but for benchmark results. @@ -564,7 +602,7 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, # This should not recompute the benchmark from run.json archery --quiet benchmark diff WORKSPACE run.json > result.json """ - with tmpdir(preserve=preserve) as root: + with tmpdir(preserve=preserve, preserve_dir=preserve_dir) as root: logger.debug(f"Comparing {contender} (contender) with {baseline} (baseline)") if language == "cpp": diff --git a/dev/archery/archery/utils/tmpdir.py b/dev/archery/archery/utils/tmpdir.py index 07d7355c87fb..06eb9a6ba627 100644 --- a/dev/archery/archery/utils/tmpdir.py +++ b/dev/archery/archery/utils/tmpdir.py @@ -15,13 +15,17 @@ # specific language governing permissions and limitations # under the License. +import os from contextlib import contextmanager from tempfile import mkdtemp, TemporaryDirectory @contextmanager -def tmpdir(preserve=False, prefix="arrow-archery-"): - if preserve: +def tmpdir(preserve=False, prefix="arrow-archery-", preserve_dir=None): + if preserve and preserve_dir is not None: + os.makedirs(preserve_dir, exist_ok=True) + yield preserve_dir + elif preserve: yield mkdtemp(prefix=prefix) else: with TemporaryDirectory(prefix=prefix) as tmp: