2 changes: 2 additions & 0 deletions .gitignore
@@ -162,3 +162,5 @@ pufferlib/ocean/impulse_wars/*-release/
 pufferlib/ocean/impulse_wars/debug-*/
 pufferlib/ocean/impulse_wars/release-*/
 pufferlib/ocean/impulse_wars/benchmark/
+*.a
+*.o
3 changes: 2 additions & 1 deletion pufferlib/extensions/bindings.cpp
@@ -156,7 +156,8 @@ TORCH_LIBRARY(_C, m) {
     m.def("fc_max(Tensor x, Tensor W, Tensor b) -> Tensor");
 }
 
-PYBIND11_MODULE(_C, m) {
+__attribute__((visibility("default")))
+extern void register_pufferlib_bindings(pybind11::module_& m) {
     m.def("log_environments", &log_environments);
     m.def("rollouts", &rollouts);
     m.def("train", &train);
2 changes: 2 additions & 0 deletions pufferlib/extensions/cuda/cuda_kernels.cpp
@@ -0,0 +1,2 @@
+// Compiles the torch/cuda kernels used in pufferlib into _C.so, which is statically linked by <env>/binding.so
+#include "kernels.h"
13 changes: 13 additions & 0 deletions pufferlib/extensions/env_glue.cpp
@@ -0,0 +1,13 @@
+#include <stdio.h>
+
+#include <pybind11/pybind11.h>
+
+void register_pufferlib_bindings(pybind11::module_& m);
+
+PYBIND11_MODULE(binding, m) {
+    register_pufferlib_bindings(m);
+}
+
+extern "C" void test_function() {
+    printf("Test function called!\n");
+}
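Taken together, cuda_kernels.cpp, bindings.cpp, and env_glue.cpp split each compiled module in two: the torch/CUDA code is built once into the shared _C library, which exports a single registration hook, while a thin per-environment glue TU owns the actual Python entry point and forwards to that hook. A condensed two-file sketch of the pattern (the function and module names match this diff; the stand-in train() and comments are illustrative assumptions, not the real sources):

// --- bindings.cpp (sketch): compiled into the shared _C library. Built with
// -fvisibility=hidden (see setup.py below), so the hook must explicitly opt
// back in to default visibility or env_glue.cpp cannot resolve it at link time.
#include <pybind11/pybind11.h>

int train() { return 0; }  // stand-in for the real training entry point

__attribute__((visibility("default")))
extern void register_pufferlib_bindings(pybind11::module_& m) {
    m.def("train", &train);  // shared defs land on the caller's module
}

// --- env_glue.cpp (sketch): one thin TU per environment. PYBIND11_MODULE
// emits the PyInit_binding symbol for this .so; the shared logic comes from _C.
#include <pybind11/pybind11.h>

void register_pufferlib_bindings(pybind11::module_& m);

PYBIND11_MODULE(binding, m) {
    register_pufferlib_bindings(m);
}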
516 changes: 282 additions & 234 deletions pufferlib/extensions/pufferlib.cpp

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions pufferlib/ocean/env_binding.h
@@ -669,6 +669,7 @@ static PyMethodDef methods[] = {
     {NULL, NULL, 0, NULL}
 };
 
+#ifndef PUFFER_NATIVECPP_PYBINDINGS
 // Module definition
 static PyModuleDef module = {
     PyModuleDef_HEAD_INIT,
@@ -682,3 +683,5 @@ PyMODINIT_FUNC PyInit_binding(void) {
     import_array();
     return PyModule_Create(&module);
 }
+
+#endif
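This guard exists because binding.so now gets its module initializer from env_glue.cpp: when the env bindings are compiled with -DPUFFER_NATIVECPP_PYBINDINGS=1 (added to extra_compile_args in setup.py below), the legacy CPython module table and PyInit_binding in env_binding.h are compiled out, leaving PYBIND11_MODULE(binding, m) as the only init symbol in the shared object. A minimal, self-contained sketch of the switch (not the real env_binding.h):

// Compile with -DPUFFER_NATIVECPP_PYBINDINGS=1 to suppress the legacy
// CPython entry point; without the flag, this TU defines PyInit_binding itself.
#include <Python.h>

#ifndef PUFFER_NATIVECPP_PYBINDINGS
static PyModuleDef module = {
    PyModuleDef_HEAD_INIT, "binding", NULL, -1, NULL,
};

PyMODINIT_FUNC PyInit_binding(void) {
    return PyModule_Create(&module);
}
#endif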
56 changes: 46 additions & 10 deletions pufferlib/pufferl.py
@@ -37,8 +37,9 @@
 import pufferlib.pytorch
 try:
     from pufferlib import _C
+    from pufferlib import fake_tensors
 except ImportError:
-    raise ImportError('Failed to import PufferLib C++ backend. If you have non-default PyTorch, try installing with --no-build-isolation')
+    raise ImportError('Failed to import C/CUDA advantage kernel. If you have non-default PyTorch, try installing with --no-build-isolation')
 
 import rich
 import rich.traceback
@@ -565,7 +566,7 @@ def _train_rank(env_name, args=None, logger=None, verbose=True, early_stop_fn=No
     pufferl = PuffeRL(train_config, vec_config, env_config, policy_config, logger, verbose)
 
     if train_config['profile']:
-        _C.profiler_start()
+        binding.profiler_start()
 
     # Sweep needs data for early stopped runs, so send data when steps > 100M
     logging_threshold = min(0.20*train_config['total_timesteps'], 100_000_000)
@@ -589,16 +590,15 @@ def _train_rank(env_name, args=None, logger=None, verbose=True, early_stop_fn=No
         if pufferl.global_step > logging_threshold:
             all_logs.append(logs)
 
-        if should_stop_early:
-            if train_config['profile']:
-                _C.profiler_stop()
-            model_path = pufferl.close()
-            pufferl.logger.log_cost(pufferl.uptime)
-            pufferl.logger.close(model_path, early_stop=True)
-            return pufferl, all_logs
+        if should_stop_early is not None and should_stop_early(logs):
+            if train_config['profile']:
+                _C.profiler_stop()
+            model_path = pufferl.close()
+            pufferl.logger.close(model_path)
+            return all_logs
 
     if train_config['profile']:
-        _C.profiler_stop()
+        binding.profiler_stop()
 
     pufferl.print_dashboard()
 
@@ -684,6 +684,41 @@ def train(env_name, args=None, logger=None, verbose=True, early_stop_fn=None):
     pufferl.logger.close(model_path, early_stop=False)
     return all_logs
 
+def sps(env_name, args=None, vecenv=None, policy=None, logger=None, verbose=True, should_stop_early=None):
+    args = args or load_config(env_name)
+    train_config = dict(**args['train'])#, env=env_name)
+    train_config['env_name'] = args['env_name']
+    train_config['vec_kwargs'] = args['vec']
+    train_config['env_kwargs'] = args['env']
+    train_config['total_agents'] = args['vec']['total_agents']
+    train_config['num_buffers'] = args['vec']['num_buffers']
+    pufferl = PuffeRL(train_config, logger, verbose)
+    # Warmup
+    for _ in range(3):
+        _C.batched_forward(
+            pufferl.pufferl_cpp,
+            pufferl.observations,
+            pufferl.total_minibatches,
+            pufferl.minibatch_segments,
+        )
+
+    N = 100
+    torch.cuda.synchronize()
+    start = time.time()
+    for _ in range(N):
+        _C.batched_forward(
+            pufferl.pufferl_cpp,
+            pufferl.observations,
+            pufferl.total_minibatches,
+            pufferl.minibatch_segments,
+        )
+    torch.cuda.synchronize()
+    end = time.time()
+    dt = end - start
+    sps = pufferl.config['batch_size']*N/dt
+    print(f'SPS: {sps/1e6:.1f}M')
+
+
 def eval(env_name, args=None, vecenv=None, policy=None):
     args = args or load_config(env_name)
     backend = args['vec']['backend']
@@ -1147,6 +1182,7 @@ def main():
 
     mode = sys.argv.pop(1)
     env_name = sys.argv.pop(1)
+
     if mode == 'train':
         train(env_name=env_name)
     elif mode == 'eval':
75 changes: 63 additions & 12 deletions setup.py
@@ -12,6 +12,10 @@
 import platform
 import shutil
 import pybind11
+import torch
+import subprocess
+import sysconfig
+import torch.utils.cpp_extension as cpp_ext
 
 from setuptools.command.build_ext import build_ext
 from torch.utils import cpp_extension
@@ -37,6 +41,9 @@
 NO_OCEAN = os.getenv("NO_OCEAN", "0") == "1"
 NO_TRAIN = os.getenv("NO_TRAIN", "0") == "1"
 
+if DEBUG:
+    print("*****Building in DEBUG mode*******")
+
 # Build raylib for your platform
 RAYLIB_URL = 'https://github.com/raysan5/raylib/releases/download/5.5/'
 RAYLIB_NAME = 'raylib-5.5_macos' if platform.system() == "Darwin" else 'raylib-5.5_linux_amd64'
@@ -82,6 +89,7 @@ def download_box2d(platform):
 extra_compile_args = [
     '-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION',
     '-DPLATFORM_DESKTOP',
+    '-DPUFFER_NATIVECPP_PYBINDINGS=1',
 ]
 extra_link_args = [
     '-fwrapv',
@@ -101,6 +109,12 @@
 extra_compile_args += [
     '-O0',
     '-g',
+    '-flto=auto',
+    '-fno-semantic-interposition',
+    '-fvisibility=hidden',
+    '-DPUFFER_DEBUG=1',
+    '-DDEBUG=1',
+
     #'-fsanitize=address,undefined,bounds,pointer-overflow,leak',
     #'-fno-omit-frame-pointer',
 ]
@@ -202,14 +216,49 @@ def run(self):
     extra_objects=[RAYLIB_A],
 )
 
+def _find_built_pufferlib_native(required: bool = True):
+    ext_suffix = ".so"
+
+    inplace = os.path.join("pufferlib", "native" + ext_suffix)
+    if os.path.isfile(inplace):
+        return inplace
+
+    cwd = os.getcwd()
+    candidates = glob.glob(os.path.join(cwd, "build", "**", "pufferlib", "_C*.so"), recursive=True)
+    candidates += glob.glob(os.path.join(cwd, "pufferlib", "_C*.so"), recursive=True)
+    candidates = [p for p in candidates if os.path.isfile(p)]
+    if candidates:
+        candidates.sort(key=os.path.getmtime, reverse=True)
+        return candidates[0]
+
+    if required:
+        raise ValueError(f"Could not find built pufferlib.native extension under {cwd}.")
+    return None
+
+native_lib = _find_built_pufferlib_native(required=False)
+if native_lib:
+    print(f"Adding native library {native_lib} to C/C++ extensions")
+    extension_kwargs['extra_objects'].append(native_lib)
+
+# Check if CUDA compiler is available. You need cuda dev, not just runtime.
+cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH') or torch.utils.cpp_extension.CUDA_HOME or '/usr/local/cuda'
+nvtx_lib_dir = os.path.join(cuda_home, 'lib64')  # Common on Linux; fall back to 'lib' if needed
+nvtx_lib = 'nvToolsExt'
+
 # Find C extensions
 c_extensions = []
 if not NO_OCEAN:
+    cpp_sources = [
+        "pufferlib/extensions/env_glue.cpp",
+    ]
     c_extension_paths = glob.glob('pufferlib/ocean/**/binding.c', recursive=True)
+    extension_kwargs['include_dirs'] += [pybind11.get_include(), torch.utils.cpp_extension.include_paths()[0], "pufferlib/extensions/"]
+
     c_extensions = [
-        Extension(
+        CppExtension(
             path.rstrip('.c').replace('/', '.'),
-            sources=[path],
+            sources=[path] + cpp_sources,
+            language='c++',
             **extension_kwargs,
         )
         for path in c_extension_paths if 'matsci' not in path
@@ -247,9 +296,6 @@ def finalize_options(self):
         super().finalize_options()
 
     def run(self):
-        import subprocess
-        import sysconfig
-        import torch.utils.cpp_extension as cpp_ext
 
         src = 'profile_kernels.cu'
         out = 'profile_kernels'
@@ -322,11 +368,15 @@ def run(self):
 
         # -g?
         clang_cmd = [
-            'clang', '-c', '-O2', '-DNDEBUG',
+            'clang', '-c',
+            ('-O0' if DEBUG else '-O2'),
+            ('-DDEBUG' if DEBUG else '-DNDEBUG'),
             '-I.', '-Ipufferlib/extensions', f'-Ipufferlib/ocean/{env_name}',
             f'-I./{RAYLIB_NAME}/include', '-I/usr/local/cuda/include',
             '-DPLATFORM_DESKTOP',
-            '-fno-semantic-interposition', '-fvisibility=hidden',
+            ('-DPUFFER_DEBUG=1' if DEBUG else ''),
+            '-fno-semantic-interposition',
+            ('-fvisibility=default' if DEBUG else '-fvisibility=hidden'),
             '-fPIC', '-fopenmp',
             env_binding_src, '-o', static_obj
         ]
@@ -367,11 +417,6 @@ def run(self):
     cmdclass[f"build_{env_name}_so"] = create_env_build_class(c_ext.name)
 
 
-# Check if CUDA compiler is available. You need cuda dev, not just runtime.
-import torch
-cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH') or torch.utils.cpp_extension.CUDA_HOME or '/usr/local/cuda'
-nvtx_lib_dir = os.path.join(cuda_home, 'lib64') # Common on Linux; fall back to 'lib' if needed
-nvtx_lib = 'nvToolsExt'
 torch_extensions = []
 if not NO_TRAIN:
     torch_sources = [
@@ -454,3 +499,9 @@ def run(self):
     cmdclass=cmdclass,
     include_dirs=[numpy.get_include(), RAYLIB_NAME + '/include'],
 )
+
+
+# export CC=gcc-12
+# export CXX=g++-12
+# export LDSHARED="g++-12 -shared"
+# export CUDAHOSTCXX=g++-12