diff --git a/doc/nle/source/deterministic_seeds.rst b/doc/nle/source/deterministic_seeds.rst new file mode 100644 index 000000000..fb21a8fc0 --- /dev/null +++ b/doc/nle/source/deterministic_seeds.rst @@ -0,0 +1,86 @@ +Deterministic Seeding +===================== + +NLE provides several mechanisms to make game runs reproducible. This is +important for training and evaluating reinforcement learning agents, where +differences between runs should come from agent behaviour, not from hidden +sources of randomness. + +Core and Display Seeds +********************** + +NetHack 3.6 uses two random number generators (RNGs): + +- **core** -- drives gameplay randomness (monster spawns, loot, combat). +- **disp** -- drives display-only randomness (as an anti-TAS measure). + +You can fix both seeds before a reset: + +.. code-block:: python + + env = NLE() + env.seed(core=42, disp=42, reseed=False) + obs, info = env.reset() # reproducible from here + +Setting ``reseed=False`` disables NetHack's periodic reseeding, which would +otherwise inject true randomness during play. + +Level Generation Seed +********************* + +By default the core RNG is also used when NetHack generates new dungeon +levels, meaning the levels depend on in-game actions taken before reaching +them. The optional **lgen** seed isolates level generation into its own RNG +so that the dungeon layout is fixed regardless of gameplay choices: + +.. code-block:: python + + env.seed(core=42, disp=42, reseed=False, lgen=99) + +Fixing Time-based Effects (``fix_moon_phase``) +********************************************** + +NetHack changes gameplay based on the real-world system clock: + +- **Moon phase** -- full moon grants +1 luck; new moon gives -1 luck. +- **Friday the 13th** -- gives -1 luck. +- **Night** (10 pm -- 6 am) -- affects undead behaviour and other events. +- **Midnight** -- triggers special undead/vampire encounters. 
+ +These effects are **not** controlled by the core seed and will differ between +runs executed at different times of day or on different dates. This can cause +noticeable score variance (e.g. full moon luck bonuses). + +To make these effects deterministic, pass ``fix_moon_phase=True`` when +creating the environment: + +.. code-block:: python + + env = NLE(fix_moon_phase=True) + env.seed(core=42, disp=42, reseed=False) + obs, info = env.reset() # time-based effects now derived from the seed + +When ``fix_moon_phase=True`` **and** seeds are set, the time values (moon +phase, friday 13th, night, midnight) are derived deterministically from the +core seed using a private ISAAC64 RNG instance -- the same RNG family that +NetHack uses internally. + +When ``fix_moon_phase=True`` but no seeds have been set, the real system time +is used as usual. + +Putting It All Together +*********************** + +For fully deterministic runs: + +.. code-block:: python + + import gymnasium as gym + import nle + + env = gym.make("NetHackScore-v0", fix_moon_phase=True) + env.seed(core=42, disp=42, reseed=False, lgen=99) + obs, info = env.reset() + + # Every run with these settings will produce identical results + # regardless of wall-clock time. diff --git a/doc/nle/source/index.rst b/doc/nle/source/index.rst index f58bec644..d798e4df3 100644 --- a/doc/nle/source/index.rst +++ b/doc/nle/source/index.rst @@ -16,6 +16,7 @@ resembles the one used by people when playing the game. 
:caption: Getting Started getting_started + deterministic_seeds tiles diff --git a/include/nlernd.h b/include/nlernd.h index 5eeac28cb..113c789e9 100644 --- a/include/nlernd.h +++ b/include/nlernd.h @@ -8,6 +8,7 @@ Set of functions to manipulate NetHack's Random Number Generators #define NLERND_H #include "nletypes.h" +#include <time.h> void nle_init_lgen_rng(); void nle_swap_to_lgen(int); @@ -18,4 +19,7 @@ void nle_set_seed(nle_ctx_t *, unsigned long, unsigned long, boolean, void nle_get_seed(nle_ctx_t *, unsigned long *, unsigned long *, boolean *, unsigned long *, bool *); +/* Fill struct tm with deterministic values from seed via ISAAC64. */ +void nle_fill_fixed_tm(struct tm *, unsigned long); + #endif \ No newline at end of file diff --git a/include/nletypes.h b/include/nletypes.h index 824d41df1..31b9110fc 100644 --- a/include/nletypes.h +++ b/include/nletypes.h @@ -132,6 +132,15 @@ typedef struct nle_settings { /* Initial seeds for the RNGs */ nle_seeds_init_t initial_seeds; + /* + * If true and seeds are set, derive deterministic time values + * (moon phase, friday 13th, night, midnight) from the seed + * instead of using real system time. + */ + bool fix_moon_phase; + unsigned long time_seed; + bool time_seed_is_set; + } nle_settings; #endif /* NLETYPES_H */ diff --git a/nle/env/base.py b/nle/env/base.py index 8c7a77f34..9b905dd9e 100644 --- a/nle/env/base.py +++ b/nle/env/base.py @@ -198,6 +198,7 @@ def __init__( allow_all_modes=False, spawn_monsters=True, render_mode="human", + fix_moon_phase=False, ): """Constructs a new NLE environment. @@ -232,6 +233,10 @@ def __init__( render_mode (str): mode used to render the screen. One of "human" | "ansi" | "full". Defaults to "human", i.e. what a human would see playing the game. + fix_moon_phase (bool): If True and seeds are set, derive + deterministic time-based game effects (moon phase, friday 13th, + night, midnight) from the seed instead of real system time. 
+ If False or no seeds are set, use real system time (default). """ self.character = character self._max_episode_steps = max_episode_steps @@ -317,6 +322,7 @@ def __init__( wizard=wizard, spawn_monsters=spawn_monsters, scoreprefix=scoreprefix, + fix_moon_phase=fix_moon_phase, ) self._close_nethack = weakref.finalize(self, self.nethack.close) diff --git a/nle/nethack/nethack.py b/nle/nethack/nethack.py index 8bc0a808c..3b5ad4744 100644 --- a/nle/nethack/nethack.py +++ b/nle/nethack/nethack.py @@ -172,6 +172,7 @@ def __init__( hackdir=HACKDIR, spawn_monsters=True, scoreprefix="", + fix_moon_phase=False, ): self._copy = copy @@ -224,7 +225,11 @@ def __init__( self._nethackoptions = ",".join(self.options) if ttyrec is None: self._pynethack = _pynethack.Nethack( - self.dlpath, self._vardir, self._nethackoptions, spawn_monsters + self.dlpath, + self._vardir, + self._nethackoptions, + spawn_monsters, + fix_moon_phase, ) else: self._pynethack = _pynethack.Nethack( @@ -234,6 +239,7 @@ def __init__( self._nethackoptions, spawn_monsters, scoreprefix, + fix_moon_phase, ) self._ttyrec = ttyrec diff --git a/nle/tests/test_nethack.py b/nle/tests/test_nethack.py index aa80b0618..d95913532 100644 --- a/nle/tests/test_nethack.py +++ b/nle/tests/test_nethack.py @@ -139,6 +139,113 @@ def test_set_seed_after_reset(self, game): game.set_current_seeds(core=42, disp=666) assert game.get_current_seeds() == (42, 666, False, 0) + def test_fix_moon_phase_determinism(self): + """Same seed + fix_moon_phase should produce identical observations.""" + results = [] + for _ in range(2): + game = nethack.Nethack( + observation_keys=("chars", "blstats"), + copy=True, + fix_moon_phase=True, + ) + game.set_initial_seeds(core=42, disp=42) + obs = game.reset() + results.append(obs) + game.close() + np.testing.assert_equal(results[0], results[1]) + + def test_fix_moon_phase_deterministic_moon_messages(self): + """Verify that fix_moon_phase produces deterministic, seed-dependent + moon phase effects 
visible in the startup messages. + + seed=3 -> full moon ("You are lucky! Full moon tonight.") + seed=0 -> new moon ("Be careful! New moon tonight.") + seed=49 -> friday 13 ("Watch out! Bad things can happen on Friday the 13th.") + seed=5 -> normal (no moon/friday message) + + Changing the time_seed derivation would change these mappings and + fail the test. + """ + expected = { + 3: "full moon", + 0: "new moon", + 49: "friday the 13th", + 5: None, + } + + for seed, expected_phrase in expected.items(): + game = nethack.Nethack( + observation_keys=("message",), + copy=True, + fix_moon_phase=True, + ) + game.set_initial_seeds(core=seed, disp=seed) + obs = game.reset() + try: + msgs = [] + for _ in range(10): + msg = ( + bytes(obs[0]) + .decode("ascii", errors="replace") + .rstrip("\x00") + .strip() + ) + if msg: + msgs.append(msg) + obs, done = game.step(13) # MORE + if done: + break + combined = " ".join(msgs).lower() + if expected_phrase is not None: + assert ( + expected_phrase in combined + ), f"seed={seed}: expected '{expected_phrase}' in: {combined}" + else: + for phrase in ("full moon", "new moon", "friday the 13th"): + assert ( + phrase not in combined + ), f"seed={seed}: unexpected '{phrase}' in: {combined}" + finally: + game.close() + + def test_fix_moon_phase_different_seeds(self): + """Different seeds with fix_moon_phase should produce different observations.""" + results = [] + for seed in [42, 123]: + game = nethack.Nethack( + observation_keys=("chars", "blstats"), + copy=True, + fix_moon_phase=True, + ) + game.set_initial_seeds(core=seed, disp=seed) + obs = game.reset() + results.append(obs) + game.close() + assert any(not np.array_equal(a, b) for a, b in zip(results[0], results[1])) + + def test_fix_moon_phase_without_seed(self): + """fix_moon_phase=True without seeds should not crash (falls back to real time).""" + game = nethack.Nethack( + observation_keys=("chars", "blstats"), + fix_moon_phase=True, + ) + game.reset() + game.close() + + def 
test_fix_moon_phase_default_off(self): + """fix_moon_phase defaults to False and doesn't change existing behavior.""" + results = [] + for _ in range(2): + game = nethack.Nethack( + observation_keys=("chars", "blstats"), + copy=True, + ) + game.set_initial_seeds(core=42, disp=42) + obs = game.reset() + results.append(obs) + game.close() + np.testing.assert_equal(results[0], results[1]) + class TestNetHackFurther: def test_run(self): diff --git a/src/hacklib.c b/src/hacklib.c index ea19d0f01..c0c877fc4 100644 --- a/src/hacklib.c +++ b/src/hacklib.c @@ -6,6 +6,10 @@ /* NetHack may be freely redistributed. See license for details. */ #include "hack.h" /* for config.h+extern.h */ +#include "nlernd.h" +#include "nletypes.h" + +extern nle_settings settings; /*= Assorted 'small' utility routines. They're virtually independent of NetHack, except that rounddiv may call panic(). setrandom calls one @@ -931,6 +935,23 @@ getlt() return localtime((LOCALTIME_type) &date); } +/* + * NLE: Return a deterministic struct tm when fix_moon_phase is enabled + * and seeds have been set. Otherwise fall back to real system time. + * The actual RNG work is done by nle_fill_fixed_tm() in nlernd.c. 
+ */ +STATIC_OVL struct tm * +nle_getlt_maybe_fixed() +{ + static struct tm fixed_tm; + + if (!settings.fix_moon_phase || !settings.time_seed_is_set) + return getlt(); + + nle_fill_fixed_tm(&fixed_tm, settings.time_seed); + return &fixed_tm; +} + int getyear() { @@ -1098,7 +1119,7 @@ char *buf; int phase_of_the_moon() /* 0-7, with 0: new, 4: full */ { - register struct tm *lt = getlt(); + register struct tm *lt = nle_getlt_maybe_fixed(); register int epact, diy, goldn; diy = lt->tm_yday; @@ -1113,7 +1134,7 @@ phase_of_the_moon() /* 0-7, with 0: new, 4: full */ boolean friday_13th() { - register struct tm *lt = getlt(); + register struct tm *lt = nle_getlt_maybe_fixed(); /* tm_wday (day of week; 0==Sunday) == 5 => Friday */ return (boolean) (lt->tm_wday == 5 && lt->tm_mday == 13); @@ -1122,7 +1143,7 @@ friday_13th() int night() { - register int hour = getlt()->tm_hour; + register int hour = nle_getlt_maybe_fixed()->tm_hour; return (hour < 6 || hour > 21); } @@ -1130,7 +1151,7 @@ night() int midnight() { - return (getlt()->tm_hour == 0); + return (nle_getlt_maybe_fixed()->tm_hour == 0); } /* strbuf_init() initializes strbuf state for use */ diff --git a/src/nlernd.c b/src/nlernd.c index 81c6d75cd..73892c35f 100644 --- a/src/nlernd.c +++ b/src/nlernd.c @@ -1,6 +1,8 @@ #include "nlernd.h" #include "hack.h" #include "isaac64.h" +#include <string.h> +#include <time.h> /* See rng.c. */ struct rnglist_t { @@ -140,6 +142,27 @@ nle_swap_to_core(int dungeon_num) } } +/* + * Fill a struct tm with deterministic values derived from the + * given seed using a private ISAAC64 RNG instance. 
+ */ +void +nle_fill_fixed_tm(struct tm *tm, unsigned long seed) +{ + isaac64_ctx time_rng; + unsigned char seed_bytes[sizeof(seed)]; + + memcpy(seed_bytes, &seed, sizeof(seed_bytes)); + isaac64_init(&time_rng, seed_bytes, sizeof(seed_bytes)); + + tm->tm_year = 100 + (int) isaac64_next_uint(&time_rng, 50); + tm->tm_mon = (int) isaac64_next_uint(&time_rng, 12); + tm->tm_mday = 1 + (int) isaac64_next_uint(&time_rng, 28); + tm->tm_hour = (int) isaac64_next_uint(&time_rng, 24); + tm->tm_wday = (int) isaac64_next_uint(&time_rng, 7); + tm->tm_yday = tm->tm_mon * 30 + tm->tm_mday; +} + void nle_set_seed(nle_ctx_t *nle, unsigned long core, unsigned long disp, boolean reseed, unsigned long lgen) diff --git a/win/rl/pynethack.cc b/win/rl/pynethack.cc index e4d1bbcf6..fefd99c21 100644 --- a/win/rl/pynethack.cc +++ b/win/rl/pynethack.cc @@ -104,9 +104,9 @@ class Nethack public: Nethack(std::string dlpath, std::string ttyrec, std::string hackdir, std::string nethackoptions, bool spawn_monsters, - std::string scoreprefix) + std::string scoreprefix, bool fix_moon_phase) : Nethack(std::move(dlpath), std::move(hackdir), - std::move(nethackoptions), spawn_monsters) + std::move(nethackoptions), spawn_monsters, fix_moon_phase) { ttyrec_ = std::fopen(ttyrec.c_str(), "a"); if (!ttyrec_) { @@ -133,7 +133,8 @@ class Nethack } Nethack(std::string dlpath, std::string hackdir, - std::string nethackoptions, bool spawn_monsters) + std::string nethackoptions, bool spawn_monsters, + bool fix_moon_phase) : dlpath_(std::move(dlpath)), obs_{}, settings_{} { if (hackdir.size() > sizeof(settings_.hackdir) - 1) { @@ -148,6 +149,7 @@ class Nethack strncpy(settings_.options, nethackoptions.c_str(), sizeof(settings_.options)); settings_.spawn_monsters = spawn_monsters; + settings_.fix_moon_phase = fix_moon_phase; } ~Nethack() @@ -297,6 +299,14 @@ class Nethack settings_.initial_seeds.lgen_seed = 0; settings_.initial_seeds.use_lgen_seed = false; } + + if (settings_.fix_moon_phase) { + /* Offset by 1 to 
decorrelate from the core RNG seed. + ISAAC64 amplifies this small difference into a + completely independent sequence. */ + settings_.time_seed = core + 1; + settings_.time_seed_is_set = true; + } } void @@ -532,13 +542,14 @@ PYBIND11_MODULE(_pynethack, m) py::class_<Nethack>(m, "Nethack") .def(py::init<std::string, std::string, std::string, std::string, - bool, std::string>(), + bool, std::string, bool>(), py::arg("dlpath"), py::arg("ttyrec"), py::arg("hackdir"), py::arg("nethackoptions"), py::arg("spawn_monsters") = true, py::arg("scoreprefix") = "", py::arg("fix_moon_phase") = false) + .def(py::init<std::string, std::string, std::string, bool, bool>(), py::arg("dlpath"), py::arg("hackdir"), py::arg("nethackoptions"), py::arg("spawn_monsters") = true) + py::arg("spawn_monsters") = true, + py::arg("fix_moon_phase") = false) .def("step", &Nethack::step, py::arg("action")) .def("done", &Nethack::done) .def("reset", py::overload_cast<>(&Nethack::reset))