From 144b875fb5f7c7ab0f255ecbf55c66cf72bf8b62 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 12 Jun 2023 09:52:13 +0200
Subject: [PATCH 01/12] use np instead of math

---
 polykit/analysis/polymer_analyses.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/polykit/analysis/polymer_analyses.py b/polykit/analysis/polymer_analyses.py
index 851e702..91a0d8f 100644
--- a/polykit/analysis/polymer_analyses.py
+++ b/polykit/analysis/polymer_analyses.py
@@ -33,7 +33,6 @@
 
 """
 
-from math import sqrt
 
 import numpy as np
 import pandas as pd
@@ -185,7 +184,7 @@ def contact_scaling(data, bins0=None, cutoff=1.1, *, ring=False):
 
     connumbers = connumbers / possible
 
-    a = [sqrt(i[0] * (i[1] - 1)) for i in bins]
+    a = [np.sqrt(i[0] * (i[1] - 1)) for i in bins]
     return a, connumbers
 
 

From 84fa8bb9c0d6e855be808c7b4d7345b3748873a4 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 12 Jun 2023 09:53:07 +0200
Subject: [PATCH 02/12] make generate_bins more robust; shorten one input
 variable name

---
 polykit/analysis/polymer_analyses.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/polykit/analysis/polymer_analyses.py b/polykit/analysis/polymer_analyses.py
index 91a0d8f..ca9a2ad 100644
--- a/polykit/analysis/polymer_analyses.py
+++ b/polykit/analysis/polymer_analyses.py
@@ -119,13 +119,14 @@ def smart_contacts(data, cutoff=1.7, min_cutoff=2.1, percent_func=lambda x: 1 /
         return calculate_contacts(data, cutoff)
 
 
-def generate_bins(N, start=4, bins_per_order_magn=10):
+def generate_bins(end, start=1, bins_decade=10):
     lstart = np.log10(start)
-    lend = np.log10(N - 1) + 1e-6
-    num = int(np.ceil((lend - lstart) * bins_per_order_magn))
-    bins = np.unique(np.logspace(lstart, lend, dtype=int, num=max(num, 0)))
-    if len(bins) > 0:
-        assert bins[-1] == N - 1
+    lend = np.log10(end)
+    num = int(np.round(np.ceil((lend - lstart) * bins_decade)))
+    bins = np.unique(
+        np.round(np.logspace(lstart, lend, num=max(num, 0))).astype(np.int64)
+    )
+    assert bins[-1] == end
     return bins
 
 

From 6acdea184b43cf67ae8a8b1e9a8296c9e095fd31 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 12 Jun 2023 09:53:45 +0200
Subject: [PATCH 03/12] add function to make Dataframe scalings

---
 polykit/analysis/polymer_analyses.py | 131 +++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)

diff --git a/polykit/analysis/polymer_analyses.py b/polykit/analysis/polymer_analyses.py
index ca9a2ad..5f1f271 100644
--- a/polykit/analysis/polymer_analyses.py
+++ b/polykit/analysis/polymer_analyses.py
@@ -36,6 +36,7 @@
 
 import numpy as np
 import pandas as pd
+
 from scipy.spatial import cKDTree
 from scipy.ndimage import gaussian_filter1d
 
@@ -130,6 +131,51 @@ def generate_bins(end, start=1, bins_decade=10):
     return bins
 
 
+def _bin_contacts_df(contacts, N, bins_decade=10, bins=None, ring=False):
+    if ring:
+        contour_dists = np.abs(contacts[:, 1] - contacts[:, 0])
+        mask = contour_dists > N // 2
+        contour_dists[mask] = N - contour_dists[mask]
+    else:
+        contour_dists = np.abs(contacts[:, 1] - contacts[:, 0])
+
+    if bins is None and bins_decade is None:
+        bins = np.arange(0, N + 1)
+        contacts_per_bin = np.bincount(contour_dists, minlength=N)
+    else:
+        if bins is None:
+            bins = generate_bins(N, bins_decade=bins_decade)
+        else:
+            bins = np.array(bins)
+
+        contacts_per_bin = np.bincount(
+            np.searchsorted(bins, contour_dists, side="right"), minlength=len(bins)
+        )
+        contacts_per_bin = contacts_per_bin[
+            1 : len(bins)
+        ]  # ignore contacts outside of distance binds
+
+    if ring:
+        pairs_per_bin = np.diff(N * bins)
+    else:
+        pairs_per_bin = np.diff(N * bins + 0.5 * bins - 0.5 * (bins ** 2))
+
+    contact_freqs = contacts_per_bin / pairs_per_bin
+
+    bin_mids = np.sqrt(bins[:-1] * bins[1:])
+
+    return pd.DataFrame(
+        {
+            "dist": bin_mids,
+            "contact_freq": contact_freqs,
+            "n_particle_pairs": pairs_per_bin,
+            "n_contacts": contacts_per_bin,
+            "min_dist": bins[:-1],
+            "max_dist": bins[1:],
+        }
+    )
+
+
 def contact_scaling(data, bins0=None, cutoff=1.1, *, ring=False):
     """
     Returns contact probability scaling for a given polymer conformation
@@ -204,6 +250,91 @@ def smooth(x):
     return mids[1:], slope
 
 
+def contact_vs_dist_df(
+    coords,
+    contact_radius=1.1,
+    bins_decade=10,
+    bins=None,
+    ring=False,
+):
+    """
+    Returns the P(s) statistics of a polymer, i.e. the average contact frequency
+    vs countour distance. Contact frequencies are averaged across ranges (bins)
+    of countour distance.
+
+    Parameters
+    ----------
+    coords : Nx3 array of ints/floats
+        An array of coordinates of the polymer particles.
+    bins : array.
+        Bins to divide the total span of possible countour distances.
+        If neither `bins` or `bins_decade` are provided, distances are not binned.
+    bins_decade : int.
+        If provided, distance bins are generated automatically
+        in the range of [1, N_PARTICLES - 1],
+        such that bins edges are approximately equally spaced in
+        the log space, with approx. `bins_decade` bins per decade.
+        If neither `bins` or `bins_decade` are provided, distances are not binned.
+
+    contact_radius : float
+        Particles separated in 3D by less than `contact_radius` are
+        considered to be in contact.
+    ring : bool, optional
+        If True, will calculate contacts for the ring
+
+    Returns
+    -------
+    (bin_mids, contact_freqs, npairs_per_bin) where "mids" contains
+    geometric means of bin start/end
+
+
+    """
+    coords = np.asarray(coords)
+    if coords.shape[1] != 3:
+        raise ValueError(
+            "coords must contain an Nx3 array of particle coordinates in 3D"
+        )
+    N = coords.shape[0]
+
+    contacts = np.array(
+        calculate_contacts(coords, contact_radius)
+    )
+
+    assert np.sum(contacts[:, 1] < contacts[:, 0]) == 0
+
+    cvd = _bin_contacts_df(contacts, N, bins_decade=bins_decade, bins=bins, ring=ring)
+
+    return cvd
+
+
+def gaussian_contact_vs_dist(
+    coords, contact_vs_dist_func, random_sigma=3.0, random_reps=10, **kwargs
+):
+    """Calculates contact_vs_dist_func for a polymer with random normal shifts.
+
+    Args:
+        coords (_type_): coordinates of a polymer
+        contact_vs_dist_func (_type_): function to calculate contact_vs_dist
+        random_sigma (float, optional): the standard deviation of the random gaussian shift along each coordinate. Defaults to 3.0.
+        random_reps (int, optional): number of random shifts to average over. Defaults to 10.
+
+    Returns:
+        _type_: a pandas dataframe with the average contact frequency at different contour distances
+    """
+    if random_sigma is None:
+        return contact_vs_dist_func(coords, **kwargs)
+
+    contact_freqs = 0
+    for _ in range(random_reps):
+        shifts = np.random.normal(scale=random_sigma, size=coords.shape)
+        res = contact_vs_dist_func(coords + shifts, **kwargs)
+        contact_freqs += res["contact_freq"] / random_reps
+
+    res["contact_freq"] = contact_freqs
+
+    return res
+
+
 def Rg2_scaling(data, bins=None, ring=False):
     """Calculates average gyration radius of subchains a function of s
 

From 87d5721d610daab8aaddbe9754a062209fc7e94d Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 12 Jun 2023 09:53:58 +0200
Subject: [PATCH 04/12] add hoomd fetcher

---
 polykit/io/fetch_hoomd.py | 88 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 polykit/io/fetch_hoomd.py

diff --git a/polykit/io/fetch_hoomd.py b/polykit/io/fetch_hoomd.py
new file mode 100644
index 0000000..55e1121
--- /dev/null
+++ b/polykit/io/fetch_hoomd.py
@@ -0,0 +1,88 @@
+import numpy as np
+import pathlib
+
+import gsd.hoomd
+
+DEFAULT_TRAJ_PATH = "traj.gsd"
+
+def path_to_traj(path):
+    if type(path) == gsd.hoomd.HOOMDTrajectory:
+        return path
+    elif pathlib.Path(path).is_dir():
+        return gsd.hoomd.open(name=pathlib.Path(path) / DEFAULT_TRAJ_PATH)
+    else:
+        return gsd.hoomd.open(name=pathlib.Path(path))
+
+
+def unwrap_pos_gsd(snapshot, get_pos_f=None, max_delta=2):
+    """Unwrap polymer coordinates wrapped into periodic boundary conditions
+
+    Args:
+        snapshot (_type_): HOOMD snapshot
+        get_pos_f (_type_, optional): function to extract particle coordinates from a snapshot. Defaults to None.
+        max_delta (int, optional): maximum separation between particles of a chain that is considered for unwrapping. 
+        Useful for the case where a multi-particle segment of a chain is stretched across the periodic boundary.
+        Defaults to 2.
+
+    Returns:
+        _type_: unwrapped coordinates
+    """
+    if get_pos_f is None:
+        get_pos_f = lambda s: s.particles.position
+    d = np.copy(get_pos_f(snapshot))
+
+    for delta in range(1, max_delta+1):
+        for ax in range(3):
+            L = snapshot.configuration.box[ax]
+
+            bond_projection = d[delta:, ax] - d[:-delta, ax]
+            img_shifts = (
+                np.round(np.abs(bond_projection) / L)
+                * 
+                np.sign(bond_projection)
+            )
+
+            d[delta:,ax] -= np.cumsum(img_shifts) * L
+
+    return d
+
+
+def _get_last_frame_idx_gsd(traj):
+    traj = path_to_traj(traj)
+
+
+    if len(traj) == 0:
+        return None
+
+    return len(traj) - 1
+
+def get_abs_frame_idx_gsd(traj, idx):
+    traj = path_to_traj(traj)
+
+    if idx < 0:
+        idx = _get_last_frame_idx_gsd(traj) + 1 + idx
+
+    return idx
+
+def fetch_snaphot_gsd(
+    traj, 
+    frame_idx):
+    
+    traj = path_to_traj(traj)
+    
+    return traj[get_abs_frame_idx_gsd(traj, frame_idx)]
+
+
+def fetch_frame(
+    traj, 
+    frame_idx, 
+    unwrap=True):
+    
+    snapshot = fetch_snaphot_gsd(traj, frame_idx)
+    
+    if unwrap:
+        d = unwrap_pos_gsd(snapshot)
+    else:
+        d = np.copy(snapshot.particles.position)
+        
+    return d

From 4e2e15ecdcac973e3b332a691dc08861dcbc1f14 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 12 Jun 2023 09:54:15 +0200
Subject: [PATCH 05/12] add cached contact_vs_dist

---
 polykit/io/cache.py | 72 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 polykit/io/cache.py

diff --git a/polykit/io/cache.py b/polykit/io/cache.py
new file mode 100644
index 0000000..792dd75
--- /dev/null
+++ b/polykit/io/cache.py
@@ -0,0 +1,72 @@
+from ..analysis import polymer_analyses
+from . import fetch_hoomd
+
+import collections
+import pathlib
+import shelve
+
+SCALING_CACHE_FILENAME = "scalings.shlv"
+
+
+def cached_contact_vs_dist(
+    folder,
+    fetch_func=fetch_hoomd.fetch_frame,
+    get_abs_frame_idx=fetch_hoomd.get_abs_frame_idx,
+    frame_idx=-1,
+    contact_radius=1.1,
+    bins_decade=10,
+    bins=None,
+    ring=False,
+    particle_slice=(0,None),
+    random_sigma=None,
+    random_reps=10,
+    cache_file=SCALING_CACHE_FILENAME,
+    
+):
+    if random_sigma is None:
+        random_reps = None
+
+    frame_idx = fetch_hoomd.get_abs_frame_idx(folder, frame_idx)
+
+    path = pathlib.Path(folder) / cache_file
+    cache_f = shelve.open(path.as_posix(), "c")
+
+    key_dict = {}
+    for k in [
+        "frame_idx",
+        "bins",
+        "bins_decade",
+        "contact_radius",
+        "ring",
+        "random_sigma",
+        "random_reps",
+        "particle_slice"
+    ]:
+        key_dict[k] = locals()[k]
+
+    if isinstance(key_dict["bins"], collections.abc.Iterable):
+        key_dict["bins"] = tuple(key_dict["bins"])
+
+    # key = '_'.join([i for for kv in sorted(key_dict.items()) for i in kv])
+    key = repr(tuple(sorted(key_dict.items())))
+
+    if key in cache_f:
+        return cache_f[key]
+
+    coords = fetch_func(folder, frame_idx)
+    coords = coords[particle_slice[0]:particle_slice[-1]]
+    sc = polymer_analyses.gaussian_contact_vs_dist(
+        coords,
+        contact_vs_dist_func=polymer_analyses.contact_vs_dist,
+        random_sigma=random_sigma,
+        random_reps=random_reps,
+        bins_decade=bins_decade,
+        bins=bins,
+        contact_radius=contact_radius,
+        ring=ring,
+    )
+    cache_f[key] = sc
+    cache_f.close()
+
+    return sc
+

From 557b095f5ba0fc77f9428db4eda4b2bf0e7506fb Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 12 Jun 2023 10:29:52 +0200
Subject: [PATCH 06/12] several bugfixes

---
 polykit/io/cache.py       |  2 +-
 polykit/io/fetch_hoomd.py | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/polykit/io/cache.py b/polykit/io/cache.py
index 792dd75..f9c6971 100644
--- a/polykit/io/cache.py
+++ b/polykit/io/cache.py
@@ -57,7 +57,7 @@ def cached_contact_vs_dist(
     coords = coords[particle_slice[0]:particle_slice[-1]]
     sc = polymer_analyses.gaussian_contact_vs_dist(
         coords,
-        contact_vs_dist_func=polymer_analyses.contact_vs_dist,
+        contact_vs_dist_func=polymer_analyses.contact_vs_dist_df,
         random_sigma=random_sigma,
         random_reps=random_reps,
         bins_decade=bins_decade,
diff --git a/polykit/io/fetch_hoomd.py b/polykit/io/fetch_hoomd.py
index 55e1121..e3786d6 100644
--- a/polykit/io/fetch_hoomd.py
+++ b/polykit/io/fetch_hoomd.py
@@ -14,7 +14,7 @@ def path_to_traj(path):
         return gsd.hoomd.open(name=pathlib.Path(path))
 
 
-def unwrap_pos_gsd(snapshot, get_pos_f=None, max_delta=2):
+def unwrap_pos(snapshot, get_pos_f=None, max_delta=2):
     """Unwrap polymer coordinates wrapped into periodic boundary conditions
 
     Args:
@@ -47,7 +47,7 @@ def unwrap_pos_gsd(snapshot, get_pos_f=None, max_delta=2):
     return d
 
 
-def _get_last_frame_idx_gsd(traj):
+def _get_last_frame_idx(traj):
     traj = path_to_traj(traj)
 
 
@@ -56,21 +56,21 @@ def _get_last_frame_idx_gsd(traj):
 
     return len(traj) - 1
 
-def get_abs_frame_idx_gsd(traj, idx):
+def get_abs_frame_idx(traj, idx):
     traj = path_to_traj(traj)
 
     if idx < 0:
-        idx = _get_last_frame_idx_gsd(traj) + 1 + idx
+        idx = _get_last_frame_idx(traj) + 1 + idx
 
     return idx
 
-def fetch_snaphot_gsd(
+def fetch_snaphot(
     traj, 
     frame_idx):
     
     traj = path_to_traj(traj)
     
-    return traj[get_abs_frame_idx_gsd(traj, frame_idx)]
+    return traj[get_abs_frame_idx(traj, frame_idx)]
 
 
 def fetch_frame(
@@ -78,10 +78,10 @@ def fetch_frame(
     frame_idx, 
     unwrap=True):
     
-    snapshot = fetch_snaphot_gsd(traj, frame_idx)
+    snapshot = fetch_snaphot(traj, frame_idx)
     
     if unwrap:
-        d = unwrap_pos_gsd(snapshot)
+        d = unwrap_pos(snapshot)
     else:
         d = np.copy(snapshot.particles.position)
         

From bef4ea0d47c927ad9cd0dbb2535a91fa50e1dee2 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Thu, 23 Nov 2023 21:04:30 +0100
Subject: [PATCH 07/12] add a convenience function that converts the index of
 the frame into the number of steps

---
 polykit/io/fetch_hoomd.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/polykit/io/fetch_hoomd.py b/polykit/io/fetch_hoomd.py
index e3786d6..958da43 100644
--- a/polykit/io/fetch_hoomd.py
+++ b/polykit/io/fetch_hoomd.py
@@ -64,6 +64,10 @@ def get_abs_frame_idx(traj, idx):
 
     return idx
 
+def get_frame_nsteps(traj, frame_idx):
+    snapshot = fetch_snaphot(traj, frame_idx)
+    return snapshot.configuration.step
+
 def fetch_snaphot(
     traj, 
     frame_idx):

From 71f2fbf4531efcd794db6f8a79a7abef0011511e Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Thu, 23 Nov 2023 21:04:56 +0100
Subject: [PATCH 08/12] add a generator of a simple space filling curve

---
 polykit/generators/initial_conformations.py | 53 +++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/polykit/generators/initial_conformations.py b/polykit/generators/initial_conformations.py
index 22e0f87..aa19f9b 100644
--- a/polykit/generators/initial_conformations.py
+++ b/polykit/generators/initial_conformations.py
@@ -311,3 +311,56 @@ def grow_cubic(N, boxSize, method="standard"):
                 break
                 # print a
     return np.array(a) - 1
+
+
+def space_filling_curve(N, cond, init_point=None):
+    """Grow a space-filling curve in 3D.
+
+    Args:
+        N (int): the number of points to generate (including the initial point).
+        cond (function): a function that takes a point in 3D and returns True if the point is valid and False otherwise.
+        init_point (tuple, optional): the initial point. Defaults to None.
+
+    Raises:
+        Exception: _description_
+
+    Returns:
+        np.array: the generated points.
+    """
+    
+    i = 0
+    d = np.zeros((N,3))
+    if init_point is not None:
+        d[0] = init_point
+    nx, ny, nz = 0, 0, 0
+    
+    while i < N-1:
+        dx = ((ny + 1) % 2) * 2 - 1
+        new_p = d[i] + (dx, 0, 0)
+        
+        if cond(new_p):
+            d[i+1] = new_p
+            i += 1
+            nx += 1
+            continue
+        
+        dy = ((nz + 1) % 2) * 2 - 1
+        new_p = d[i] + (0, dy, 0)
+
+        if cond(new_p):
+            d[i+1] = new_p
+            i+= 1
+            ny += 1
+            continue
+        
+        new_p = d[i] + (0, 0, 1)
+
+        if cond(new_p):
+            d[i+1] = new_p
+            i += 1
+            nz += 1
+            continue
+        
+        raise Exception('Cannot grow up!')
+    
+    return d

From c8bce575bbebab12b32d6a6a423d8d60a4ce25e2 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Thu, 29 Feb 2024 09:53:44 +0100
Subject: [PATCH 09/12] remove numpy upper version limit

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 18281ba..59963a8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-numpy<1.24
+numpy
 scipy>=0.16
 pandas>=0.19
 matplotlib>=2.0

From 4bffb6151546e9d835baa29e4537a581f51c2d57 Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Thu, 29 Feb 2024 09:54:39 +0100
Subject: [PATCH 10/12] add chunked version of calculate_contacts

---
 polykit/analysis/polymer_analyses.py | 39 ++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/polykit/analysis/polymer_analyses.py b/polykit/analysis/polymer_analyses.py
index 5f1f271..324a5dd 100644
--- a/polykit/analysis/polymer_analyses.py
+++ b/polykit/analysis/polymer_analyses.py
@@ -66,6 +66,45 @@ def calculate_contacts(data, cutoff=1.7):
     return pairs
 
 
+def calculate_contacts_chunked(data, cutoff=1.7, chunk_size = int(1e8)):
+    """
+    Calculate contacts between particles, chunked.
+
+    Args:
+        data (ndarray): Array of shape Nx3 representing the polymer data.
+        cutoff (float, optional): Distance cutoff for identifying contacts. Defaults to 1.7.
+
+    Yields:
+        ndarray: Array of shape Mx2 representing the pairs of pairticles that are in contact.
+
+    Raises:
+        ValueError: If the shape of the polymer data is not Nx3.
+        RuntimeError: If the polymer data contains NaN values.
+    """
+
+    if data.shape[1] != 3:
+        raise ValueError("Incorrect polymer data shape. Must be Nx3.")
+
+    if np.isnan(data).any():
+        raise RuntimeError("Data contains NANs")
+
+    tree = cKDTree(data)
+    
+    N_RANDOM = 10
+    random_sample = tree.query_ball_point(x=data[np.random.choice(len(data), size=N_RANDOM)], r=cutoff, workers = 1)
+    n_pairs_avg = np.mean([len(i) for i in random_sample])
+    particles_per_chunk = int(chunk_size // n_pairs_avg)
+
+    for i in range(0, len(d_chrom), particles_per_chunk):
+        chunk = d_chrom[i:i+particles_per_chunk]
+        
+        neighbours = tree.query_ball_point(x=chunk, r=cutoff, workers = 1)
+        pairs = np.vstack([np.repeat(np.arange(len(neighbours)), [len(i) for i in neighbours]), np.concatenate(neighbours)]).T
+        pairs = pairs[pairs[:,0] != pairs[:,1]]
+
+        yield pairs
+
+
 def smart_contacts(data, cutoff=1.7, min_cutoff=2.1, percent_func=lambda x: 1 / x):
     """Calculates contacts for a polymer, give the contact radius (cutoff)
     This method takes a random fraction of the monomers that is equal to (

From a86cb1afc0cd0e9786d01ff222d04e9d8922c66a Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Thu, 29 Feb 2024 09:55:33 +0100
Subject: [PATCH 11/12] bugfix

---
 polykit/analysis/polymer_analyses.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/polykit/analysis/polymer_analyses.py b/polykit/analysis/polymer_analyses.py
index 324a5dd..c8055e6 100644
--- a/polykit/analysis/polymer_analyses.py
+++ b/polykit/analysis/polymer_analyses.py
@@ -95,8 +95,8 @@ def calculate_contacts_chunked(data, cutoff=1.7, chunk_size = int(1e8)):
     n_pairs_avg = np.mean([len(i) for i in random_sample])
     particles_per_chunk = int(chunk_size // n_pairs_avg)
 
-    for i in range(0, len(d_chrom), particles_per_chunk):
-        chunk = d_chrom[i:i+particles_per_chunk]
+    for i in range(0, len(data), particles_per_chunk):
+        chunk = data[i:i+particles_per_chunk]
         
         neighbours = tree.query_ball_point(x=chunk, r=cutoff, workers = 1)
         pairs = np.vstack([np.repeat(np.arange(len(neighbours)), [len(i) for i in neighbours]), np.concatenate(neighbours)]).T

From 826fe7b636ee1785ee05c2308bb4430545ed3bdf Mon Sep 17 00:00:00 2001
From: Anton Goloborodko <goloborodko.anton@gmail.com>
Date: Mon, 11 Nov 2024 12:25:13 +0100
Subject: [PATCH 12/12] add io.fetch_hoomd.unwrap_chains

---
 polykit/io/__init__.py    |  0
 polykit/io/fetch_hoomd.py | 59 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 polykit/io/__init__.py

diff --git a/polykit/io/__init__.py b/polykit/io/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/polykit/io/fetch_hoomd.py b/polykit/io/fetch_hoomd.py
index 958da43..fc64f3d 100644
--- a/polykit/io/fetch_hoomd.py
+++ b/polykit/io/fetch_hoomd.py
@@ -27,8 +27,10 @@ def unwrap_pos(snapshot, get_pos_f=None, max_delta=2):
     Returns:
         _type_: unwrapped coordinates
     """
+
     if get_pos_f is None:
         get_pos_f = lambda s: s.particles.position
+    
     d = np.copy(get_pos_f(snapshot))
 
     for delta in range(1, max_delta+1):
@@ -47,6 +49,58 @@ def unwrap_pos(snapshot, get_pos_f=None, max_delta=2):
     return d
 
 
+def _find_chains_sequential(bonds):
+    bonds = np.asarray(bonds)
+    bonds = bonds[np.argsort(bonds[:,0])]
+    bond_starts = bonds[:,0]
+    bond_ends = bonds[:,1]
+
+    chain_starts = bond_starts[np.r_[True, (np.diff(bond_starts) != 1)]]
+    chain_ends = bond_ends[np.r_[(np.diff(bond_starts) != 1), True]] + 1
+    return np.vstack([chain_starts, chain_ends]).T
+    
+
+def find_chains_sequential(snap, bond_types=['polymer']):        
+    bond_typeids = [i for i, btype in enumerate(snap.bonds.types) if btype in bond_types]
+    chain_bonds = snap.bonds.group[np.isin(snap.bonds.typeid, bond_typeids)]
+
+    if ((chain_bonds[:,1] - chain_bonds[:,0]) != 1).sum() != 0:
+        raise ValueError('Some input bonds connect non-sequencial particles.')
+
+    chain_spans = _find_chains_sequential(chain_bonds)
+    return chain_spans
+
+
+def _unwrap_chains(d_unwrapped, chains, box):
+    d_out = np.copy(d_unwrapped)
+    box = np.asarray(box)
+    chains = np.asarray(chains)
+
+    for lo, hi in chains:
+        d_chain = d_out[lo:hi]
+        chain_com = d_chain.mean(axis=0)
+        #img_shift_chain = chain_com // box
+        img_shift_chain = (
+            np.round(np.abs(chain_com) / box)
+            * 
+            np.sign(chain_com))
+        
+        chain_shift = - img_shift_chain * box
+        
+        d_out[lo:hi] += chain_shift
+    return d_out
+
+
+def unwrap_chains(snap, max_delta=2, bond_types=['polymer']):
+    d_unwrapped = unwrap_pos(snap, max_delta=max_delta)
+
+    chains = find_chains_sequential(snap, bond_types=bond_types)
+    box = snap.configuration.box[:3]
+    d_chains = _unwrap_chains(d_unwrapped, chains, box)
+
+    return d_chains, chains
+    
+
 def _get_last_frame_idx(traj):
     traj = path_to_traj(traj)
 
@@ -68,12 +122,11 @@ def get_frame_nsteps(traj, frame_idx):
     snapshot = fetch_snaphot(traj, frame_idx)
     return snapshot.configuration.step
 
+
 def fetch_snaphot(
     traj, 
-    frame_idx):
-    
+    frame_idx):    
     traj = path_to_traj(traj)
-    
     return traj[get_abs_frame_idx(traj, frame_idx)]