Option to turn on parallel computation for CCG in numba (#4305)

yger · pre-commit-ci[bot] · alejoe91 · web-flow · commit 25940b471dde · 2026-02-06T15:54:34.000+01:00
Co-authored-by: pre-commit-ci[bot] &lt;66853113+pre-commit-ci[bot]@users.noreply.github.com&gt;
Co-authored-by: Alessio Buccino &lt;alejoe9187@gmail.com&gt;
diff --git a/src/spikeinterface/postprocessing/correlograms.py b/src/spikeinterface/postprocessing/correlograms.py
@@ -45,6 +45,9 @@ class ComputeCorrelograms(AnalyzerExtension):
         bin size 1 ms, the correlation will be binned as -25 ms, -24 ms, ...
     method : "auto" | "numpy" | "numba", default: "auto"
          If "auto" and numba is installed, numba is used, otherwise numpy is used.
+    fast_mode : "auto" | "on" | "off", default: "auto"
+        If "auto", a faster multithreaded implementations is used if method is "numba" and
+        if the number of units is greater than 300.
 
     Returns
     -------
@@ -88,8 +91,8 @@ class ComputeCorrelograms(AnalyzerExtension):
     use_nodepipeline = False
     need_job_kwargs = False
 
-    def _set_params(self, window_ms: float = 50.0, bin_ms: float = 1.0, method: str = "auto"):
-        params = dict(window_ms=window_ms, bin_ms=bin_ms, method=method)
+    def _set_params(self, window_ms: float = 50.0, bin_ms: float = 1.0, method: str = "auto", fast_mode: str = "auto"):
+        params = dict(window_ms=window_ms, bin_ms=bin_ms, method=method, fast_mode=fast_mode)
 
         return params
 
@@ -215,6 +218,7 @@ def compute_correlograms(
     window_ms: float = 50.0,
     bin_ms: float = 1.0,
     method: str = "auto",
+    fast_mode: str = "auto",
 ):
     """
     Compute correlograms using Numba or Numpy.
@@ -225,11 +229,11 @@ def compute_correlograms(
 
     if isinstance(sorting_analyzer_or_sorting, SortingAnalyzer):
         return compute_correlograms_sorting_analyzer(
-            sorting_analyzer_or_sorting, window_ms=window_ms, bin_ms=bin_ms, method=method
+            sorting_analyzer_or_sorting, window_ms=window_ms, bin_ms=bin_ms, method=method, fast_mode=fast_mode
         )
     else:
         return _compute_correlograms_on_sorting(
-            sorting_analyzer_or_sorting, window_ms=window_ms, bin_ms=bin_ms, method=method
+            sorting_analyzer_or_sorting, window_ms=window_ms, bin_ms=bin_ms, method=method, fast_mode=fast_mode
         )
 
 
@@ -299,7 +303,7 @@ def _compute_num_bins(window_size, bin_size):
     return num_bins, num_half_bins
 
 
-def _compute_correlograms_on_sorting(sorting, window_ms, bin_ms, method="auto"):
+def _compute_correlograms_on_sorting(sorting, window_ms, bin_ms, method="auto", fast_mode="auto"):
     """
     Computes cross-correlograms from multiple units.
 
@@ -318,6 +322,9 @@ def _compute_correlograms_on_sorting(sorting, window_ms, bin_ms, method="auto"):
     method : str
         To use "numpy" or "numba". "auto" will use numba if available,
         otherwise numpy.
+    fast_mode : "auto" | "on" | "off", default: "auto"
+        If "auto", a faster multithreaded implementations is used if method is "numba" and
+        if the number of units is greater than 300.
 
     Returns
     -------
@@ -333,12 +340,20 @@ def _compute_correlograms_on_sorting(sorting, window_ms, bin_ms, method="auto"):
     if method == "auto":
         method = "numba" if HAVE_NUMBA else "numpy"
 
+    if method == "numba" and fast_mode == "auto":
+        num_units = len(sorting.unit_ids)
+        fast_mode = num_units > 300
+    elif fast_mode == "off":
+        fast_mode = False
+    elif fast_mode == "on":
+        fast_mode = True
+
     bins, window_size, bin_size = _make_bins(sorting, window_ms, bin_ms)
 
     if method == "numpy":
         correlograms = _compute_correlograms_numpy(sorting, window_size, bin_size)
     if method == "numba":
-        correlograms = _compute_correlograms_numba(sorting, window_size, bin_size)
+        correlograms = _compute_correlograms_numba(sorting, window_size, bin_size, fast_mode=fast_mode)
 
     return correlograms, bins
 
@@ -483,7 +498,7 @@ def correlogram_for_one_segment(spike_times, spike_unit_indices, window_size, bi
     return correlograms
 
 
-def _compute_correlograms_numba(sorting, window_size, bin_size):
+def _compute_correlograms_numba(sorting, window_size, bin_size, fast_mode):
     """
     Computes cross-correlograms between all units in `sorting`.
 
@@ -499,6 +514,9 @@ def _compute_correlograms_numba(sorting, window_size, bin_size):
             The window size over which to perform the cross-correlation, in samples
     bin_size : int
         The size of which to bin lags, in samples.
+    fast_mode : bool
+        If True, use faster implementations (currently only if method is 'numba'),
+        at the cost of possible minor numerical differences.
 
     Returns
     -------
@@ -516,6 +534,11 @@ def _compute_correlograms_numba(sorting, window_size, bin_size):
     spikes = sorting.to_spike_vector(concatenated=False)
     correlograms = np.zeros((num_units, num_units, num_bins), dtype=np.int64)
 
+    if fast_mode:
+        num_threads = mp.cpu_count()
+    else:
+        num_threads = 1
+
     for seg_index in range(sorting.get_num_segments()):
         spike_times = spikes[seg_index]["sample_index"]
         spike_unit_indices = spikes[seg_index]["unit_index"]
@@ -527,6 +550,7 @@ def _compute_correlograms_numba(sorting, window_size, bin_size):
             window_size,
             bin_size,
             num_half_bins,
+            num_threads,
         )
 
     return correlograms
@@ -539,9 +563,10 @@ def _compute_correlograms_numba(sorting, window_size, bin_size):
         nopython=True,
         nogil=True,
         cache=False,
+        parallel=True,
     )
     def _compute_correlograms_one_segment_numba(
-        correlograms, spike_times, spike_unit_indices, window_size, bin_size, num_half_bins
+        correlograms, spike_times, spike_unit_indices, window_size, bin_size, num_half_bins, num_threads
     ):
         """
         Compute the correlograms using `numba` for speed.
@@ -570,9 +595,12 @@ def _compute_correlograms_one_segment_numba(
             The window size over which to perform the cross-correlation, in samples
         bin_size : int
             The size of which to bin lags, in samples.
+        num_threads : int
+            The number of threads to use in parallel.
         """
+        numba.set_num_threads(num_threads)
         start_j = 0
-        for i in range(spike_times.size):
+        for i in numba.prange(spike_times.size):
             for j in range(start_j, spike_times.size):
                 if i == j:
                     continue
diff --git a/src/spikeinterface/postprocessing/tests/test_correlograms.py b/src/spikeinterface/postprocessing/tests/test_correlograms.py
@@ -104,6 +104,28 @@ def test_equal_results_correlograms(window_and_bin_ms):
     assert np.array_equal(result_numpy, result_numba)
 
 
+@pytest.mark.skipif(not HAVE_NUMBA, reason="Numba not available")
+@pytest.mark.parametrize("window_and_bin_ms", [(60.0, 2.0), (3.57, 1.6421)])
+def test_equal_results_fast_correlograms(window_and_bin_ms):
+    """
+    Test that the 2 methods have same results with some varied time bins
+    that are not tested in other tests.
+    """
+
+    window_ms, bin_ms = window_and_bin_ms
+    sorting = generate_sorting(num_units=5, sampling_frequency=30000.0, durations=[10.325, 3.5], seed=0)
+
+    result_numba_fast, bins_numba_fast = _compute_correlograms_on_sorting(
+        sorting, window_ms=window_ms, bin_ms=bin_ms, method="numba", fast_mode=True
+    )
+    result_numba, bins_numba = _compute_correlograms_on_sorting(
+        sorting, window_ms=window_ms, bin_ms=bin_ms, method="numba", fast_mode=False
+    )
+    from numpy.testing import assert_almost_equal
+
+    assert_almost_equal(result_numba_fast, result_numba)
+
+
 @pytest.mark.parametrize("method", ["numpy", param("numba", marks=SKIP_NUMBA)])
 def test_flat_cross_correlogram(method):
     """