compiler: always use long for alloc sizes

mloubout · mloubout · commit 253a68aa2762 · 2025-07-14T13:19:04.000-04:00
diff --git a/devito/passes/iet/definitions.py b/devito/passes/iet/definitions.py
@@ -19,7 +19,7 @@
 from devito.passes.iet.langbase import LangBB
 from devito.symbolics import (
     Byref, DefFunction, FieldFromPointer, IndexedPointer, ListInitializer,
-    SizeOf, VOID, pow_to_mul, unevaluate, LONG, retrieve_symbols
+    SizeOf, VOID, pow_to_mul, unevaluate, as_long
 )
 from devito.tools import as_mapper, as_list, as_tuple, filter_sorted, flatten
 from devito.types import (
@@ -90,17 +90,6 @@ def __init__(self, rcompile=None, sregistry=None, platform=None, **kwargs):
         self.rcompile = rcompile
         self.sregistry = sregistry
         self.platform = platform
-        self.index_mode = kwargs.get('options', {'index-mode': 'int32'})['index-mode']
-
-    def intm(self, nbytes):
-        if self.index_mode == 'int64':
-            try:
-                syms = retrieve_symbols(nbytes)
-                return nbytes.subs({s: LONG(s) for s in syms})
-            except AttributeError:
-                return LONG(nbytes)
-        else:
-            return nbytes
 
     def _alloc_object_on_low_lat_mem(self, site, obj, storage):
         """
@@ -147,7 +136,7 @@ def _alloc_array_on_global_mem(self, site, obj, storage):
 
         # Copy input array into global array
         name = self.sregistry.make_name(prefix='init_global')
-        nbytes = SizeOf(obj._C_typedata)*self.intm(obj.size)
+        nbytes = SizeOf(obj._C_typedata)*as_long(obj.size)
         body = [Definition(src),
                 self.langbb['alloc-global-symbol'](obj.indexed, src.indexed, nbytes)]
         efunc = make_callable(name, body)
@@ -170,7 +159,7 @@ def _alloc_host_array_on_high_bw_mem(self, site, obj, storage, *args):
 
         memptr = VOID(Byref(obj._C_symbol), '**')
         alignment = obj._data_alignment
-        nbytes = SizeOf(obj._C_typedata)*self.intm(obj.size)
+        nbytes = SizeOf(obj._C_typedata)*as_long(obj.size)
         alloc = self.langbb['host-alloc'](memptr, alignment, nbytes)
 
         free = self.langbb['host-free'](obj._C_symbol)
@@ -369,15 +358,15 @@ def _alloc_pointed_array_on_high_bw_mem(self, site, obj, storage):
 
         memptr = VOID(Byref(obj._C_symbol), '**')
         alignment = obj._data_alignment
-        nbytes = SizeOf(obj._C_typedata, stars='*')*self.intm(obj.dim.symbolic_size)
+        nbytes = SizeOf(obj._C_typedata, stars='*')*as_long(obj.dim.symbolic_size)
         alloc0 = self.langbb['host-alloc'](memptr, alignment, nbytes)
 
         free0 = self.langbb['host-free'](obj._C_symbol)
 
         # The pointee Array
         pobj = IndexedPointer(obj._C_symbol, obj.dim)
         memptr = VOID(Byref(pobj), '**')
-        nbytes = SizeOf(obj._C_typedata)*self.intm(obj.array.size)
+        nbytes = SizeOf(obj._C_typedata)*as_long(obj.array.size)
         alloc1 = self.langbb['host-alloc'](memptr, alignment, nbytes)
 
         free1 = self.langbb['host-free'](pobj)
@@ -562,10 +551,10 @@ def process(self, graph):
 
 class DeviceAwareDataManager(DataManager):
 
-    def __init__(self, **kwargs):
-        self.gpu_fit = kwargs['options']['gpu-fit']
-        self.gpu_create = kwargs['options']['gpu-create']
-        self.pmode = kwargs['options'].get('place-transfers')
+    def __init__(self, options=None, **kwargs):
+        self.gpu_fit = options['gpu-fit']
+        self.gpu_create = options['gpu-create']
+        self.pmode = options.get('place-transfers')
 
         super().__init__(**kwargs)
 
diff --git a/devito/symbolics/manipulation.py b/devito/symbolics/manipulation.py
@@ -10,9 +10,11 @@
 from devito.finite_differences.differentiable import (
     EvalDerivative, IndexDerivative
 )
-from devito.symbolics.extended_sympy import DefFunction, rfunc
+from devito.symbolics.extended_sympy import DefFunction, rfunc, LONG
 from devito.symbolics.queries import q_leaf
-from devito.symbolics.search import retrieve_indexed, retrieve_functions
+from devito.symbolics.search import (
+    retrieve_indexed, retrieve_functions, retrieve_symbols
+)
 from devito.symbolics.unevaluation import (
     Add as UnevalAdd, Mul as UnevalMul, Pow as UnevalPow, UnevaluableMixin
 )
@@ -24,7 +26,8 @@
 
 __all__ = ['xreplace_indices', 'pow_to_mul', 'indexify', 'subs_op_args',
            'normalize_args', 'uxreplace', 'Uxmapper', 'subs_if_composite',
-           'reuse_if_untouched', 'evalrel', 'flatten_args', 'unevaluate']
+           'reuse_if_untouched', 'evalrel', 'flatten_args', 'unevaluate',
+           'as_long']
 
 
 def uxreplace(expr, rule):
@@ -523,3 +526,14 @@ def unevaluate(expr):
         return uneval_mapper[expr.func](*args)
     except KeyError:
         return reuse_if_untouched(expr, args)
+
+
+def as_long(expr):
+    """
+    Cast each symbol in `expr` to LONG; on AttributeError, cast `expr` as a whole.
+    """
+    try:
+        syms = retrieve_symbols(expr)
+        return expr.subs({s: LONG(s) for s in syms})
+    except AttributeError:
+        return LONG(expr)
diff --git a/examples/mpi/overview.ipynb b/examples/mpi/overview.ipynb
@@ -486,9 +486,9 @@
        "  MPI_Request rsend;\n",
        "\n",
        "  float *restrict bufg_vec __attribute__ ((aligned (64)));\n",
-       "  posix_memalign((void**)(&bufg_vec),64,x_size*y_size*sizeof(float));\n",
+       "  posix_memalign((void**)(&bufg_vec),64,sizeof(float)*(long)(y_size)*(long)(x_size));\n",
        "  float *restrict bufs_vec __attribute__ ((aligned (64)));\n",
-       "  posix_memalign((void**)(&bufs_vec),64,x_size*y_size*sizeof(float));\n",
+       "  posix_memalign((void**)(&bufs_vec),64,sizeof(float)*(long)(y_size)*(long)(x_size));\n",
        "\n",
        "  MPI_Irecv(bufs_vec,x_size*y_size,MPI_FLOAT,fromrank,13,comm,&(rrecv));\n",
        "  if (torank != MPI_PROC_NULL)\n",
diff --git a/examples/performance/00_overview.ipynb b/examples/performance/00_overview.ipynb
@@ -725,7 +725,8 @@
       "    }\n",
       "  }\n",
       "  STOP(section0,timers)\n",
-      "}\n"
+      "}\n",
+      "\n"
      ]
     }
    ],
@@ -1192,13 +1193,13 @@
       "int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int z_size, const int x_size, const int y_size, struct profiler * timers)\n",
       "{\n",
       "  float **restrict pr2_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&pr2_vec),64,nthreads*sizeof(float*));\n",
+      "  posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)(nthreads));\n",
       "  float *restrict r0_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
+      "  posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n",
       "  #pragma omp parallel num_threads(nthreads)\n",
       "  {\n",
       "    const int tid = omp_get_thread_num();\n",
-      "    posix_memalign((void**)(&(pr2_vec[tid])),64,z_size*(y0_blk0_size + 4)*sizeof(float));\n",
+      "    posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)(z_size)*(4 + (long)(y0_blk0_size)));\n",
       "  }\n",
       "\n",
       "  float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
@@ -1412,7 +1413,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n"
+      "posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n"
      ]
     }
    ],
@@ -1483,13 +1484,13 @@
       "int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_y, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int z_size, const int x_size, const int y_size, struct profiler * timers)\n",
       "{\n",
       "  float **restrict pr2_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&pr2_vec),64,nthreads*sizeof(float*));\n",
+      "  posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)(nthreads));\n",
       "  float *restrict r0_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
+      "  posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n",
       "  #pragma omp parallel num_threads(nthreads)\n",
       "  {\n",
       "    const int tid = omp_get_thread_num();\n",
-      "    posix_memalign((void**)(&(pr2_vec[tid])),64,z_size*(y_size + 4)*sizeof(float));\n",
+      "    posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size)));\n",
       "  }\n",
       "\n",
       "  float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
@@ -1626,11 +1627,11 @@
       "int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_x, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x1_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y1_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int x_size, const int y_size, const int z_size, struct profiler * timers)\n",
       "{\n",
       "  float *restrict r0_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
+      "  posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n",
       "  float *restrict r3_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&r3_vec),64,z_size*(x_size + 4)*(y_size + 4)*sizeof(float));\n",
+      "  posix_memalign((void**)(&r3_vec),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size))*(4 + (long)(x_size)));\n",
       "  float *restrict r4_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&r4_vec),64,z_size*(x_size + 4)*(y_size + 4)*sizeof(float));\n",
+      "  posix_memalign((void**)(&r4_vec),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size))*(4 + (long)(x_size)));\n",
       "\n",
       "  float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
       "  float (*restrict r0)[y_size][z_size] __attribute__ ((aligned (64))) = (float (*)[y_size][z_size]) r0_vec;\n",