Skip to content

Commit 253a68a

Browse files
committed
compiler: always use long for alloc sizes
1 parent 4def120 commit 253a68a

File tree

4 files changed

+40
-36
lines changed

4 files changed

+40
-36
lines changed

devito/passes/iet/definitions.py

Lines changed: 9 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
from devito.passes.iet.langbase import LangBB
2020
from devito.symbolics import (
2121
Byref, DefFunction, FieldFromPointer, IndexedPointer, ListInitializer,
22-
SizeOf, VOID, pow_to_mul, unevaluate, LONG, retrieve_symbols
22+
SizeOf, VOID, pow_to_mul, unevaluate, as_long
2323
)
2424
from devito.tools import as_mapper, as_list, as_tuple, filter_sorted, flatten
2525
from devito.types import (
@@ -90,17 +90,6 @@ def __init__(self, rcompile=None, sregistry=None, platform=None, **kwargs):
9090
self.rcompile = rcompile
9191
self.sregistry = sregistry
9292
self.platform = platform
93-
self.index_mode = kwargs.get('options', {'index-mode': 'int32'})['index-mode']
94-
95-
def intm(self, nbytes):
96-
if self.index_mode == 'int64':
97-
try:
98-
syms = retrieve_symbols(nbytes)
99-
return nbytes.subs({s: LONG(s) for s in syms})
100-
except AttributeError:
101-
return LONG(nbytes)
102-
else:
103-
return nbytes
10493

10594
def _alloc_object_on_low_lat_mem(self, site, obj, storage):
10695
"""
@@ -147,7 +136,7 @@ def _alloc_array_on_global_mem(self, site, obj, storage):
147136

148137
# Copy input array into global array
149138
name = self.sregistry.make_name(prefix='init_global')
150-
nbytes = SizeOf(obj._C_typedata)*self.intm(obj.size)
139+
nbytes = SizeOf(obj._C_typedata)*as_long(obj.size)
151140
body = [Definition(src),
152141
self.langbb['alloc-global-symbol'](obj.indexed, src.indexed, nbytes)]
153142
efunc = make_callable(name, body)
@@ -170,7 +159,7 @@ def _alloc_host_array_on_high_bw_mem(self, site, obj, storage, *args):
170159

171160
memptr = VOID(Byref(obj._C_symbol), '**')
172161
alignment = obj._data_alignment
173-
nbytes = SizeOf(obj._C_typedata)*self.intm(obj.size)
162+
nbytes = SizeOf(obj._C_typedata)*as_long(obj.size)
174163
alloc = self.langbb['host-alloc'](memptr, alignment, nbytes)
175164

176165
free = self.langbb['host-free'](obj._C_symbol)
@@ -369,15 +358,15 @@ def _alloc_pointed_array_on_high_bw_mem(self, site, obj, storage):
369358

370359
memptr = VOID(Byref(obj._C_symbol), '**')
371360
alignment = obj._data_alignment
372-
nbytes = SizeOf(obj._C_typedata, stars='*')*self.intm(obj.dim.symbolic_size)
361+
nbytes = SizeOf(obj._C_typedata, stars='*')*as_long(obj.dim.symbolic_size)
373362
alloc0 = self.langbb['host-alloc'](memptr, alignment, nbytes)
374363

375364
free0 = self.langbb['host-free'](obj._C_symbol)
376365

377366
# The pointee Array
378367
pobj = IndexedPointer(obj._C_symbol, obj.dim)
379368
memptr = VOID(Byref(pobj), '**')
380-
nbytes = SizeOf(obj._C_typedata)*self.intm(obj.array.size)
369+
nbytes = SizeOf(obj._C_typedata)*as_long(obj.array.size)
381370
alloc1 = self.langbb['host-alloc'](memptr, alignment, nbytes)
382371

383372
free1 = self.langbb['host-free'](pobj)
@@ -562,10 +551,10 @@ def process(self, graph):
562551

563552
class DeviceAwareDataManager(DataManager):
564553

565-
def __init__(self, **kwargs):
566-
self.gpu_fit = kwargs['options']['gpu-fit']
567-
self.gpu_create = kwargs['options']['gpu-create']
568-
self.pmode = kwargs['options'].get('place-transfers')
554+
def __init__(self, options=None, **kwargs):
555+
self.gpu_fit = options['gpu-fit']
556+
self.gpu_create = options['gpu-create']
557+
self.pmode = options.get('place-transfers')
569558

570559
super().__init__(**kwargs)
571560

devito/symbolics/manipulation.py

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,11 @@
1010
from devito.finite_differences.differentiable import (
1111
EvalDerivative, IndexDerivative
1212
)
13-
from devito.symbolics.extended_sympy import DefFunction, rfunc
13+
from devito.symbolics.extended_sympy import DefFunction, rfunc, LONG
1414
from devito.symbolics.queries import q_leaf
15-
from devito.symbolics.search import retrieve_indexed, retrieve_functions
15+
from devito.symbolics.search import (
16+
retrieve_indexed, retrieve_functions, retrieve_symbols
17+
)
1618
from devito.symbolics.unevaluation import (
1719
Add as UnevalAdd, Mul as UnevalMul, Pow as UnevalPow, UnevaluableMixin
1820
)
@@ -24,7 +26,8 @@
2426

2527
__all__ = ['xreplace_indices', 'pow_to_mul', 'indexify', 'subs_op_args',
2628
'normalize_args', 'uxreplace', 'Uxmapper', 'subs_if_composite',
27-
'reuse_if_untouched', 'evalrel', 'flatten_args', 'unevaluate']
29+
'reuse_if_untouched', 'evalrel', 'flatten_args', 'unevaluate',
30+
'as_long']
2831

2932

3033
def uxreplace(expr, rule):
@@ -523,3 +526,14 @@ def unevaluate(expr):
523526
return uneval_mapper[expr.func](*args)
524527
except KeyError:
525528
return reuse_if_untouched(expr, args)
529+
530+
531+
def as_long(expr):
532+
"""
533+
Convert an expression and its symbolic args to a long integer.
534+
"""
535+
try:
536+
syms = retrieve_symbols(expr)
537+
return expr.subs({s: LONG(s) for s in syms})
538+
except AttributeError:
539+
return LONG(expr)

examples/mpi/overview.ipynb

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -486,9 +486,9 @@
486486
" MPI_Request rsend;\n",
487487
"\n",
488488
" float *restrict bufg_vec __attribute__ ((aligned (64)));\n",
489-
" posix_memalign((void**)(&bufg_vec),64,x_size*y_size*sizeof(float));\n",
489+
" posix_memalign((void**)(&bufg_vec),64,sizeof(float)*(long)(y_size)*(long)(x_size));\n",
490490
" float *restrict bufs_vec __attribute__ ((aligned (64)));\n",
491-
" posix_memalign((void**)(&bufs_vec),64,x_size*y_size*sizeof(float));\n",
491+
" posix_memalign((void**)(&bufs_vec),64,sizeof(float)*(long)(y_size)*(long)(x_size));\n",
492492
"\n",
493493
" MPI_Irecv(bufs_vec,x_size*y_size,MPI_FLOAT,fromrank,13,comm,&(rrecv));\n",
494494
" if (torank != MPI_PROC_NULL)\n",

examples/performance/00_overview.ipynb

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -725,7 +725,8 @@
725725
" }\n",
726726
" }\n",
727727
" STOP(section0,timers)\n",
728-
"}\n"
728+
"}\n",
729+
"\n"
729730
]
730731
}
731732
],
@@ -1192,13 +1193,13 @@
11921193
"int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int z_size, const int x_size, const int y_size, struct profiler * timers)\n",
11931194
"{\n",
11941195
" float **restrict pr2_vec __attribute__ ((aligned (64)));\n",
1195-
" posix_memalign((void**)(&pr2_vec),64,nthreads*sizeof(float*));\n",
1196+
" posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)(nthreads));\n",
11961197
" float *restrict r0_vec __attribute__ ((aligned (64)));\n",
1197-
" posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
1198+
" posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n",
11981199
" #pragma omp parallel num_threads(nthreads)\n",
11991200
" {\n",
12001201
" const int tid = omp_get_thread_num();\n",
1201-
" posix_memalign((void**)(&(pr2_vec[tid])),64,z_size*(y0_blk0_size + 4)*sizeof(float));\n",
1202+
" posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)(z_size)*(4 + (long)(y0_blk0_size)));\n",
12021203
" }\n",
12031204
"\n",
12041205
" float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
@@ -1412,7 +1413,7 @@
14121413
"name": "stdout",
14131414
"output_type": "stream",
14141415
"text": [
1415-
"posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n"
1416+
"posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n"
14161417
]
14171418
}
14181419
],
@@ -1483,13 +1484,13 @@
14831484
"int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_y, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int z_size, const int x_size, const int y_size, struct profiler * timers)\n",
14841485
"{\n",
14851486
" float **restrict pr2_vec __attribute__ ((aligned (64)));\n",
1486-
" posix_memalign((void**)(&pr2_vec),64,nthreads*sizeof(float*));\n",
1487+
" posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)(nthreads));\n",
14871488
" float *restrict r0_vec __attribute__ ((aligned (64)));\n",
1488-
" posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
1489+
" posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n",
14891490
" #pragma omp parallel num_threads(nthreads)\n",
14901491
" {\n",
14911492
" const int tid = omp_get_thread_num();\n",
1492-
" posix_memalign((void**)(&(pr2_vec[tid])),64,z_size*(y_size + 4)*sizeof(float));\n",
1493+
" posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size)));\n",
14931494
" }\n",
14941495
"\n",
14951496
" float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
@@ -1626,11 +1627,11 @@
16261627
"int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_x, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x1_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y1_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int x_size, const int y_size, const int z_size, struct profiler * timers)\n",
16271628
"{\n",
16281629
" float *restrict r0_vec __attribute__ ((aligned (64)));\n",
1629-
" posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
1630+
" posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n",
16301631
" float *restrict r3_vec __attribute__ ((aligned (64)));\n",
1631-
" posix_memalign((void**)(&r3_vec),64,z_size*(x_size + 4)*(y_size + 4)*sizeof(float));\n",
1632+
" posix_memalign((void**)(&r3_vec),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size))*(4 + (long)(x_size)));\n",
16321633
" float *restrict r4_vec __attribute__ ((aligned (64)));\n",
1633-
" posix_memalign((void**)(&r4_vec),64,z_size*(x_size + 4)*(y_size + 4)*sizeof(float));\n",
1634+
" posix_memalign((void**)(&r4_vec),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size))*(4 + (long)(x_size)));\n",
16341635
"\n",
16351636
" float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
16361637
" float (*restrict r0)[y_size][z_size] __attribute__ ((aligned (64))) = (float (*)[y_size][z_size]) r0_vec;\n",

0 commit comments

Comments (0)