Skip to content

Commit 37dae7f

Browse files
committed
arch: Use the node's max memory transaction size
1 parent c05fabb commit 37dae7f

File tree

2 files changed

+69
-7
lines changed

2 files changed

+69
-7
lines changed

devito/arch/archinfo.py

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,6 @@ def homogenise_gpus(gpu_infos):
179179
homogeneous, otherwise None.
180180
"""
181181
if gpu_infos == []:
182-
warning('No graphics cards detected')
183182
return {}
184183

185184
# Check must ignore physical IDs as they may differ
@@ -324,7 +323,9 @@ def cbk(deviceid=0):
324323

325324
gpu_info[f'mem.{i}'] = make_cbk(i)
326325

327-
gpu_infos['architecture'] = 'AMD'
326+
gpu_info['architecture'] = 'unspecified'
327+
gpu_info['vendor'] = 'AMD'
328+
328329
return gpu_info
329330

330331
except OSError:
@@ -387,13 +388,15 @@ def cbk(deviceid=0):
387388

388389
gpu_info['mem.%s' % i] = make_cbk(i)
389390

390-
gpu_infos['architecture'] = 'Intel'
391+
gpu_info['architecture'] = 'unspecified'
392+
gpu_info['vendor'] = 'INTEL'
393+
391394
return gpu_info
392395

393396
except OSError:
394397
pass
395398

396-
# *** Second try: `lshw`
399+
# *** Fourth try: `lshw`
397400
try:
398401
info_cmd = ['lshw', '-C', 'video']
399402
proc = Popen(info_cmd, stdout=PIPE, stderr=DEVNULL)
@@ -438,7 +441,7 @@ def parse_product_arch():
438441
except OSError:
439442
pass
440443

441-
# Third try: `lspci`, which is more readable but less detailed than `lshw`
444+
# Fifth try: `lspci`, which is more readable but less detailed than `lshw`
442445
try:
443446
info_cmd = ['lspci']
444447
proc = Popen(info_cmd, stdout=PIPE, stderr=DEVNULL)
@@ -756,8 +759,15 @@ def max_mem_trans_size(self, dtype):
756759
Number of items of type `dtype` that can be transferred in a single
757760
memory transaction.
758761
"""
759-
assert self.max_mem_trans_nbytes % np.dtype(dtype).itemsize == 0
760-
return int(self.max_mem_trans_nbytes / np.dtype(dtype).itemsize)
762+
itemsize = np.dtype(dtype).itemsize
763+
764+
# NOTE: This method conservatively uses the node's `max_mem_trans_size`,
765+
# instead of self's, so that we always pad by a compatible amount should
766+
# the user switch target platforms dynamically
767+
mmtb = node_max_mem_trans_nbytes(self)
768+
assert mmtb % itemsize == 0
769+
770+
return int(mmtb / itemsize)
761771

762772
def limits(self, compiler=None, language=None):
763773
"""
@@ -1101,6 +1111,42 @@ def march(cls):
11011111
return fallback
11021112

11031113

1114+
@memoized_func
def node_max_mem_trans_nbytes(platform):
    """
    Return the maximum memory transaction size in bytes for the underlying
    node, which, as such, takes into account all available platforms.

    Parameters
    ----------
    platform : Cpu64 or Device
        The platform currently being targeted.

    Returns
    -------
    The largest `max_mem_trans_nbytes` among `platform` and the other kind
    of platform (host or device) considered for the node.

    Raises
    ------
    AssertionError
        If `platform` is neither a `Cpu64` nor a `Device`.
    """
    mmtb0 = platform.max_mem_trans_nbytes

    if isinstance(platform, Cpu64):
        gpu_info = get_gpu_info()
        if not gpu_info:
            # This node may simply not have a GPU
            return mmtb0

        mapper = {
            'NVIDIA': NvidiaDevice,
            'AMD': AmdDevice,
            'INTEL': IntelDevice,
        }
        # Only the lookup is guarded: a KeyError means that either no
        # 'vendor' entry was recorded or the vendor is not a known one
        try:
            device = mapper[gpu_info['vendor']]
        except KeyError:
            # Fallback -- act even more conservatively, by taking the largest
            # transaction size across all known device types
            mmtb1 = max(p.max_mem_trans_nbytes for p in mapper.values())
            mmtb = max(mmtb0, mmtb1)
            warning("Unable to determine GPU type, assuming a maximum memory "
                    f"transaction size of {mmtb} bytes")
            return mmtb

        return max(mmtb0, device.max_mem_trans_nbytes)

    if isinstance(platform, Device):
        return max(Cpu64.max_mem_trans_nbytes, mmtb0)

    # An explicit raise, rather than `assert False`, so that the check
    # survives `python -O` instead of silently returning None
    raise AssertionError(f"Unknown platform type: {type(platform)}")
1148+
1149+
11041150
# CPUs
11051151
ANYCPU = Cpu64('cpu64')
11061152
CPU64_DUMMY = Intel64('cpu64-dummy', cores_logical=2, cores_physical=1, isa='sse')

tests/test_gpu_common.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,22 @@ def test_host_threads(self):
5757
assert nth == get_cpu_info()['physical']
5858
assert nth == Cpu64("test").cores_physical
5959

60+
@switchconfig(platform='intel64', autopadding=True)
def test_autopad_with_platform_switch(self):
    """
    Check that, with autopadding enabled on a CPU platform, the innermost
    dimension is padded according to the GPU vendor detected on the node.
    """
    grid = Grid(shape=(10, 10))

    f = Function(name='f', grid=grid, space_order=0)

    # Only the innermost dimension is expected to be padded
    assert f.shape_allocated[0] == 10

    # The node may have no GPU, or the probe may not have recorded a
    # vendor; in both cases there is no vendor-specific padding to check,
    # so avoid a spurious KeyError
    vendor = (get_gpu_info() or {}).get('vendor')
    if vendor == 'INTEL':
        assert f.shape_allocated[1] == 16
    elif vendor == 'NVIDIA':
        assert f.shape_allocated[1] == 32
    elif vendor == 'AMD':
        assert f.shape_allocated[1] == 64
75+
6076

6177
class TestCodeGeneration:
6278

0 commit comments

Comments
 (0)