@@ -179,7 +179,6 @@ def homogenise_gpus(gpu_infos):
179179 homogeneous, otherwise None.
180180 """
181181 if gpu_infos == []:
182- warning ('No graphics cards detected' )
183182 return {}
184183
185184 # Check must ignore physical IDs as they may differ
@@ -324,7 +323,9 @@ def cbk(deviceid=0):
324323
325324 gpu_info [f'mem.{ i } ' ] = make_cbk (i )
326325
327- gpu_infos ['architecture' ] = 'AMD'
326+ gpu_info ['architecture' ] = 'unspecified'
327+ gpu_info ['vendor' ] = 'AMD'
328+
328329 return gpu_info
329330
330331 except OSError :
@@ -387,13 +388,15 @@ def cbk(deviceid=0):
387388
388389 gpu_info ['mem.%s' % i ] = make_cbk (i )
389390
390- gpu_infos ['architecture' ] = 'Intel'
391+ gpu_info ['architecture' ] = 'unspecified'
392+ gpu_info ['vendor' ] = 'INTEL'
393+
391394 return gpu_info
392395
393396 except OSError :
394397 pass
395398
396- # *** Second try: `lshw`
399+ # *** Fourth try: `lshw`
397400 try :
398401 info_cmd = ['lshw' , '-C' , 'video' ]
399402 proc = Popen (info_cmd , stdout = PIPE , stderr = DEVNULL )
@@ -438,7 +441,7 @@ def parse_product_arch():
438441 except OSError :
439442 pass
440443
441- # Third try: `lspci`, which is more readable but less detailed than `lshw`
444+ # Fifth try: `lspci`, which is more readable but less detailed than `lshw`
442445 try :
443446 info_cmd = ['lspci' ]
444447 proc = Popen (info_cmd , stdout = PIPE , stderr = DEVNULL )
@@ -756,8 +759,15 @@ def max_mem_trans_size(self, dtype):
756759 Number of items of type `dtype` that can be transferred in a single
757760 memory transaction.
758761 """
759- assert self .max_mem_trans_nbytes % np .dtype (dtype ).itemsize == 0
760- return int (self .max_mem_trans_nbytes / np .dtype (dtype ).itemsize )
762+ itemsize = np .dtype (dtype ).itemsize
763+
764+ # NOTE: This method conservatively uses the node's `max_mem_trans_nbytes`
765+ # (see `node_max_mem_trans_nbytes`) instead of self's, so that we always pad
766+ # by a compatible amount should the user switch target platforms dynamically
767+ mmtb = node_max_mem_trans_nbytes (self )
768+ assert mmtb % itemsize == 0
769+
770+ return int (mmtb / itemsize )
761771
762772 def limits (self , compiler = None , language = None ):
763773 """
@@ -1101,6 +1111,42 @@ def march(cls):
11011111 return fallback
11021112
11031113
@memoized_func
def node_max_mem_trans_nbytes(platform):
    """
    Return the maximum memory transaction size in bytes for the underlying
    node, which, as such, takes into account all available platforms.

    Parameters
    ----------
    platform : Cpu64 or Device
        The platform whose own `max_mem_trans_nbytes` is the baseline value.

    Returns
    -------
    The largest `max_mem_trans_nbytes` among `platform` and the other
    platform kinds that may be present on the node:

    * For a `Cpu64` with no GPU information available, the platform's own
      value is returned unchanged.
    * For a `Cpu64` with a recognised GPU vendor ('NVIDIA', 'AMD', 'INTEL'),
      the maximum between the CPU value and that vendor's device value.
    * For a `Cpu64` whose GPU vendor is missing or unrecognised, the maximum
      across the CPU value and all known device classes; a warning is
      emitted in this case.
    * For a `Device`, the maximum between `Cpu64.max_mem_trans_nbytes` and
      the device's own value.

    Raises
    ------
    AssertionError
        If `platform` is neither a `Cpu64` nor a `Device`.

    Notes
    -----
    The function is wrapped by `memoized_func`; presumably results are cached
    per `platform` argument -- confirm against the decorator's definition.
    """
    mmtb0 = platform.max_mem_trans_nbytes

    if isinstance(platform, Cpu64):
        gpu_info = get_gpu_info()
        if not gpu_info:
            # This node may simply not have a GPU
            return mmtb0

        mapper = {
            'NVIDIA': NvidiaDevice,
            'AMD': AmdDevice,
            'INTEL': IntelDevice,
        }

        # Only the lookup itself can legitimately raise KeyError (either the
        # 'vendor' entry is absent or the vendor is not a known one), so keep
        # the guarded region as small as possible
        try:
            device = mapper[gpu_info['vendor']]
        except KeyError:
            # Fallback -- act even more conservatively
            mmtb1 = max(p.max_mem_trans_nbytes for p in mapper.values())
            mmtb = max(mmtb0, mmtb1)
            warning("Unable to determine GPU type, assuming a maximum memory "
                    f"transaction size of {mmtb} bytes")
            return mmtb

        return max(mmtb0, device.max_mem_trans_nbytes)

    if isinstance(platform, Device):
        return max(Cpu64.max_mem_trans_nbytes, mmtb0)

    # Raised explicitly rather than via `assert False`, which would be
    # stripped under `python -O` and make this function silently return None
    raise AssertionError(f"Unknown platform type: {type(platform)}")
1148+
1149+
11041150# CPUs
11051151ANYCPU = Cpu64 ('cpu64' )
11061152CPU64_DUMMY = Intel64 ('cpu64-dummy' , cores_logical = 2 , cores_physical = 1 , isa = 'sse' )
0 commit comments