Skip to content

Commit e3a62b3

Browse files
Fat binary: add MGONGPU_SIMD_LEVEL env-var to override runtime SIMD selection
Agent-Logs-Url: https://github.com/madgraph5/madgraph4gpu/sessions/0bd7a4a4-bb24-4bbf-b578-58747cf4f9fa Co-authored-by: oliviermattelaer <[email protected]>
1 parent 80e37ce commit e3a62b3

1 file changed

Lines changed: 64 additions & 0 deletions

File tree

epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -637,8 +637,72 @@ namespace mg5amcCpu
637637
//--------------------------------------------------------------------------
638638

639639
// Detect the best SIMD level available on the current CPU at runtime.
640+
// If the environment variable MGONGPU_SIMD_LEVEL is set to one of the
641+
// recognised level names (avx512z, avx512y, avx2, sse4, none) the
642+
// requested level is used *provided* the hardware actually supports it.
643+
// An unsupported or unrecognised value triggers a warning and a fallback
644+
// to auto-detection. This allows benchmarking a lower SIMD tier on a
645+
// machine that supports a higher one, e.g.:
646+
// MGONGPU_SIMD_LEVEL=avx2 ./check_cpp.exe # force AVX2 on AVX512 HW
640647
SimdLevel MatrixElementKernelHostFat::detectBestSimd( const bool verbose )
641648
{
649+
// --- optional user override via MGONGPU_SIMD_LEVEL ---
650+
const char* simdEnv = getenv( "MGONGPU_SIMD_LEVEL" );
651+
if( simdEnv != nullptr )
652+
{
653+
const std::string requested( simdEnv );
654+
SimdLevel req = SimdLevel::none; // initialised to keep compiler happy
655+
bool knownLevel = true;
656+
if( requested == "avx512z" )
657+
req = SimdLevel::avx512z;
658+
else if( requested == "avx512y" )
659+
req = SimdLevel::avx512y;
660+
else if( requested == "avx2" )
661+
req = SimdLevel::avx2;
662+
else if( requested == "sse4" )
663+
req = SimdLevel::sse4;
664+
else if( requested == "none" )
665+
req = SimdLevel::none;
666+
else
667+
{
668+
std::cerr << "WARNING: MGONGPU_SIMD_LEVEL='" << requested
669+
<< "' is not recognised (valid values: avx512z avx512y avx2 sse4 none)."
670+
<< " Falling back to auto-detection." << std::endl;
671+
knownLevel = false;
672+
}
673+
if( knownLevel )
674+
{
675+
// Safety check: refuse to use a level the hardware cannot execute.
676+
bool hwOk = false;
677+
#if defined( __x86_64__ ) || defined( __i386__ )
678+
switch( req )
679+
{
680+
case SimdLevel::avx512z: hwOk = __builtin_cpu_supports( "avx512vl" ); break;
681+
case SimdLevel::avx512y: hwOk = __builtin_cpu_supports( "avx512f" ); break;
682+
case SimdLevel::avx2: hwOk = __builtin_cpu_supports( "avx2" ); break;
683+
case SimdLevel::sse4: hwOk = __builtin_cpu_supports( "sse4.2" ); break;
684+
case SimdLevel::none: hwOk = true; break;
685+
}
686+
#else
687+
// Non-x86: only sse4 (NEON/VSX) and none are meaningful overrides.
688+
hwOk = ( req == SimdLevel::sse4 || req == SimdLevel::none );
689+
#endif
690+
if( hwOk )
691+
{
692+
if( verbose )
693+
std::cout << "INFO: Fat binary: MGONGPU_SIMD_LEVEL override: selected SIMD level "
694+
<< requested << std::endl;
695+
return req;
696+
}
697+
else
698+
{
699+
std::cerr << "WARNING: MGONGPU_SIMD_LEVEL='" << requested
700+
<< "' is not supported by this CPU."
701+
<< " Falling back to auto-detection." << std::endl;
702+
}
703+
}
704+
}
705+
// --- auto-detection ---
642706
#if defined( __x86_64__ ) || defined( __i386__ )
643707
if( __builtin_cpu_supports( "avx512vl" ) )
644708
{

0 commit comments

Comments
 (0)