@@ -637,8 +637,72 @@ namespace mg5amcCpu
637637 // --------------------------------------------------------------------------
638638
639639 // Detect the best SIMD level available on the current CPU at runtime.
640+ // If the environment variable MGONGPU_SIMD_LEVEL is set to one of the
641+ // recognised level names (avx512z, avx512y, avx2, sse4, none) the
642+ // requested level is used *provided* the hardware actually supports it.
643+ // An unsupported or unrecognised value triggers a warning and falls back
644+ // to auto-detection. This allows benchmarking a lower SIMD tier on a
645+ // machine that supports a higher one, e.g.:
646+ // MGONGPU_SIMD_LEVEL=avx2 ./check_cpp.exe # force AVX2 on AVX512 HW
640647 SimdLevel MatrixElementKernelHostFat::detectBestSimd ( const bool verbose )
641648 {
649+ // --- optional user override via MGONGPU_SIMD_LEVEL ---
650+ const char * simdEnv = getenv ( " MGONGPU_SIMD_LEVEL" );
651+ if ( simdEnv != nullptr )
652+ {
653+ const std::string requested ( simdEnv );
654+ SimdLevel req = SimdLevel::none; // initialised to keep compiler happy
655+ bool knownLevel = true ;
656+ if ( requested == " avx512z" )
657+ req = SimdLevel::avx512z;
658+ else if ( requested == " avx512y" )
659+ req = SimdLevel::avx512y;
660+ else if ( requested == " avx2" )
661+ req = SimdLevel::avx2;
662+ else if ( requested == " sse4" )
663+ req = SimdLevel::sse4;
664+ else if ( requested == " none" )
665+ req = SimdLevel::none;
666+ else
667+ {
668+ std::cerr << " WARNING: MGONGPU_SIMD_LEVEL='" << requested
669+ << " ' is not recognised (valid values: avx512z avx512y avx2 sse4 none)."
670+ << " Falling back to auto-detection." << std::endl;
671+ knownLevel = false ;
672+ }
673+ if ( knownLevel )
674+ {
675+ // Safety check: refuse to use a level the hardware cannot execute.
676+ bool hwOk = false ;
677+ #if defined( __x86_64__ ) || defined( __i386__ )
678+ switch ( req )
679+ {
680+ case SimdLevel::avx512z: hwOk = __builtin_cpu_supports ( " avx512vl" ); break ;
681+ case SimdLevel::avx512y: hwOk = __builtin_cpu_supports ( " avx512f" ); break ;
682+ case SimdLevel::avx2: hwOk = __builtin_cpu_supports ( " avx2" ); break ;
683+ case SimdLevel::sse4: hwOk = __builtin_cpu_supports ( " sse4.2" ); break ;
684+ case SimdLevel::none: hwOk = true ; break ;
685+ }
686+ #else
687+ // Non-x86: only sse4 (NEON/VSX) and none are meaningful overrides.
688+ hwOk = ( req == SimdLevel::sse4 || req == SimdLevel::none );
689+ #endif
690+ if ( hwOk )
691+ {
692+ if ( verbose )
693+ std::cout << " INFO: Fat binary: MGONGPU_SIMD_LEVEL override: selected SIMD level "
694+ << requested << std::endl;
695+ return req;
696+ }
697+ else
698+ {
699+ std::cerr << " WARNING: MGONGPU_SIMD_LEVEL='" << requested
700+ << " ' is not supported by this CPU."
701+ << " Falling back to auto-detection." << std::endl;
702+ }
703+ }
704+ }
705+ // --- auto-detection ---
642706#if defined( __x86_64__ ) || defined( __i386__ )
643707 if ( __builtin_cpu_supports ( " avx512vl" ) )
644708 {
0 commit comments