gtests: add regression test for PVC WAR bug workaround

echeresh · echeresh · commit 8e5a57f8fc5b · 2025-12-08T18:26:49.000-08:00
diff --git a/src/gpu/intel/compute/device_info.hpp b/src/gpu/intel/compute/device_info.hpp
@@ -49,7 +49,7 @@ struct alignas(int) gpu_product_t {
     unsigned char data[12];
 };
 
-static inline std::string to_string(gpu_arch_t arch) {
+static inline const char *to_string(gpu_arch_t arch) {
 #define CASE(_case) \
     if (arch == gpu_arch_t::_case) return STRINGIFY(_case)
     CASE(xe_lp);
diff --git a/src/gpu/intel/engine.cpp b/src/gpu/intel/engine.cpp
@@ -88,10 +88,19 @@ status_t engine_t::init(const std::vector<uint8_t> &cache_blob) {
 } // namespace impl
 } // namespace dnnl
 
-bool dnnl_impl_gpu_mayiuse_ngen_kernels(dnnl::impl::engine_t *engine) {
+bool dnnl_impl_gpu_intel_mayiuse_ngen_kernels(dnnl::impl::engine_t *engine) {
     using namespace dnnl::impl;
     using namespace dnnl::impl::gpu;
 
     auto *intel_engine = utils::downcast<intel::engine_t *>(engine);
     return intel_engine->mayiuse_ngen_kernels();
 }
+
+const char *dnnl_impl_gpu_intel_get_isa_name(dnnl::impl::engine_t *engine) {
+    // Test-only hook: reports the GPU architecture name of `engine`.
+    using namespace dnnl::impl;
+    using namespace dnnl::impl::gpu;
+    const auto arch = utils::downcast<intel::engine_t *>(engine)
+                              ->device_info()->gpu_arch();
+    return intel::compute::to_string(arch);
+}
diff --git a/src/gpu/intel/engine.hpp b/src/gpu/intel/engine.hpp
@@ -217,7 +217,9 @@ class engine_t : public gpu::engine_t {
 } // namespace dnnl
 
 // Exported for testing purposes only.
-extern "C" bool DNNL_API dnnl_impl_gpu_mayiuse_ngen_kernels(
+extern "C" bool DNNL_API dnnl_impl_gpu_intel_mayiuse_ngen_kernels(
+        dnnl::impl::engine_t *engine);
+extern "C" DNNL_API const char *dnnl_impl_gpu_intel_get_isa_name(
         dnnl::impl::engine_t *engine);
 
 #endif
diff --git a/tests/gtests/ocl/api/test_engine.cpp b/tests/gtests/ocl/api/test_engine.cpp
@@ -23,7 +23,7 @@
 #include <string>
 #include <CL/cl.h>
 
-extern "C" bool dnnl_impl_gpu_mayiuse_ngen_kernels(dnnl_engine_t engine);
+extern "C" bool dnnl_impl_gpu_intel_mayiuse_ngen_kernels(dnnl_engine_t engine);
 
 namespace dnnl {
 namespace {
@@ -287,7 +287,7 @@ TEST_P(ocl_engine_test_t, BinaryKernels) {
 //DNNL_ENABLE_MEM_DEBUG forces allocation fail, causing mayiuse to fail
 #ifndef DNNL_ENABLE_MEM_DEBUG
     if (s == dnnl_success) {
-        ASSERT_EQ(dnnl_impl_gpu_mayiuse_ngen_kernels(eng), true);
+        ASSERT_EQ(dnnl_impl_gpu_intel_mayiuse_ngen_kernels(eng), true);
     }
 #endif
 
diff --git a/tests/gtests/regression/CMakeLists.txt b/tests/gtests/regression/CMakeLists.txt
@@ -14,9 +14,22 @@
 # limitations under the License.
 #===============================================================================
 
-set(TEST_EXE test_regression)
+set(TEST_SOURCES
+    test_regression_binary_stride.cpp
+)
 
-file(GLOB TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/test_*.cpp)
-list(APPEND TEST_SOURCES ${MAIN_SRC_GTEST})
+if(UNIX AND NOT DNNL_GPU_RUNTIME STREQUAL "NONE" AND DNNL_GPU_VENDOR STREQUAL "INTEL")
+    list(APPEND TEST_SOURCES test_regression_pvc_war_wa.cpp)
+endif()
 
-register_exe(${TEST_EXE} "${TEST_SOURCES}" "test" "dnnl_gtest")
+foreach(TEST_FILE ${TEST_SOURCES})
+    get_filename_component(exe ${TEST_FILE} NAME_WE)
+    register_exe("${exe}" "${CMAKE_CURRENT_SOURCE_DIR}/${TEST_FILE};${MAIN_SRC_GTEST}" "test" "dnnl_gtest")
+endforeach()
+
+if(TARGET test_regression_pvc_war_wa)
+    # Regression test for PVC WAR bug workaround. The bug reproduces under the
+    # round-robin thread arbitration policy, so force it via NEO debug keys.
+    set_tests_properties(test_regression_pvc_war_wa
+        PROPERTIES ENVIRONMENT "NEOReadDebugKeys=1;OverrideThreadArbitrationPolicy=1")
+endif()
diff --git a/tests/gtests/regression/test_regression_binary_stride.cpp b/tests/gtests/regression/test_regression_binary_stride.cpp
diff --git a/tests/gtests/regression/test_regression_pvc_war_wa.cpp b/tests/gtests/regression/test_regression_pvc_war_wa.cpp
@@ -0,0 +1,107 @@
+/*******************************************************************************
+* Copyright 2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "dnnl_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "oneapi/dnnl/dnnl.hpp"
+
+#include <cstdlib>
+#include <iostream>
+
+extern "C" const char *dnnl_impl_gpu_intel_get_isa_name(dnnl_engine_t engine);
+
+namespace dnnl {
+
+struct conv_params_t {
+    memory::dim MB, IC, OC, IH, OH, KH; // IH/OH/KH are used for both H and W
+};
+
+// Regression test for the PVC write-after-read (WAR) hardware bug workaround.
+// The bug manifests as a page fault caused by WAR-related corruption of the
+// send header. The test performs no result validation: if the workaround
+// regresses, the expected failure is a GPU page fault and a test segfault.
+class test_regression_conv_pvc_war_t
+    : public ::testing::TestWithParam<conv_params_t> {
+    conv_params_t params;
+    engine eng;
+
+protected:
+    // Skips unless an Intel PVC (xe_hpc) GPU is present, then runs Test().
+    void SetUp() override {
+        SKIP_IF_CUDA(true, "Unsupported test for CUDA.");
+        SKIP_IF_HIP(true, "Unsupported test for HIP.");
+        SKIP_IF_GENERIC(true, "Unsupported test for generic GPU.");
+        const bool has_gpu = engine::get_count(engine::kind::gpu) != 0;
+        SKIP_IF(!has_gpu, "GPU engine not found.");
+
+        eng = engine(engine::kind::gpu, 0);
+        const std::string isa_name
+                = dnnl_impl_gpu_intel_get_isa_name(eng.get());
+        SKIP_IF(isa_name != "xe_hpc", "Test is for PVC only");
+
+        params = GetParam();
+        Test();
+    }
+
+    // Creates an f64 backward-data convolution and executes it repeatedly.
+    void Test() {
+        using dt = memory::data_type;
+        using tag = memory::format_tag;
+
+        memory::dims src_dims {params.MB, params.IC, params.IH, params.IH};
+        memory::dims wei_dims {params.OC, params.IC, params.KH, params.KH};
+        memory::dims dst_dims {params.MB, params.OC, params.OH, params.OH};
+        memory::dims strides {1, 1};
+        memory::dims padding {1, 1};
+
+        memory::desc src_md(src_dims, dt::f64, tag::nchw);
+        memory::desc wei_md(wei_dims, dt::f64, tag::oihw);
+        memory::desc dst_md(dst_dims, dt::f64, tag::nchw);
+
+        // The forward primitive descriptor is needed only as a hint.
+        convolution_forward::primitive_desc hint_fwd_pd(eng,
+                prop_kind::forward_training, algorithm::convolution_direct,
+                src_md, wei_md, dst_md, strides, padding, padding);
+        convolution_backward_data::primitive_desc pd(eng,
+                algorithm::convolution_direct, src_md, wei_md, dst_md, strides,
+                padding, padding, hint_fwd_pd);
+        convolution_backward_data prim(pd);
+
+        // Buffers are allocated but never filled by this test.
+        memory diff_dst_mem(pd.diff_dst_desc(), eng);
+        memory wei_mem(pd.weights_desc(), eng);
+        memory diff_src_mem(pd.diff_src_desc(), eng);
+
+        stream strm(eng);
+        const int num_runs = 10;
+        for (int run = 0; run < num_runs; ++run) {
+            prim.execute(strm,
+                    {{DNNL_ARG_DIFF_DST, diff_dst_mem},
+                            {DNNL_ARG_WEIGHTS, wei_mem},
+                            {DNNL_ARG_DIFF_SRC, diff_src_mem}});
+        }
+        strm.wait();
+    }
+};
+
+TEST_P(test_regression_conv_pvc_war_t, Tests) {} // workload runs in SetUp()
+
+GPU_INSTANTIATE_TEST_SUITE_P(All, test_regression_conv_pvc_war_t,
+        ::testing::Values(conv_params_t {8, 32, 64, 128, 128, 3},
+                conv_params_t {1, 64, 64, 256, 256, 3}));
+
+} // namespace dnnl