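The following scenario leads to an access violation when trying to compile a SPIR-V shader: a function-local variable is declared with an initializer, is assigned a single component loaded from a vector variable, and is then passed as a pointer argument to a function call.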
If the initializer is skipped, the access violation no longer happens. The value of the initializer doesn't matter; using an OpConstant instead of an OpConstantNull makes no difference.
On driver version 31.0.23013.1023 the access violation occurs when creating the shader module. On driver version 31.0.24002.92 it does not occur until the pipeline is created (this applies to both compute and graphics pipelines).
Here's the full shader:
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 10
; Bound: 35
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpName %main "main"
OpName %DummyFunc "DummyFunc"
OpName %scalarVar "scalarVar"
OpName %vec2Var "vec2Var"
OpName %param "param"
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%v3float = OpTypeVector %float 3
%9 = OpTypeFunction %void %_ptr_Function_float
%_ptr_Function_v3float = OpTypePointer Function %v3float
%float_0 = OpConstant %float 0
%16 = OpConstantComposite %v3float %float_0 %float_0 %float_0
%v2float = OpTypeVector %float 2
%_ptr_Function_v2float = OpTypePointer Function %v2float
%float_1 = OpConstant %float 1
%24 = OpConstantComposite %v2float %float_1 %float_1
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%v3uint = OpTypeVector %uint 3
%uint_1 = OpConstant %uint 1
%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1
%18 = OpConstantNull %float
; Entry point. Stores the constant %24 into %vec2Var, loads component 0 of %vec2Var through an
; access chain, stores that value into %param (the variable declared with an initializer) and
; passes %param by pointer to %DummyFunc.
%main = OpFunction %void None %3
%5 = OpLabel
%vec2Var = OpVariable %_ptr_Function_v2float Function ; <- vec2 variable we'll be swizzling.
%param = OpVariable %_ptr_Function_float Function %18 ; <- Declare float variable using an initializer. To make the shader compile, simply remove the "%18".
OpStore %vec2Var %24
%29 = OpAccessChain %_ptr_Function_float %vec2Var %uint_0 ; <- Swizzle.
%30 = OpLoad %float %29
OpStore %param %30
%31 = OpFunctionCall %void %DummyFunc %param ; <- Call a dummy function that does nothing.
OpReturn
OpFunctionEnd
; Helper with an empty body: takes one pointer-to-Function-float parameter (%scalarVar) and
; returns immediately without reading or writing it.
%DummyFunc = OpFunction %void None %9
%scalarVar = OpFunctionParameter %_ptr_Function_float
%12 = OpLabel
OpReturn
OpFunctionEnd
I am running on a Radeon RX 5700 XT (731FC1) on Windows 10.
The validation layers do not report any issues. spirv-val does not report any issues with the shader.
Here is a reproducer: https://drive.google.com/file/d/1Rcg2cR-lX_ZIWJkT1yF1O9VVlZnMGRpu/view?usp=sharing
The reproducer will create a device, read in "compute.spv" and try to compile it to a compute pipeline. This leads to an access violation. To make it pass, replace "compute.spv" with "compute_no_initializer.spv". The only difference is the removal of the initializer.
This is the full source code of the reproducer:
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <vector>
#include <vulkan/vulkan.h>
using namespace std;
/// Creates the Vulkan instance used by the reproducer.
///
/// The instance requests API version 1.1 and enables no layers or extensions.
///
/// @return The new instance, or VK_NULL_HANDLE if vkCreateInstance fails (an
///         error message is also written to stderr).
static VkInstance CreateInstance() {
    // Information about our application.
    VkApplicationInfo applicationInfo = {};
    applicationInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
    applicationInfo.pApplicationName = "AMDShader";
    applicationInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
    applicationInfo.pEngineName = "AMDShader";
    applicationInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
    applicationInfo.apiVersion = VK_API_VERSION_1_1;

    // Create Vulkan instance.
    VkInstanceCreateInfo instanceCreateInfo = {};
    instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
    instanceCreateInfo.pApplicationInfo = &applicationInfo;

    // Start from a null handle so a failed call can never hand an
    // indeterminate value back to the caller.
    VkInstance instance = VK_NULL_HANDLE;
    const VkResult result = vkCreateInstance(&instanceCreateInfo, nullptr, &instance);
    if (result != VK_SUCCESS) {
        std::cerr << "Could not create Vulkan instance.\n";
        return VK_NULL_HANDLE;
    }
    return instance;
}
/// Creates a logical device on the first physical device the instance reports.
///
/// A single queue from queue family 0 is requested because a device cannot be
/// created without at least one queue; the reproducer never submits work to it.
///
/// @param instance A valid Vulkan instance.
/// @return The new device, or VK_NULL_HANDLE if no physical device is
///         available or device creation fails (an error is written to stderr).
static VkDevice CreateDevice(VkInstance instance) {
    // Query physical devices and pick the first one.
    // First query the number of devices.
    uint32_t deviceCount = 0;
    VkResult result = vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr);
    if (result != VK_SUCCESS || deviceCount == 0) {
        std::cerr << "Could not find any Vulkan physical device.\n";
        return VK_NULL_HANDLE;
    }

    // Then get a list of the actual devices.
    std::vector<VkPhysicalDevice> physicalDevices(deviceCount);
    result = vkEnumeratePhysicalDevices(instance, &deviceCount, physicalDevices.data());
    // VK_INCOMPLETE only means more devices showed up between the two calls;
    // the entries that were written are still valid.
    if ((result != VK_SUCCESS && result != VK_INCOMPLETE) || deviceCount == 0) {
        std::cerr << "Could not enumerate Vulkan physical devices.\n";
        return VK_NULL_HANDLE;
    }
    const VkPhysicalDevice physicalDevice = physicalDevices[0];

    // We need one queue to be able to create a device, but we'll never actually
    // submit any work so just pick the first one (queueFamilyIndex stays 0 from
    // the zero-initialization below).
    const float priority = 1.0f;
    VkDeviceQueueCreateInfo queueCreateInfo = {};
    queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
    queueCreateInfo.queueCount = 1;
    queueCreateInfo.pQueuePriorities = &priority;

    // Create logical device.
    VkDeviceCreateInfo deviceCreateInfo = {};
    deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    deviceCreateInfo.queueCreateInfoCount = 1;
    deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;

    VkDevice device = VK_NULL_HANDLE;
    result = vkCreateDevice(physicalDevice, &deviceCreateInfo, nullptr, &device);
    if (result != VK_SUCCESS) {
        std::cerr << "Could not create Vulkan device.\n";
        return VK_NULL_HANDLE;
    }
    return device;
}
/// Loads a SPIR-V binary from disk and wraps it in a shader module.
///
/// @param device   Device that will own the shader module.
/// @param filename Path of the SPIR-V file; its size must be a non-zero
///                 multiple of 4 bytes.
/// @return The new shader module, or VK_NULL_HANDLE if the file cannot be
///         opened or read, has an invalid size, or vkCreateShaderModule fails.
static VkShaderModule CreateShaderModule(VkDevice device, const char* filename) {
    // Read SPIR-V.
    FILE* file = fopen(filename, "rb");
    if (file == nullptr) {
        std::cerr << "Could not open shader file '" << filename << "'.\n";
        return VK_NULL_HANDLE;
    }

    std::vector<uint32_t> spirv;
    bool readOk = false;
    if (fseek(file, 0, SEEK_END) == 0) {
        const long fileSize = ftell(file);
        // SPIR-V is a stream of 32-bit words, so reject empty files, ftell
        // errors (-1) and sizes that are not a whole number of words.
        if (fileSize > 0 && fileSize % 4 == 0 && fseek(file, 0, SEEK_SET) == 0) {
            spirv.resize(static_cast<std::size_t>(fileSize) / sizeof(uint32_t));
            readOk = fread(spirv.data(), sizeof(uint32_t), spirv.size(), file) == spirv.size();
        }
    }
    fclose(file);
    if (!readOk) {
        std::cerr << "Could not read SPIR-V from '" << filename << "'.\n";
        return VK_NULL_HANDLE;
    }

    // Create shader module.
    VkShaderModuleCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    createInfo.pNext = nullptr;
    createInfo.flags = 0;
    createInfo.codeSize = spirv.size() * sizeof(uint32_t);  // Size in bytes, not words.
    createInfo.pCode = spirv.data();

    VkShaderModule shaderModule = VK_NULL_HANDLE;
    const VkResult result = vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule);
    if (result != VK_SUCCESS) {
        std::cout << "Failed to create shader module.\n";
        return VK_NULL_HANDLE;
    }
    return shaderModule;
}
int main() {
VkInstance instance = CreateInstance();
VkDevice device = CreateDevice(instance);
VkShaderModule compute = CreateShaderModule(device, "compute.spv");
// Create empty pipeline layout (no resources bound).
VkPipelineLayoutCreateInfo layoutDesc = {};
layoutDesc.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
VkPipelineLayout layout;
vkCreatePipelineLayout(device, &layoutDesc, nullptr, &layout);
// Create compute pipeline.
VkPipelineShaderStageCreateInfo stage = {};
stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stage.module = compute;
stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
stage.pName = "main";
VkComputePipelineCreateInfo pipelineDesc = {};
pipelineDesc.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
pipelineDesc.stage = stage;
pipelineDesc.layout = layout;
VkPipeline pipeline;
VkResult result = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipelineDesc, nullptr, &pipeline);
if (result != VK_SUCCESS) {
cout << "Failed to created compute pipeline.\n";
}
// Cleanup
vkDestroyPipeline(device, pipeline, nullptr);
vkDestroyPipelineLayout(device, layout, nullptr);
vkDestroyShaderModule(device, compute, nullptr);
vkDestroyDevice(device, nullptr);
vkDestroyInstance(instance, nullptr);
cout << "Success!\n";
return 0;
}
I have also encountered a different shader which doesn't crash on compilation, but instead produces incorrect results (a pure black screen) when an initializer is used on a variable that is passed as a function parameter. Removing the initializer fixes the output.
A colleague with an RX 7800 XT (same driver version 31.0.24002.92) ran the reproducer without getting any access violation, so the issue may be GPU- or architecture-specific.