Skip to content

Commit 4918c3b

Browse files
Merge pull request #2605 from Genesis-Embodied-AI/hp/volatile-load-no-forward
Respect MemoryAccessVolatileMask on OpLoad to prevent forwarding
2 parents 4d4b79b + 6933ffc commit 4918c3b

File tree

5 files changed

+177
-0
lines changed

5 files changed

+177
-0
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include <metal_stdlib>
2+
#include <simd/simd.h>
3+
4+
using namespace metal;
5+
6+
struct Registers
7+
{
8+
ulong addr;
9+
ulong addr2;
10+
};
11+
12+
kernel void main0(constant Registers& registers [[buffer(0)]])
13+
{
14+
device int* _21 = reinterpret_cast<device int*>(registers.addr2);
15+
int _22 = *(reinterpret_cast<device int*>(registers.addr));
16+
*_21 = _22;
17+
*(reinterpret_cast<device int*>(reinterpret_cast<ulong>(_21) + 4ul)) = _22;
18+
}
19+
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#version 450
2+
#if defined(GL_ARB_gpu_shader_int64)
3+
#extension GL_ARB_gpu_shader_int64 : require
4+
#else
5+
#error No extension available for 64-bit integers.
6+
#endif
7+
#extension GL_EXT_buffer_reference2 : require
8+
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
9+
10+
layout(buffer_reference) buffer intPointer;
11+
12+
layout(buffer_reference, buffer_reference_align = 4) buffer intPointer
13+
{
14+
int value;
15+
};
16+
17+
layout(push_constant, std430) uniform Registers
18+
{
19+
uint64_t addr;
20+
uint64_t addr2;
21+
} registers;
22+
23+
void main()
24+
{
25+
intPointer _21 = intPointer(registers.addr2);
26+
int _22 = intPointer(registers.addr).value;
27+
_21.value = _22;
28+
intPointer(uint64_t(_21) + 4ul).value = _22;
29+
}
30+
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; SPIR-V
2+
; Version: 1.0
3+
; Generator: Khronos Glslang Reference Front End; 10
4+
; Bound: 40
5+
; Schema: 0
6+
OpCapability Shader
7+
OpCapability Int64
8+
OpCapability PhysicalStorageBufferAddresses
9+
OpExtension "SPV_KHR_physical_storage_buffer"
10+
%1 = OpExtInstImport "GLSL.std.450"
11+
OpMemoryModel PhysicalStorageBuffer64 GLSL450
12+
OpEntryPoint GLCompute %main "main"
13+
OpExecutionMode %main LocalSize 1 1 1
14+
OpName %main "main"
15+
OpName %Registers "Registers"
16+
OpMemberName %Registers 0 "addr"
17+
OpMemberName %Registers 1 "addr2"
18+
OpName %registers "registers"
19+
OpMemberDecorate %Registers 0 Offset 0
20+
OpMemberDecorate %Registers 1 Offset 8
21+
OpDecorate %Registers Block
22+
%void = OpTypeVoid
23+
%3 = OpTypeFunction %void
24+
%int = OpTypeInt 32 1
25+
%ulong = OpTypeInt 64 0
26+
%Registers = OpTypeStruct %ulong %ulong
27+
%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers
28+
%registers = OpVariable %_ptr_PushConstant_Registers PushConstant
29+
%int_0 = OpConstant %int 0
30+
%int_1 = OpConstant %int 1
31+
%ulong_4 = OpConstant %ulong 4
32+
%_ptr_PushConstant_ulong = OpTypePointer PushConstant %ulong
33+
%_ptr_PhysicalStorageBuffer_int = OpTypePointer PhysicalStorageBuffer %int
34+
%main = OpFunction %void None %3
35+
%5 = OpLabel
36+
37+
%pc0 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0
38+
%addr0 = OpLoad %ulong %pc0
39+
%src_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr0
40+
41+
%pc1 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_1
42+
%addr1 = OpLoad %ulong %pc1
43+
%dst_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr1
44+
45+
; Volatile load from src — must NOT be forwarded
46+
%ld = OpLoad %int %src_p Volatile|Aligned 4
47+
48+
; Store the loaded value into dst (first use)
49+
OpStore %dst_p %ld Aligned 4
50+
51+
; Compute dst + 1 element via integer arithmetic
52+
%dst_u64 = OpConvertPtrToU %ulong %dst_p
53+
%dst_plus4 = OpIAdd %ulong %dst_u64 %ulong_4
54+
%dst_p2 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %dst_plus4
55+
56+
; Store the same loaded value again (second use)
57+
OpStore %dst_p2 %ld Aligned 4
58+
59+
OpReturn
60+
OpFunctionEnd
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; SPIR-V
2+
; Version: 1.0
3+
; Generator: Khronos Glslang Reference Front End; 10
4+
; Bound: 40
5+
; Schema: 0
6+
OpCapability Shader
7+
OpCapability Int64
8+
OpCapability PhysicalStorageBufferAddresses
9+
OpExtension "SPV_KHR_physical_storage_buffer"
10+
%1 = OpExtInstImport "GLSL.std.450"
11+
OpMemoryModel PhysicalStorageBuffer64 GLSL450
12+
OpEntryPoint GLCompute %main "main"
13+
OpExecutionMode %main LocalSize 1 1 1
14+
OpName %main "main"
15+
OpName %Registers "Registers"
16+
OpMemberName %Registers 0 "addr"
17+
OpMemberName %Registers 1 "addr2"
18+
OpName %registers "registers"
19+
OpMemberDecorate %Registers 0 Offset 0
20+
OpMemberDecorate %Registers 1 Offset 8
21+
OpDecorate %Registers Block
22+
%void = OpTypeVoid
23+
%3 = OpTypeFunction %void
24+
%int = OpTypeInt 32 1
25+
%ulong = OpTypeInt 64 0
26+
%Registers = OpTypeStruct %ulong %ulong
27+
%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers
28+
%registers = OpVariable %_ptr_PushConstant_Registers PushConstant
29+
%int_0 = OpConstant %int 0
30+
%int_1 = OpConstant %int 1
31+
%ulong_4 = OpConstant %ulong 4
32+
%_ptr_PushConstant_ulong = OpTypePointer PushConstant %ulong
33+
%_ptr_PhysicalStorageBuffer_int = OpTypePointer PhysicalStorageBuffer %int
34+
%main = OpFunction %void None %3
35+
%5 = OpLabel
36+
37+
%pc0 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0
38+
%addr0 = OpLoad %ulong %pc0
39+
%src_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr0
40+
41+
%pc1 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_1
42+
%addr1 = OpLoad %ulong %pc1
43+
%dst_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr1
44+
45+
; Volatile load from src — must NOT be forwarded
46+
%ld = OpLoad %int %src_p Volatile|Aligned 4
47+
48+
; Store the loaded value into dst (first use)
49+
OpStore %dst_p %ld Aligned 4
50+
51+
; Compute dst + 1 element via integer arithmetic
52+
%dst_u64 = OpConvertPtrToU %ulong %dst_p
53+
%dst_plus4 = OpIAdd %ulong %dst_u64 %ulong_4
54+
%dst_p2 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %dst_plus4
55+
56+
; Store the same loaded value again (second use)
57+
OpStore %dst_p2 %ld Aligned 4
58+
59+
OpReturn
60+
OpFunctionEnd

spirv_glsl.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12649,6 +12649,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
1264912649
// If an expression is mutable and forwardable, we speculate that it is immutable.
1265012650
bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
1265112651

12652+
// Volatile memory access requires the value be read exactly once from
12653+
// memory. Do not forward the expression so that re-evaluation at each
12654+
// use site cannot re-read potentially modified memory.
12655+
// FIXME: To force implementations to actually respect the volatile nature of the load,
12656+
// the block itself must be marked volatile, or VulkanMM is used to do an explicit volatile load.
12657+
if (forward && length >= 4 && (ops[3] & MemoryAccessVolatileMask) != 0)
12658+
forward = false;
12659+
1265212660
// If loading a non-native row-major matrix, mark the expression as need_transpose.
1265312661
bool need_transpose = false;
1265412662
bool old_need_transpose = false;

0 commit comments

Comments
 (0)