[llvm] 779cba7 - AMDGPU: Remove mayLoad/mayStore from some side effecting intrinsics
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 18 11:12:28 PDT 2020
Author: Matt Arsenault
Date: 2020-06-18T14:12:19-04:00
New Revision: 779cba79ec852c1c085c4aab79dc3514edd3efc5
URL: https://github.com/llvm/llvm-project/commit/779cba79ec852c1c085c4aab79dc3514edd3efc5
DIFF: https://github.com/llvm/llvm-project/commit/779cba79ec852c1c085c4aab79dc3514edd3efc5.diff
LOG: AMDGPU: Remove mayLoad/mayStore from some side effecting intrinsics
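These intrinsics and instructions have side effects but do not actually
read or modify any memory, so they should not be marked mayLoad/mayStore
and should not expect memory operands.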
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/SMInstructions.td
llvm/lib/Target/AMDGPU/SOPInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 033ca5a4acb5..eceaca32c0a2 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -209,10 +209,10 @@ def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent]>;
def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
- Intrinsic<[], [], [IntrConvergent]>;
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent]>;
def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
- Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
+ Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_div_scale : Intrinsic<
// 1st parameter: Numerator
@@ -1179,15 +1179,15 @@ def int_amdgcn_exp_compr : Intrinsic <[], [
def int_amdgcn_buffer_wbinvl1_sc :
GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_buffer_wbinvl1 :
GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_dcache_inv :
GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_memtime :
GCCBuiltin<"__builtin_amdgcn_s_memtime">,
@@ -1195,17 +1195,17 @@ def int_amdgcn_s_memtime :
def int_amdgcn_s_sleep :
GCCBuiltin<"__builtin_amdgcn_s_sleep">,
- Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]> {
+ Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]> {
}
def int_amdgcn_s_incperflevel :
GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
- Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]> {
+ Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]> {
}
def int_amdgcn_s_decperflevel :
GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
- Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]> {
+ Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]> {
}
def int_amdgcn_s_getreg :
@@ -1493,6 +1493,7 @@ def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
>;
// If false, set EXEC=0 for the current thread until the end of program.
+// FIXME: Should this be IntrNoMem, IntrHasSideEffects?
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;
// Copies the active channels of the source value to the destination value,
@@ -1532,11 +1533,11 @@ def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">,
def int_amdgcn_s_dcache_inv_vol :
GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_buffer_wbinvl1_vol :
GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
//===----------------------------------------------------------------------===//
// VI Intrinsics
@@ -1562,11 +1563,11 @@ def int_amdgcn_update_dpp :
def int_amdgcn_s_dcache_wb :
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_dcache_wb_vol :
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_memrealtime :
GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 9954934e00a2..4bc9fd04b3de 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -374,7 +374,8 @@ class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> :
let AsmMatchConverter = "";
let hasSideEffects = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
// Set everything to 0.
let offen = 0;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index a28fd5203b7c..24113cf69f7e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -274,13 +274,14 @@ def S_OR_B32_term : WrapTerminatorInst<S_OR_B32>;
def S_ANDN2_B32_term : WrapTerminatorInst<S_ANDN2_B32>;
}
+
def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
[(int_amdgcn_wave_barrier)]> {
let SchedRW = [];
let hasNoSchedulingInfo = 1;
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
let isConvergent = 1;
let FixedSize = 1;
let Size = 0;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 252f191a2f66..70bf215c03f3 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -187,7 +187,7 @@ class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pse
class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
opName, (outs), (ins), "", [(node)]> {
let hasSideEffects = 1;
- let mayStore = 1;
+ let mayStore = 0;
let has_sdst = 0;
let has_sbase = 0;
let has_offset = 0;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index e2516c1af0e8..dd6363b1439f 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1149,7 +1149,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
let mayStore = 1;
}
-let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
[(int_amdgcn_s_waitcnt timm:$simm16)]>;
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
@@ -1162,8 +1162,8 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
"s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
}
def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
@@ -1188,14 +1188,14 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
[(int_amdgcn_s_incperflevel timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
}
def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
}
def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
let simm16 = 0;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
index 4dc938c9b0a2..05b125fa14f9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
@@ -5,13 +5,14 @@ declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0
; GCN-LABEL: {{^}}test_buffer_wbinvl1_vol:
; GCN-NEXT: ; %bb.0:
-; CI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00]
-; VI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
-; GCN: s_endpgm
-define amdgpu_kernel void @test_buffer_wbinvl1_vol() #0 {
+; CI: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00]
+; VI: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
+; GCN: _store_byte
+; GCN-NEXT: s_endpgm
+define amdgpu_kernel void @test_buffer_wbinvl1_vol(i8 addrspace(1)* %ptr) #0 {
call void @llvm.amdgcn.buffer.wbinvl1.vol()
; This used to crash in hazard recognizer
- store i8 0, i8 addrspace(1)* undef, align 1
+ store i8 0, i8 addrspace(1)* %ptr, align 1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
index 149e1c234ee5..b3399ff2dfda 100644
--- a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
@@ -23,8 +23,8 @@ body: |
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN: $vcc_hi = IMPLICIT_DEF
- ; GCN: DS_WRITE_B32_gfx9 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store 4, addrspace 3)
; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+ ; GCN: DS_WRITE_B32_gfx9 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store 4, addrspace 3)
; GCN: $vgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GCN: $m0 = S_MOV_B32 0
; GCN: BUNDLE implicit $vgpr0, implicit $m0, implicit $exec {
More information about the llvm-commits mailing list