[llvm] r319826 - AMDGPU: Fix SDWA crash on inline asm
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 5 12:32:01 PST 2017
Author: arsenm
Date: Tue Dec 5 12:32:01 2017
New Revision: 319826
URL: http://llvm.org/viewvc/llvm-project?rev=319826&view=rev
Log:
AMDGPU: Fix SDWA crash on inline asm
This was only searching for explicit defs,
and asserting for any implicit or variadic
instruction defs, like inline asm.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp?rev=319826&r1=319825&r2=319826&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp Tue Dec 5 12:32:01 2017
@@ -324,7 +324,8 @@ static MachineOperand *findSingleRegDef(
return &DefMO;
}
- llvm_unreachable("invalid reg");
+ // Ignore implicit defs.
+ return nullptr;
}
uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll?rev=319826&r1=319825&r2=319826&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll Tue Dec 5 12:32:01 2017
@@ -497,3 +497,26 @@ entry:
store <8 x i8> %tmp19, <8 x i8> addrspace(1)* %arrayidx5, align 8
ret void
}
+
+; GCN-LABEL: {{^}}sdwa_crash_inlineasm_def:
+; GCN: s_mov_b32 s{{[0-9]+}}, 0xffff
+; GCN: v_and_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x10000,
+define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb11, %bb
+ %tmp = phi <2 x i32> [ %tmp12, %bb11 ], [ undef, %bb ]
+ br i1 true, label %bb2, label %bb11
+
+bb2: ; preds = %bb1
+ %tmp3 = call i32 asm "v_and_b32_e32 $0, $1, $2", "=v,s,v"(i32 65535, i32 undef) #1
+ %tmp5 = or i32 %tmp3, 65536
+ %tmp6 = insertelement <2 x i32> %tmp, i32 %tmp5, i64 0
+ br label %bb11
+
+bb11: ; preds = %bb10, %bb2
+ %tmp12 = phi <2 x i32> [ %tmp6, %bb2 ], [ %tmp, %bb1 ]
+ br label %bb1
+}
More information about the llvm-commits
mailing list