[llvm] e37997c - [AMDGPU] Simplify test and extend to gfx9 and gfx10
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 08:57:14 PST 2020
Author: Jay Foad
Date: 2020-01-27T16:56:40Z
New Revision: e37997cc0de0f9dc8b9b9d6efa706c7283fb04b7
URL: https://github.com/llvm/llvm-project/commit/e37997cc0de0f9dc8b9b9d6efa706c7283fb04b7
DIFF: https://github.com/llvm/llvm-project/commit/e37997cc0de0f9dc8b9b9d6efa706c7283fb04b7.diff
LOG: [AMDGPU] Simplify test and extend to gfx9 and gfx10
Summary:
This is in preparation for adding more test cases for D69661 and other
bug fixes in the same area.
Reviewers: tpr, dstuttard, critson, nhaehnle, arsenm
Subscribers: kzhuravl, jvesely, wdng, yaxunl, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70708
Added:
Modified:
llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
index 330426fb0ad1..d3262abad36a 100644
--- a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
+++ b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
@@ -1,171 +1,87 @@
-# RUN: llc -run-pass si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s
---- |
-
- define amdgpu_kernel void @vccz_corrupt_workaround(float %cond, i32 addrspace(1)* %out) #0 {
- entry:
- %cmp0 = fcmp oeq float %cond, 0.000000e+00
- br i1 %cmp0, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
-
- else: ; preds = %entry
- store volatile i32 100, i32 addrspace(1)* undef
- br label %done, !structurizecfg.uniform !0
-
- if: ; preds = %entry
- store volatile i32 9, i32 addrspace(1)* undef
- br label %done, !structurizecfg.uniform !0
-
- done: ; preds = %if, %else
- %value = phi i32 [ 0, %if ], [ 1, %else ]
- store i32 %value, i32 addrspace(1)* %out
- ret void
- }
-
- define amdgpu_kernel void @vccz_corrupt_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 {
- entry:
- br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
-
- else: ; preds = %entry
- store volatile i32 100, i32 addrspace(1)* undef
- br label %done, !structurizecfg.uniform !0
-
- if: ; preds = %entry
- store volatile i32 9, i32 addrspace(1)* undef
- br label %done, !structurizecfg.uniform !0
-
- done: ; preds = %if, %else
- %value = phi i32 [ 0, %if ], [ 1, %else ]
- store i32 %value, i32 addrspace(1)* %out
- ret void
- }
-
- attributes #0 = { nounwind }
- attributes #1 = { readnone }
-
- !0 = !{}
-
-...
+# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s -check-prefixes=CHECK,SI
+# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx900 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX9
+# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10
---
# CHECK-LABEL: name: vccz_corrupt_workaround
# CHECK: $vcc = V_CMP_EQ_F32
-# CHECK-NEXT: S_WAITCNT 127
-# CHECK-NEXT: $vcc = S_MOV_B64 $vcc
+# SI-NEXT: S_WAITCNT 127
+# SI-NEXT: $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
-name: vccz_corrupt_workaround
-alignment: 1
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
+name: vccz_corrupt_workaround
tracksRegLiveness: true
-liveins:
- - { reg: '$sgpr0_sgpr1' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 0
- adjustsStack: false
- hasCalls: false
- maxCallFrameSize: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
-body: |
- bb.0.entry:
- liveins: $sgpr0_sgpr1, $vcc
-
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
$vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $exec
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
- bb.2.if:
+ bb.2:
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
- bb.1.else:
+ bb.1:
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
- bb.3.done:
+ bb.3:
liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
# CHECK-LABEL: name: vccz_corrupt_undef_vcc
-# CHECK: S_WAITCNT 3855
+# CHECK: BUFFER_STORE_DWORD_OFFSET
+# SI-NEXT: S_WAITCNT 3855
# CHECK-NEXT: $vgpr0 = V_MOV_B32_e32
-name: vccz_corrupt_undef_vcc
-alignment: 1
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
+name: vccz_corrupt_undef_vcc
tracksRegLiveness: true
-liveins:
- - { reg: '$sgpr0_sgpr1' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 0
- adjustsStack: false
- hasCalls: false
- maxCallFrameSize: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
-body: |
- bb.0.entry:
+body: |
+ bb.0:
liveins: $sgpr0_sgpr1
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
- bb.2.if:
+ bb.2:
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
- bb.1.else:
+ bb.1:
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
- bb.3.done:
+ bb.3:
liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
More information about the llvm-commits
mailing list