[llvm] EarlyCSE: create casts on type-mismatch (PR #113339)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 22 09:39:26 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
getOrCreateResult suffers from the deficiency that it doesn't attempt to create casts when types mismatch. Fix this deficiency, making EarlyCSE more powerful.
-- 8< --
Based on #<!-- -->113338.
---
Patch is 314.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113339.diff
8 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/EarlyCSE.cpp (+38-24)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+330-315)
- (modified) llvm/test/CodeGen/NVPTX/load-store.ll (+1641-568)
- (modified) llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll (+8-3)
- (modified) llvm/test/CodeGen/PowerPC/p10-spill-creq.ll (+28-34)
- (modified) llvm/test/Transforms/EarlyCSE/invariant.start.ll (+18-12)
- (modified) llvm/test/Transforms/EarlyCSE/opaque-ptr.ll (+6-10)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index a1dbb4e1d5e75f..9714611cda8b0f 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -964,32 +965,45 @@ class EarlyCSE {
bool overridingStores(const ParseMemoryInst &Earlier,
const ParseMemoryInst &Later);
- Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
- // TODO: We could insert relevant casts on type mismatch here.
- if (auto *LI = dyn_cast<LoadInst>(Inst))
- return LI->getType() == ExpectedType ? LI : nullptr;
- if (auto *SI = dyn_cast<StoreInst>(Inst)) {
- Value *V = SI->getValueOperand();
- return V->getType() == ExpectedType ? V : nullptr;
+ Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
+ if (!isa<IntrinsicInst, LoadInst, StoreInst>(Inst))
+ llvm_unreachable("Instruction not supported");
+
+ // The load or the store's first operand.
+ Value *V;
+ if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ V = II;
+ break;
+ case Intrinsic::masked_store:
+ V = II->getOperand(0);
+ break;
+ default:
+ return nullptr;
+ }
+ else
+ return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
+ } else {
+ V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
}
- assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
- auto *II = cast<IntrinsicInst>(Inst);
- if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
- return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
- return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
- }
- Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
- Type *ExpectedType) const {
- // TODO: We could insert relevant casts on type mismatch here.
- switch (II->getIntrinsicID()) {
- case Intrinsic::masked_load:
- return II->getType() == ExpectedType ? II : nullptr;
- case Intrinsic::masked_store: {
- Value *V = II->getOperand(0);
- return V->getType() == ExpectedType ? V : nullptr;
- }
- }
+ Type *ActualType = V->getType();
+ BasicBlock *TheBB = Inst->getParent();
+
+ // First handle the case when no cast is required.
+ if (ActualType == ExpectedType)
+ return V;
+
+ // Try to create BitCast, SExt, or Trunc.
+ IRBuilder<> Builder(TheBB, std::next(Inst->getIterator()));
+ if (CastInst::castIsValid(Instruction::BitCast, V, ExpectedType))
+ return Builder.CreateBitCast(V, ExpectedType);
+ if (CastInst::castIsValid(Instruction::SExt, V, ExpectedType))
+ return Builder.CreateSExt(V, ExpectedType);
+ if (CastInst::castIsValid(Instruction::Trunc, V, ExpectedType))
+ return Builder.CreateTrunc(V, ExpectedType);
return nullptr;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index c3694158e7b971..6fe26286b74c22 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -3683,7 +3683,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD2]](s8)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -3720,7 +3720,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: G_STORE [[COPY18]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
- ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
+ ; CHECK-NEXT: G_STORE [[SEXT]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 86254329923971..4a81bec15411fb 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -4,7 +4,7 @@
define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 %arg2, i1 %arg3, i1 %arg4, i1 %arg5, i1 %arg6, ptr addrspace(3) %arg7, ptr addrspace(3) %arg8, ptr addrspace(3) %arg9, ptr addrspace(3) %arg10) {
; GFX90A-LABEL: name: f1
; GFX90A: bb.0.bb:
- ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.1(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr15, $sgpr10_sgpr11
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $sgpr32 = S_MOV_B32 0
@@ -30,44 +30,25 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr2 = DS_READ_B32_gfx9 renamable $vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) null`, align 8, addrspace 3)
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr24_sgpr25, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.56, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1.bb103:
- ; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.2(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
- ; GFX90A-NEXT: $vgpr22 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr10 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr24 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr18 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr20 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.59, implicit $vcc
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.2:
- ; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr22, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3, $vgpr10, $vgpr24, $vgpr18, $vgpr20
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: successors: %bb.60(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 0
+ ; GFX90A-NEXT: S_BRANCH %bb.60
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.3.Flow17:
- ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.58(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr23, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
- ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.58, implicit $vcc
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.4.bb15:
- ; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
+ ; GFX90A-NEXT: bb.2.bb15:
+ ; GFX90A-NEXT: successors: %bb.33(0x40000000), %bb.3(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr42_sgpr43
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr2_vgpr3, implicit $exec
@@ -78,10 +59,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr40, renamable $vcc = V_ADD_CO_U32_e64 $vgpr46, killed $vgpr0, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr41, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr47, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.35, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.33, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.5:
- ; GFX90A-NEXT: successors: %bb.6(0x80000000)
+ ; GFX90A-NEXT: bb.3:
+ ; GFX90A-NEXT: successors: %bb.4(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr42_sgpr43
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
@@ -108,96 +89,96 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.6.Flow20:
- ; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.4.Flow20:
+ ; GFX90A-NEXT: successors: %bb.5(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr19 = COPY renamable $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr18 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr21 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr20 = COPY $sgpr15, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr52 = COPY $sgpr15, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr25 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr23 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr22 = COPY $sgpr15, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr25 = COPY $sgpr15, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr24 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.7.Flow19:
- ; GFX90A-NEXT: successors: %bb.63(0x40000000), %bb.8(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.5.Flow19:
+ ; GFX90A-NEXT: successors: %bb.65(0x40000000), %bb.6(0x40000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr25, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
; GFX90A-NEXT: $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $sgpr28_sgpr29, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.63, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.65, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.8.Flow32:
- ; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
+ ; GFX90A-NEXT: bb.6.Flow32:
+ ; GFX90A-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr30_sgpr3...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/113339
More information about the llvm-commits
mailing list