[llvm] Remove fold immediates from shrink (PR #68045)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 2 15:23:40 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Changes:
This includes https://github.com/llvm/llvm-project/pull/67878; I am not sure how to create stacked reviews on GitHub. The second commit in this PR is the one that actually removes SIShrinkInstructions::foldImmediates.
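
For context, the removed helper folded literal constants from move-immediate definitions directly into shrunk VOP1/VOP2/VOPC instructions. The replacement is simply to rerun SIFoldOperands right after the shrink pass, which performs the same folds; the test updates below show literals now encoded inline instead of first being materialized in an SGPR. A minimal sketch of the resulting pass ordering, assuming the surrounding GCNPassConfig code matches the AMDGPUTargetMachine.cpp hunk in the diff:

```cpp
// Sketch only: the exact change is in the AMDGPUTargetMachine.cpp hunk below.
// Rerunning SIFoldOperands after the shrink pass takes over the literal
// folding that SIShrinkInstructions::foldImmediates used to do.
void GCNPassConfig::addMachineSSAOptimization() {
  // ... existing GCN MachineSSA optimizations ...
  addPass(&DeadMachineInstructionElimID);
  addPass(createSIShrinkInstructionsPass());
  if (TM->getOptLevel() > CodeGenOptLevel::Less) // skip the extra fold below -O2
    addPass(&SIFoldOperandsID);
}
```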
---
Patch is 212.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68045.diff
22 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp (-62)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll (+2-3)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll (+69-92)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (+69-92)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll (+7-7)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll (+23-23)
- (modified) llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll (+1-2)
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+2)
- (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+40-66)
- (modified) llvm/test/CodeGen/AMDGPU/sdiv64.ll (+3-6)
- (modified) llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll (+645-653)
- (modified) llvm/test/CodeGen/AMDGPU/srem64.ll (+3-6)
- (modified) llvm/test/CodeGen/AMDGPU/udiv64.ll (+1-2)
- (modified) llvm/test/CodeGen/AMDGPU/urem64.ll (+1-2)
- (modified) llvm/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir (+3-3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index bcbc03eb2559c4f..a674c52667c684b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1199,6 +1199,8 @@ void GCNPassConfig::addMachineSSAOptimization() {
}
addPass(&DeadMachineInstructionElimID);
addPass(createSIShrinkInstructionsPass());
+ if (TM->getOptLevel() > CodeGenOptLevel::Less)
+ addPass(&SIFoldOperandsID);
}
bool GCNPassConfig::addILPOpts() {
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 4159dc694c1e037..6dc749240e45cb2 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -19,8 +19,6 @@
STATISTIC(NumInstructionsShrunk,
"Number of 64-bit instruction reduced to 32-bit.");
-STATISTIC(NumLiteralConstantsFolded,
- "Number of literal constants folded into 32-bit instructions.");
using namespace llvm;
@@ -40,7 +38,6 @@ class SIShrinkInstructions : public MachineFunctionPass {
SIShrinkInstructions() : MachineFunctionPass(ID) {
}
- bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
bool shouldShrinkTrue16(MachineInstr &MI) const;
bool isKImmOperand(const MachineOperand &Src) const;
bool isKUImmOperand(const MachineOperand &Src) const;
@@ -84,64 +81,6 @@ FunctionPass *llvm::createSIShrinkInstructionsPass() {
return new SIShrinkInstructions();
}
-/// This function checks \p MI for operands defined by a move immediate
-/// instruction and then folds the literal constant into the instruction if it
-/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions.
-bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
- bool TryToCommute) const {
- assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
-
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
-
- // Try to fold Src0
- MachineOperand &Src0 = MI.getOperand(Src0Idx);
- if (Src0.isReg()) {
- Register Reg = Src0.getReg();
- if (Reg.isVirtual()) {
- MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
- if (Def && Def->isMoveImmediate()) {
- MachineOperand &MovSrc = Def->getOperand(1);
- bool ConstantFolded = false;
-
- if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
- if (MovSrc.isImm() &&
- (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) {
- Src0.ChangeToImmediate(MovSrc.getImm());
- ConstantFolded = true;
- } else if (MovSrc.isFI()) {
- Src0.ChangeToFrameIndex(MovSrc.getIndex());
- ConstantFolded = true;
- } else if (MovSrc.isGlobal()) {
- Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
- MovSrc.getTargetFlags());
- ConstantFolded = true;
- }
- }
-
- if (ConstantFolded) {
- if (MRI->use_nodbg_empty(Reg))
- Def->eraseFromParent();
- ++NumLiteralConstantsFolded;
- return true;
- }
- }
- }
- }
-
- // We have failed to fold src0, so commute the instruction and try again.
- if (TryToCommute && MI.isCommutable()) {
- if (TII->commuteInstruction(MI)) {
- if (foldImmediates(MI, false))
- return true;
-
- // Commute back.
- TII->commuteInstruction(MI);
- }
- }
-
- return false;
-}
-
/// Do not shrink the instruction if its registers are not expressible in the
/// shrunk encoding.
bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
@@ -1010,7 +949,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
Inst32->findRegisterDefOperand(VCCReg)->setIsDead();
MI.eraseFromParent();
- foldImmediates(*Inst32);
LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
index 26d1fbb09210c64..e9f30e8503b310e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -165,9 +165,8 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX7-LABEL: v_add_v2i16_neg_inline_imm_splat:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_movk_i32 s4, 0xffc0
-; GFX7-NEXT: v_add_i32_e32 v0, vcc, s4, v0
-; GFX7-NEXT: v_add_i32_e32 v1, vcc, s4, v1
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0xffffffc0, v0
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, 0xffffffc0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index cded5c94edf8cc3..c78d4533f4ddd3f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -231,14 +231,12 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT: s_brev_b32 s5, 1
; GFX6-NEXT: v_min_i32_e32 v5, 0, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: s_brev_b32 s4, -2
; GFX6-NEXT: v_max_i32_e32 v4, 0, v0
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
; GFX6-NEXT: v_max_i32_e32 v1, v5, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v4
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
@@ -246,8 +244,8 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: v_min_i32_e32 v4, 0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX6-NEXT: v_max_i32_e32 v3, 0, v1
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x7fffffff, v3
; GFX6-NEXT: v_max_i32_e32 v2, v4, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v3
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
@@ -512,15 +510,15 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: s_brev_b32 s4, -2
; GFX6-NEXT: v_max_i32_e32 v8, 0, v0
; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s5, v10
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s4, v8
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x7fffffff, v8
; GFX6-NEXT: v_max_i32_e32 v1, v10, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v8
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX6-NEXT: v_min_i32_e32 v8, 0, v1
+; GFX6-NEXT: s_brev_b32 s4, -2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v5
; GFX6-NEXT: v_max_i32_e32 v5, 0, v1
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s5, v8
@@ -1265,19 +1263,17 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX6-LABEL: v_saddsat_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_brev_b32 s5, 1
; GFX6-NEXT: v_min_i32_e32 v5, 0, v0
-; GFX6-NEXT: s_brev_b32 s4, -2
; GFX6-NEXT: v_max_i32_e32 v4, 0, v0
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
; GFX6-NEXT: v_max_i32_e32 v2, v5, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
; GFX6-NEXT: v_min_i32_e32 v4, 0, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_max_i32_e32 v2, 0, v1
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s4, v2
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
; GFX6-NEXT: v_max_i32_e32 v3, v4, v3
; GFX6-NEXT: v_min_i32_e32 v2, v3, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
@@ -1286,19 +1282,17 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX8-LABEL: v_saddsat_v2i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_brev_b32 s5, 1
; GFX8-NEXT: v_min_i32_e32 v5, 0, v0
-; GFX8-NEXT: s_brev_b32 s4, -2
; GFX8-NEXT: v_max_i32_e32 v4, 0, v0
-; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s5, v5
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s4, v4
+; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x7fffffff, v4
; GFX8-NEXT: v_max_i32_e32 v2, v5, v2
; GFX8-NEXT: v_min_i32_e32 v2, v2, v4
; GFX8-NEXT: v_min_i32_e32 v4, 0, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_max_i32_e32 v2, 0, v1
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s5, v4
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s4, v2
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x80000000, v4
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 0x7fffffff, v2
; GFX8-NEXT: v_max_i32_e32 v3, v4, v3
; GFX8-NEXT: v_min_i32_e32 v2, v3, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
@@ -1383,26 +1377,25 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
; GFX6-LABEL: v_saddsat_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_brev_b32 s5, 1
; GFX6-NEXT: v_min_i32_e32 v7, 0, v0
-; GFX6-NEXT: s_brev_b32 s4, -2
; GFX6-NEXT: v_max_i32_e32 v6, 0, v0
-; GFX6-NEXT: v_sub_i32_e32 v7, vcc, s5, v7
-; GFX6-NEXT: v_sub_i32_e32 v6, vcc, s4, v6
+; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0x80000000, v7
+; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0x7fffffff, v6
; GFX6-NEXT: v_max_i32_e32 v3, v7, v3
; GFX6-NEXT: v_min_i32_e32 v3, v3, v6
; GFX6-NEXT: v_min_i32_e32 v6, 0, v1
+; GFX6-NEXT: s_brev_b32 s4, -2
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT: v_max_i32_e32 v3, 0, v1
-; GFX6-NEXT: v_sub_i32_e32 v6, vcc, s5, v6
+; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0x80000000, v6
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3
; GFX6-NEXT: v_max_i32_e32 v4, v6, v4
; GFX6-NEXT: v_min_i32_e32 v3, v4, v3
; GFX6-NEXT: v_min_i32_e32 v4, 0, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT: v_max_i32_e32 v3, 0, v2
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v4
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x7fffffff, v3
; GFX6-NEXT: v_max_i32_e32 v4, v4, v5
; GFX6-NEXT: v_min_i32_e32 v3, v4, v3
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
@@ -1411,26 +1404,25 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
; GFX8-LABEL: v_saddsat_v3i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_brev_b32 s5, 1
; GFX8-NEXT: v_min_i32_e32 v7, 0, v0
-; GFX8-NEXT: s_brev_b32 s4, -2
; GFX8-NEXT: v_max_i32_e32 v6, 0, v0
-; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s5, v7
-; GFX8-NEXT: v_sub_u32_e32 v6, vcc, s4, v6
+; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 0x80000000, v7
+; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x7fffffff, v6
; GFX8-NEXT: v_max_i32_e32 v3, v7, v3
; GFX8-NEXT: v_min_i32_e32 v3, v3, v6
; GFX8-NEXT: v_min_i32_e32 v6, 0, v1
+; GFX8-NEXT: s_brev_b32 s4, -2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3
; GFX8-NEXT: v_max_i32_e32 v3, 0, v1
-; GFX8-NEXT: v_sub_u32_e32 v6, vcc, s5, v6
+; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x80000000, v6
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s4, v3
; GFX8-NEXT: v_max_i32_e32 v4, v6, v4
; GFX8-NEXT: v_min_i32_e32 v3, v4, v3
; GFX8-NEXT: v_min_i32_e32 v4, 0, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
; GFX8-NEXT: v_max_i32_e32 v3, 0, v2
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s5, v4
-; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s4, v3
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x80000000, v4
+; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 0x7fffffff, v3
; GFX8-NEXT: v_max_i32_e32 v4, v4, v5
; GFX8-NEXT: v_min_i32_e32 v3, v4, v3
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
@@ -1536,26 +1528,24 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; GFX6-LABEL: v_saddsat_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_brev_b32 s5, 1
; GFX6-NEXT: v_min_i32_e32 v9, 0, v0
-; GFX6-NEXT: s_brev_b32 s4, -2
; GFX6-NEXT: v_max_i32_e32 v8, 0, v0
-; GFX6-NEXT: v_sub_i32_e32 v9, vcc, s5, v9
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s4, v8
+; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0x80000000, v9
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x7fffffff, v8
; GFX6-NEXT: v_max_i32_e32 v4, v9, v4
; GFX6-NEXT: v_min_i32_e32 v4, v4, v8
; GFX6-NEXT: v_min_i32_e32 v8, 0, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT: v_max_i32_e32 v4, 0, v1
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s5, v8
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0x80000000, v8
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
; GFX6-NEXT: v_max_i32_e32 v5, v8, v5
; GFX6-NEXT: v_min_i32_e32 v4, v5, v4
; GFX6-NEXT: v_min_i32_e32 v5, 0, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT: v_max_i32_e32 v4, 0, v2
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
; GFX6-NEXT: v_max_i32_e32 v5, v5, v6
; GFX6-NEXT: v_min_i32_e32 v4, v5, v4
; GFX6-NEXT: v_min_i32_e32 v5, 0, v3
@@ -1571,26 +1561,24 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; GFX8-LABEL: v_saddsat_v4i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_brev_b32 s5, 1
; GFX8-NEXT: v_min_i32_e32 v9, 0, v0
-; GFX8-NEXT: s_brev_b32 s4, -2
; GFX8-NEXT: v_max_i32_e32 v8, 0, v0
-; GFX8-NEXT: v_sub_u32_e32 v9, vcc, s5, v9
-; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s4, v8
+; GFX8-NEXT: v_sub_u32_e32 v9, vcc, 0x80000000, v9
+; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 0x7fffffff, v8
; GFX8-NEXT: v_max_i32_e32 v4, v9, v4
; GFX8-NEXT: v_min_i32_e32 v4, v4, v8
; GFX8-NEXT: v_min_i32_e32 v8, 0, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v4
; GFX8-NEXT: v_max_i32_e32 v4, 0, v1
-; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s5, v8
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s4, v4
+; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 0x80000000, v8
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x7fffffff, v4
; GFX8-NEXT: v_max_i32_e32 v5, v8, v5
; GFX8-NEXT: v_min_i32_e32 v4, v5, v4
; GFX8-NEXT: v_min_i32_e32 v5, 0, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v4
; GFX8-NEXT: v_max_i32_e32 v4, 0, v2
-; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s5, v5
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s4, v4
+; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 0x7fffffff, v4
; GFX8-NEXT: v_max_i32_e32 v5, v5, v6
; GFX8-NEXT: v_min_i32_e32 v4, v5, v4
; GFX8-NEXT: v_min_i32_e32 v5, 0, v3
@@ -1724,34 +1712,32 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
; GFX6-LABEL: v_saddsat_v5i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_brev_b32 s5, 1
-; GFX6-NEXT: v_min_i32_e32 v12, 0, v0
-; GFX6-NEXT: s_brev_b32 s4, -2
+; GFX6-NEXT: v_min_i32_e32 v11, 0, v0
; GFX6-NEXT: v_max_i32_e32 v10, 0, v0
-; GFX6-NEXT: v_sub_i32_e32 v12, vcc, s5, v12
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s4, v10
-; GFX6-NEXT: v_max_i32_e32 v5, v12, v5
+; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0x80000000, v11
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 0x7fffffff, v10
+; GFX6-NEXT: v_max_i32_e32 v5, v11, v5
; GFX6-NEXT: v_min_i32_e32 v5, v5, v10
; GFX6-NEXT: v_min_i32_e32 v10, 0, v1
+; GFX6-NEXT: s_brev_b32 s4, -2
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v5
; GFX6-NEXT: v_max_i32_e32 v5, 0, v1
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, s5, v10
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 0x80000000, v10
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s4, v5
; GFX6-NEXT: v_max_i32_e32 v6, v10, v6
; GFX6-NEXT: v_min_i32_e32 v5, v6, v5
; GFX6-NEXT: v_min_i32_e32 v6, 0, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT: v_max_i32_e32 v5, 0, v2
-; GFX6-NEXT: v_sub_i32_e32 v6, vcc, s5, v6
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s4, v5
+; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x7fffffff, v5
; GFX6-NEXT: v_max_i32_e32 v6, v6, v7
; GFX6-NEXT: v_min_i32_e32 v5, v6, v5
; GFX6-NEXT: v_min_i32_e32 v6, 0, v3
-; GFX6-NEXT: v_bfrev_b32_e32 v11, -2
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT: v_max_i32_e32 v5, 0, v3
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0x80000000, v6
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v11, v5
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x7fffffff, v5
; GFX6-NEXT: v_max_i32_e32 v6, v6, v8
; GFX6-NEXT: v_min_i32_e32 v5, v6, v5
; GFX6-NEXT: v_min_i32_e32 v6, 0, v4
@@ -1767,34 +1753,32 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
; GFX8-LABEL: v_saddsat_v5i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_brev_b32 s5, 1
-; GFX8-NEXT: v_min_i32_e32 v12, 0, v0
-; GFX8-NEXT: s_brev_b32 s4, -2
+; GFX8-NEXT: v_min_i32_e32 v11, 0, v0
; GFX8-NEXT: v_max_i32_e32 v10, 0, v0
-; GFX8-NEXT: v_sub_u32_e32 v12, vcc, s5, v12
-; GFX8-NEXT: v_sub_u32_e32 v10, vcc, s4, v10
-; GFX8-NEXT: v_max_i32_e32 v5, v12, v5
+; GFX8-NEXT: v_sub_u32_e32 v11, vcc, 0x80000000, v11
+; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 0x7fffffff, v10
+; GFX8-NEXT: v_max_i32_e32 v5, v11, v5
; GFX8-NEXT: v_min_i32_e32 v5, v5, v10
; GFX8-NEXT: v_min_i32_e32 v10, 0, v1
+; GFX8-NEXT: s_brev_b32 s4, -2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5
; GFX8-NEXT: v_max_i32_e32 v5, 0, v1
-; GFX8-NEXT: v_sub_u32_e32 v10, vcc, s5, v10
+; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 0x80000000, v10
; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s4, v5
; GFX8-NEXT: v_max_i32_e32 v6, v10, v6
; GFX8-NEXT: v_min_i32_e32 v5, v6, v5
; GFX8-NEXT: v_min_i32_e32 v6, 0, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5
; GFX8-NEXT: v_max_i32_e32 v5, 0, v2
-; GFX8-NEXT: v_sub_u32_e32 v6, vcc, s5, v6
-; GFX8-NEXT: v_sub_u32_e32 v5, vcc, s4, v5
+; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x7fffffff, v5
; GFX8-NEXT: v_max_i32_e32 v6, v6, v7
; GFX8-NEXT: v_min_i32_e32 v5, v6, v5
; GFX8-NEXT: v_min_i32_e32 v6, 0, v3
-; GFX8-NEXT: v_bfrev_b32_e32 v11, -2
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5
; GFX8-NEXT: v_max_i32_e32 v5, 0, v3
; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 0x80000000, v6
-; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v11, v5
+; GFX8-NEXT: v_sub_u32_e32 v5, vcc, 0x7fffffff, v5
; GFX8-NEXT: v_max_i32_e32 v6, v6, v8
; GFX8-NEXT: v_min_i32_e32 v5, v6, v5
; GFX8-NEXT: v_min_i32_e32 v6, 0, v4
@@ -2766,13 +2750,11 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: s_brev_b32 s5, 1
; GFX6-NEXT: v_min_i32_e32 v5, 0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT: s_brev_b32 s4, -2
; GFX6-NEXT: v_max_i32_e32 v4, 0, v0
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s5, v5
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
; GFX6-NEXT: v_max_i32_e32 v2, v5, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
@@ -2780,8 +2762,8 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/68045
More information about the llvm-commits mailing list