[llvm] [GISel][AArch64][AMDGPU][RISCV] Canonicalize (sub X, C) -> (add X, -C) (PR #114309)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 30 14:55:56 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-backend-amdgpu
Author: Craig Topper (topperc)
Changes:
This matches InstCombine and DAGCombine.
RISC-V only has an ADDI instruction (there is no subtract-immediate), so without this canonicalization we need additional patterns to do the conversion.
Some of the AMDGPU tests look like possible regressions.
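As a minimal sketch of the transform at the MIR level (register names here are hypothetical, but the shape matches the updated CHECK lines in the AArch64 tests below):

```
; Before the combine: subtract a constant.
%c:_(s32) = G_CONSTANT i32 7
%d:_(s32) = G_SUB %x, %c

; After sub_to_add: the constant is negated and the opcode becomes G_ADD.
%n:_(s32) = G_CONSTANT i32 -7
%d:_(s32) = G_ADD %x, %n
```

On RISC-V the negated constant can then fold directly into ADDI, which is what the removed RISCVGISel.td patterns were emulating.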
---
Patch is 439.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114309.diff
36 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+3)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+10-2)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+22)
- (modified) llvm/lib/Target/RISCV/RISCVGISel.td (-9)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir (+2-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir (+4-4)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir (+2-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll (+3-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll (+282-278)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll (+315-312)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll (+35-35)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll (+48-48)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (+536-530)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll (+17-17)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll (+24-24)
- (modified) llvm/test/CodeGen/AMDGPU/ctlz.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+16-12)
- (modified) llvm/test/CodeGen/AMDGPU/div_v2i128.ll (+144-136)
- (modified) llvm/test/CodeGen/AMDGPU/fptoi.i128.ll (+36-36)
- (modified) llvm/test/CodeGen/AMDGPU/itofp.i128.ll (+72-72)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll (+22-22)
- (modified) llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll (+565-453)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll (+9-9)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir (+3-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir (+6-4)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir (+2-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir (+2-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir (+2-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir (+2-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir (+2-2)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9240a3c3127eb4..b09981eaef506e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -321,6 +321,9 @@ class CombinerHelper {
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
+ // Transform a G_SUB with constant on the RHS to G_ADD.
+ bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
// Transform a G_SHL with an extended source into a narrower shift if
// possible.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index ead4149fc11068..9891db5ceb6fa9 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -335,6 +335,13 @@ def mul_to_shl : GICombineRule<
[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;
+// (sub x, C) -> (add x, -C)
+def sub_to_add : GICombineRule<
+ (defs root:$d, build_fn_matchinfo:$matchinfo),
+ (match (G_SUB $d, $op1, $op2):$mi,
+ [{ return Helper.matchCombineSubToAdd(*${mi}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${mi}, ${matchinfo}); }])>;
+
// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
def reduce_shl_of_extend_matchdata : GIDefMatchData<"RegisterImmPair">;
def reduce_shl_of_extend : GICombineRule<
@@ -1903,8 +1910,9 @@ def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
select_to_iminmax, match_selects]>;
-def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
- mul_by_neg_one, idempotent_prop]>;
+def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, sub_to_add,
+ add_p2i_to_ptradd, mul_by_neg_one,
+ idempotent_prop]>;
def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b7ddf9f479ef8e..91e5af9dfd8e25 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2041,6 +2041,28 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
Observer.changedInstr(MI);
}
+bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SUB && "Expected a G_SUB");
+ auto MaybeImmVal =
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ APInt NegImm = -MaybeImmVal->Value;
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NegCst = B.buildConstant(Ty, NegImm);
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
+ MI.getOperand(2).setReg(NegCst.getReg(0));
+ MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
RegisterImmPair &MatchData) {
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index 67e93b812421b4..40aae220fbd47e 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -96,15 +96,6 @@ def gi_sh2add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<2>">,
def gi_sh3add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<3>">,
GIComplexPatternEquiv<sh3add_uw_op>;
-// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
-def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
- (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
-
-let Predicates = [IsRV64] in {
-def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
- (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
-}
-
// Ptr type used in patterns with GlobalISelEmitter
def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
index 2f10a497fa74cb..5cbff0f0c74cb7 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
@@ -308,8 +308,8 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %a:_(s64) = COPY $x0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 71
- ; CHECK-NEXT: %sub:_(s64) = G_SUB %a, [[C]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -71
+ ; CHECK-NEXT: %sub:_(s64) = G_ADD %a, [[C]]
; CHECK-NEXT: $x0 = COPY %sub(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%a:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
index f207e9c149a476..e9d4af7da5d06f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
@@ -88,8 +88,8 @@ body: |
; CHECK-LABEL: name: test_combine_trunc_sub_i128
; CHECK: %lhs:_(s128) = COPY $q0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5
+ ; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s128) = COPY $q0
%rhs:_(s128) = G_CONSTANT i128 5
@@ -103,8 +103,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
; CHECK: %lhs:_(s128) = COPY $q0
- ; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
- ; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -5
+ ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, [[C]]
; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
; CHECK-NEXT: $q0 = COPY %res(s128)
; CHECK-NEXT: $w0 = COPY %small(s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
index 04968dab3a37ce..591b6a17928cb1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
@@ -95,7 +95,7 @@ body: |
%11:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%7:_(s8) = G_SUB %2, %11
- ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+ ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
@@ -197,7 +197,7 @@ body: |
%7:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%8:_(s8) = G_SUB %2, %7
- ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+ ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
index 0900dd4267a2e4..bc3be691bd25a0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
@@ -289,8 +289,8 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s32) = COPY $w0
- ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: %op:_(s32) = G_SUB %x, %cst
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
; CHECK-NEXT: $w0 = COPY %op(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index 63f5464371cc62..493e8cef638902 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1669,7 +1669,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
; GFX6-NEXT: v_ashr_i64 v[10:11], v[4:5], v3
; GFX6-NEXT: v_or_b32_e32 v6, v6, v8
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1692,7 +1692,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
; GFX8-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX8-NEXT: v_or_b32_e32 v6, v6, v8
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1715,7 +1715,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
-; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
; GFX9-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX9-NEXT: v_or_b32_e32 v6, v6, v8
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1735,7 +1735,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
+; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
@@ -1758,7 +1758,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
+; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index 405b1e8f3a250f..46d6b86789c778 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -1438,7 +1438,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; SI-NEXT: v_ffbh_i32_e32 v3, 0
; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
-; SI-NEXT: v_subrev_i32_e32 v3, vcc, 1, v3
+; SI-NEXT: v_add_i32_e32 v3, vcc, -1, v3
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_min_u32_e32 v2, v3, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
@@ -1456,7 +1456,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; VI-NEXT: v_ffbh_i32_e32 v3, 0
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
-; VI-NEXT: v_subrev_u32_e32 v3, vcc, 1, v3
+; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, v3, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
index 146f344930a4ee..6e55d7fdb5e957 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
@@ -4101,7 +4101,7 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX10-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v0, -14, v0
; GFX10-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX10-NEXT: v_ldexp_f32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -4112,10 +4112,9 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX11-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX11-NEXT: v_dual_mul_f32 v1, v2, v1 :: v_dual_add_nc_u32 v0, -14, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f32 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
index 3bd3486ec261d4..5d76b542fad894 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -20,10 +20,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0
@@ -51,10 +51,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -82,10 +82,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -113,10 +113,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0
@@ -150,11 +150,11 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -189,10 +189,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2
@@ -219,10 +219,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -249,10 +249,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -279,10 +279,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2
@@ -315,11 +315,11 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | i...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/114309
More information about the llvm-commits mailing list