[llvm] [GISel][AArch64][AMDGPU][RISCV] Canonicalize (sub X, C) -> (add X, -C) (PR #114309)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 30 14:55:56 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-backend-amdgpu
Author: Craig Topper (topperc)
Changes:
This matches InstCombine and DAGCombine.
RISC-V only has an ADDI instruction (there is no subtract-immediate), so without this canonicalization we need additional patterns to do the conversion.
Some of the AMDGPU tests look like possible regressions.
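As a minimal sketch of the transform at the MIR level (register names here are hypothetical, but the shape matches the updated CHECK lines in the AArch64 tests below):

```
; Before the combine: subtract a constant.
%c:_(s32) = G_CONSTANT i32 7
%d:_(s32) = G_SUB %x, %c

; After sub_to_add: the constant is negated and the opcode becomes G_ADD.
%n:_(s32) = G_CONSTANT i32 -7
%d:_(s32) = G_ADD %x, %n
```

On RISC-V the negated constant can then fold directly into ADDI, which is what the removed RISCVGISel.td patterns were emulating.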
---
Patch is 439.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114309.diff
36 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+3)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+10-2)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+22)
- (modified) llvm/lib/Target/RISCV/RISCVGISel.td (-9)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir (+2-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir (+4-4)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir (+2-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll (+3-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll (+282-278)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll (+315-312)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll (+35-35)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll (+48-48)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (+536-530)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll (+17-17)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll (+24-24)
- (modified) llvm/test/CodeGen/AMDGPU/ctlz.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+16-12)
- (modified) llvm/test/CodeGen/AMDGPU/div_v2i128.ll (+144-136)
- (modified) llvm/test/CodeGen/AMDGPU/fptoi.i128.ll (+36-36)
- (modified) llvm/test/CodeGen/AMDGPU/itofp.i128.ll (+72-72)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll (+22-22)
- (modified) llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll (+565-453)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll (+9-9)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir (+3-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir (+6-4)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir (+2-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir (+2-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir (+2-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir (+2-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir (+2-2)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9240a3c3127eb4..b09981eaef506e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -321,6 +321,9 @@ class CombinerHelper {
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
+ // Transform a G_SUB with constant on the RHS to G_ADD.
+ bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
// Transform a G_SHL with an extended source into a narrower shift if
// possible.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index ead4149fc11068..9891db5ceb6fa9 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -335,6 +335,13 @@ def mul_to_shl : GICombineRule<
[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;
+// (sub x, C) -> (add x, -C)
+def sub_to_add : GICombineRule<
+ (defs root:$d, build_fn_matchinfo:$matchinfo),
+ (match (G_SUB $d, $op1, $op2):$mi,
+ [{ return Helper.matchCombineSubToAdd(*${mi}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${mi}, ${matchinfo}); }])>;
+
// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
def reduce_shl_of_extend_matchdata : GIDefMatchData<"RegisterImmPair">;
def reduce_shl_of_extend : GICombineRule<
@@ -1903,8 +1910,9 @@ def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
select_to_iminmax, match_selects]>;
-def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
- mul_by_neg_one, idempotent_prop]>;
+def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, sub_to_add,
+ add_p2i_to_ptradd, mul_by_neg_one,
+ idempotent_prop]>;
def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b7ddf9f479ef8e..91e5af9dfd8e25 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2041,6 +2041,28 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
Observer.changedInstr(MI);
}
+bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SUB && "Expected a G_SUB");
+ auto MaybeImmVal =
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ APInt NegImm = -MaybeImmVal->Value;
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NegCst = B.buildConstant(Ty, NegImm);
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
+ MI.getOperand(2).setReg(NegCst.getReg(0));
+ MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
RegisterImmPair &MatchData) {
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index 67e93b812421b4..40aae220fbd47e 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -96,15 +96,6 @@ def gi_sh2add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<2>">,
def gi_sh3add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<3>">,
GIComplexPatternEquiv<sh3add_uw_op>;
-// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
-def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
- (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
-
-let Predicates = [IsRV64] in {
-def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
- (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
-}
-
// Ptr type used in patterns with GlobalISelEmitter
def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
index 2f10a497fa74cb..5cbff0f0c74cb7 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
@@ -308,8 +308,8 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %a:_(s64) = COPY $x0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 71
- ; CHECK-NEXT: %sub:_(s64) = G_SUB %a, [[C]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -71
+ ; CHECK-NEXT: %sub:_(s64) = G_ADD %a, [[C]]
; CHECK-NEXT: $x0 = COPY %sub(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%a:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
index f207e9c149a476..e9d4af7da5d06f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
@@ -88,8 +88,8 @@ body: |
; CHECK-LABEL: name: test_combine_trunc_sub_i128
; CHECK: %lhs:_(s128) = COPY $q0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5
+ ; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s128) = COPY $q0
%rhs:_(s128) = G_CONSTANT i128 5
@@ -103,8 +103,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
; CHECK: %lhs:_(s128) = COPY $q0
- ; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
- ; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -5
+ ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, [[C]]
; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
; CHECK-NEXT: $q0 = COPY %res(s128)
; CHECK-NEXT: $w0 = COPY %small(s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
index 04968dab3a37ce..591b6a17928cb1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
@@ -95,7 +95,7 @@ body: |
%11:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%7:_(s8) = G_SUB %2, %11
- ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+ ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
@@ -197,7 +197,7 @@ body: |
%7:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%8:_(s8) = G_SUB %2, %7
- ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+ ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
index 0900dd4267a2e4..bc3be691bd25a0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
@@ -289,8 +289,8 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s32) = COPY $w0
- ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: %op:_(s32) = G_SUB %x, %cst
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
; CHECK-NEXT: $w0 = COPY %op(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index 63f5464371cc62..493e8cef638902 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1669,7 +1669,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
; GFX6-NEXT: v_ashr_i64 v[10:11], v[4:5], v3
; GFX6-NEXT: v_or_b32_e32 v6, v6, v8
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1692,7 +1692,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
; GFX8-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX8-NEXT: v_or_b32_e32 v6, v6, v8
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1715,7 +1715,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
-; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
; GFX9-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX9-NEXT: v_or_b32_e32 v6, v6, v8
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1735,7 +1735,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
+; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
@@ -1758,7 +1758,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
+; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index 405b1e8f3a250f..46d6b86789c778 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -1438,7 +1438,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; SI-NEXT: v_ffbh_i32_e32 v3, 0
; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
-; SI-NEXT: v_subrev_i32_e32 v3, vcc, 1, v3
+; SI-NEXT: v_add_i32_e32 v3, vcc, -1, v3
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_min_u32_e32 v2, v3, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
@@ -1456,7 +1456,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; VI-NEXT: v_ffbh_i32_e32 v3, 0
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
-; VI-NEXT: v_subrev_u32_e32 v3, vcc, 1, v3
+; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, v3, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
index 146f344930a4ee..6e55d7fdb5e957 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
@@ -4101,7 +4101,7 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX10-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v0, -14, v0
; GFX10-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX10-NEXT: v_ldexp_f32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -4112,10 +4112,9 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX11-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX11-NEXT: v_dual_mul_f32 v1, v2, v1 :: v_dual_add_nc_u32 v0, -14, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f32 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
index 3bd3486ec261d4..5d76b542fad894 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -20,10 +20,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0
@@ -51,10 +51,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -82,10 +82,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -113,10 +113,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0
@@ -150,11 +150,11 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -189,10 +189,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2
@@ -219,10 +219,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -249,10 +249,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -279,10 +279,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2
@@ -315,11 +315,11 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | i...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/114309
More information about the llvm-commits mailing list