[llvm] 44a7d2f - [AArch64] Add patterns for add(x, trunc(shift)) (#168927)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 24 22:57:13 PST 2025
Author: David Green
Date: 2025-11-25T06:57:09Z
New Revision: 44a7d2f22aec6ac2019a3674e1390276c2ee7ca5
URL: https://github.com/llvm/llvm-project/commit/44a7d2f22aec6ac2019a3674e1390276c2ee7ca5
DIFF: https://github.com/llvm/llvm-project/commit/44a7d2f22aec6ac2019a3674e1390276c2ee7ca5.diff
LOG: [AArch64] Add patterns for add(x, trunc(shift)) (#168927)
This can be lowered to a 64-bit add where we only use the bottom 32 bits
of the result. It is conceptually the same as
https://alive2.llvm.org/ce/z/Xfz3Rf, but with the sext replaced by an
anyext.
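As a minimal sketch (the function and value names below are hypothetical, not taken from the patch's test files), IR of this shape is what the new patterns target:

  define i32 @add_trunc_lsr(i32 %x, i64 %y) {
    ; take the high half of %y and truncate it back to 32 bits
    %s = lshr i64 %y, 32
    %t = trunc i64 %s to i32
    ; add(x, trunc(shift)) - the shape matched by the new patterns
    %r = add i32 %x, %t
    ret i32 %r
  }

Previously this lowered to a standalone 64-bit shift feeding a 32-bit add (roughly lsr x8, x1, #32 followed by add w0, w0, w8); with the patterns below the shift instead folds into a 64-bit shifted-register add such as add x0, x0, x1, lsr #32, which is safe because only the bottom 32 bits of the result are consumed.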
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/combine-sdiv.ll
llvm/test/CodeGen/AArch64/rem-by-const.ll
llvm/test/CodeGen/AArch64/srem-lkk.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 881f7707f0eb7..0d63a9121310e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2752,6 +2752,20 @@ def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
(ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}
+
+def trunc_isWorthFoldingALU : PatFrag<(ops node:$src), (trunc $src)> {
+ let PredicateCode = [{ return isWorthFoldingALU(SDValue(N, 0)); }];
+ let GISelPredicateCode = [{ return isWorthFoldingIntoExtendedReg(MI, MRI, false); }];
+}
+
+// Patterns for (add X, trunc(shift(Y))), for which we can generate 64bit instructions.
+def : Pat<(add GPR32:$Rn, (trunc_isWorthFoldingALU arith_shifted_reg64:$Rm)),
+ (EXTRACT_SUBREG (ADDXrs (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Rn, sub_32),
+ arith_shifted_reg64:$Rm), sub_32)>;
+def : Pat<(sub GPR32:$Rn, (trunc_isWorthFoldingALU arith_shifted_reg64:$Rm)),
+ (EXTRACT_SUBREG (SUBXrs (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Rn, sub_32),
+ arith_shifted_reg64:$Rm), sub_32)>;
+
def : InstAlias<"neg $dst, $src",
(SUBWrs GPR32:$dst, WZR,
(arith_shifted_reg32 GPR32:$src, 0)), 3>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 64db3765c433f..f9db39e5f8622 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -417,10 +417,10 @@ class AArch64InstructionSelector : public InstructionSelector {
}
std::optional<bool>
- isWorthFoldingIntoAddrMode(MachineInstr &MI,
+ isWorthFoldingIntoAddrMode(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const;
- bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
+ bool isWorthFoldingIntoExtendedReg(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
bool IsAddrOperand) const;
ComplexRendererFns
@@ -7068,7 +7068,7 @@ AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
- MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+ const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
if (MI.getOpcode() == AArch64::G_SHL) {
// Address operands with shifts are free, except for running on subtargets
// with AddrLSLSlow14.
@@ -7089,7 +7089,7 @@ std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
/// \p IsAddrOperand whether the def of MI is used as an address operand
/// (e.g. feeding into an LDR/STR).
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
- MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const MachineInstr &MI, const MachineRegisterInfo &MRI,
bool IsAddrOperand) const {
// Always fold if there is one use, or if we're optimizing for size.
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index cca190f08df2b..ada49414805e2 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -1493,13 +1493,12 @@ define i5 @combine_i5_sdiv_const7(i5 %x) {
; CHECK-SD-LABEL: combine_i5_sdiv_const7:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493
-; CHECK-SD-NEXT: sbfx x9, x0, #0, #5
-; CHECK-SD-NEXT: movk x8, #37449, lsl #16
-; CHECK-SD-NEXT: smull x8, w9, w8
-; CHECK-SD-NEXT: lsl w9, w0, #27
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: add w8, w8, w9, asr #27
+; CHECK-SD-NEXT: sbfx x8, x0, #0, #5
+; CHECK-SD-NEXT: mov x9, #-56173 // =0xffffffffffff2493
+; CHECK-SD-NEXT: movk x9, #37449, lsl #16
+; CHECK-SD-NEXT: smull x8, w8, w9
+; CHECK-SD-NEXT: sbfx w9, w0, #0, #5
+; CHECK-SD-NEXT: add x8, x9, x8, lsr #32
; CHECK-SD-NEXT: asr w9, w8, #2
; CHECK-SD-NEXT: add w0, w9, w8, lsr #31
; CHECK-SD-NEXT: ret
@@ -1646,10 +1645,10 @@ define i32 @combine_i32_sdiv_const7(i32 %x) {
; CHECK-SD-LABEL: combine_i32_sdiv_const7:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: movk w8, #37449, lsl #16
; CHECK-SD-NEXT: smull x8, w0, w8
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: add w8, w8, w0
+; CHECK-SD-NEXT: add x8, x0, x8, lsr #32
; CHECK-SD-NEXT: asr w9, w8, #2
; CHECK-SD-NEXT: add w0, w9, w8, lsr #31
; CHECK-SD-NEXT: ret
@@ -1657,10 +1656,10 @@ define i32 @combine_i32_sdiv_const7(i32 %x) {
; CHECK-GI-LABEL: combine_i32_sdiv_const7:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #9363 // =0x2493
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT: movk w8, #37449, lsl #16
; CHECK-GI-NEXT: smull x8, w0, w8
-; CHECK-GI-NEXT: asr x8, x8, #32
-; CHECK-GI-NEXT: add w8, w8, w0
+; CHECK-GI-NEXT: add x8, x0, x8, asr #32
; CHECK-GI-NEXT: asr w8, w8, #2
; CHECK-GI-NEXT: add w0, w8, w8, lsr #31
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index 87b11086e28d5..a55aaeb62830f 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -247,10 +247,10 @@ define i32 @si32_7(i32 %a, i32 %b) {
; CHECK-SD-LABEL: si32_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: movk w8, #37449, lsl #16
; CHECK-SD-NEXT: smull x8, w0, w8
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: add w8, w8, w0
+; CHECK-SD-NEXT: add x8, x0, x8, lsr #32
; CHECK-SD-NEXT: asr w9, w8, #2
; CHECK-SD-NEXT: add w8, w9, w8, lsr #31
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
@@ -260,10 +260,10 @@ define i32 @si32_7(i32 %a, i32 %b) {
; CHECK-GI-LABEL: si32_7:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, #9363 // =0x2493
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT: movk w8, #37449, lsl #16
; CHECK-GI-NEXT: smull x8, w0, w8
-; CHECK-GI-NEXT: asr x8, x8, #32
-; CHECK-GI-NEXT: add w8, w8, w0
+; CHECK-GI-NEXT: add x8, x0, x8, asr #32
; CHECK-GI-NEXT: asr w8, w8, #2
; CHECK-GI-NEXT: add w8, w8, w8, lsr #31
; CHECK-GI-NEXT: lsl w9, w8, #3
@@ -801,13 +801,10 @@ define <3 x i8> @sv3i8_7(<3 x i8> %d, <3 x i8> %e) {
; CHECK-SD-NEXT: smull x10, w10, w9
; CHECK-SD-NEXT: smull x9, w11, w9
; CHECK-SD-NEXT: sxtb w11, w2
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: lsr x10, x10, #32
-; CHECK-SD-NEXT: lsr x9, x9, #32
-; CHECK-SD-NEXT: add w8, w8, w13
-; CHECK-SD-NEXT: add w10, w10, w12
+; CHECK-SD-NEXT: add x8, x13, x8, lsr #32
+; CHECK-SD-NEXT: add x10, x12, x10, lsr #32
+; CHECK-SD-NEXT: add x9, x11, x9, lsr #32
; CHECK-SD-NEXT: asr w14, w8, #2
-; CHECK-SD-NEXT: add w9, w9, w11
; CHECK-SD-NEXT: asr w15, w10, #2
; CHECK-SD-NEXT: asr w16, w9, #2
; CHECK-SD-NEXT: add w8, w14, w8, lsr #31
@@ -899,45 +896,41 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493
; CHECK-SD-NEXT: movk x8, #37449, lsl #16
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-SD-NEXT: smov x9, v0.h[0]
-; CHECK-SD-NEXT: smov x10, v0.h[1]
-; CHECK-SD-NEXT: smov w11, v0.h[0]
-; CHECK-SD-NEXT: smov x12, v0.h[2]
-; CHECK-SD-NEXT: smov w13, v0.h[1]
-; CHECK-SD-NEXT: smov x14, v0.h[3]
-; CHECK-SD-NEXT: smov w16, v0.h[2]
-; CHECK-SD-NEXT: smull x9, w9, w8
+; CHECK-SD-NEXT: smov x10, v0.h[0]
+; CHECK-SD-NEXT: smov x9, v0.h[1]
+; CHECK-SD-NEXT: smov w12, v0.h[0]
+; CHECK-SD-NEXT: smov w11, v0.h[1]
+; CHECK-SD-NEXT: smov x13, v0.h[2]
+; CHECK-SD-NEXT: smov w14, v0.h[2]
+; CHECK-SD-NEXT: smov x17, v0.h[3]
; CHECK-SD-NEXT: smull x10, w10, w8
-; CHECK-SD-NEXT: smull x12, w12, w8
-; CHECK-SD-NEXT: lsr x9, x9, #32
-; CHECK-SD-NEXT: smull x8, w14, w8
-; CHECK-SD-NEXT: smov w14, v0.h[3]
-; CHECK-SD-NEXT: lsr x10, x10, #32
-; CHECK-SD-NEXT: add w9, w9, w11
-; CHECK-SD-NEXT: lsr x12, x12, #32
+; CHECK-SD-NEXT: smull x9, w9, w8
+; CHECK-SD-NEXT: smull x13, w13, w8
+; CHECK-SD-NEXT: add x10, x12, x10, lsr #32
+; CHECK-SD-NEXT: smull x8, w17, w8
+; CHECK-SD-NEXT: add x9, x11, x9, lsr #32
+; CHECK-SD-NEXT: asr w16, w10, #2
+; CHECK-SD-NEXT: add x13, x14, x13, lsr #32
; CHECK-SD-NEXT: asr w15, w9, #2
-; CHECK-SD-NEXT: add w10, w10, w13
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: asr w17, w10, #2
-; CHECK-SD-NEXT: add w12, w12, w16
+; CHECK-SD-NEXT: add w10, w16, w10, lsr #31
+; CHECK-SD-NEXT: asr w16, w13, #2
; CHECK-SD-NEXT: add w9, w15, w9, lsr #31
-; CHECK-SD-NEXT: asr w15, w12, #2
-; CHECK-SD-NEXT: add w8, w8, w14
-; CHECK-SD-NEXT: add w10, w17, w10, lsr #31
-; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
+; CHECK-SD-NEXT: smov w15, v0.h[3]
; CHECK-SD-NEXT: sub w10, w10, w10, lsl #3
-; CHECK-SD-NEXT: add w9, w11, w9
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: add w10, w13, w10
-; CHECK-SD-NEXT: add w9, w15, w12, lsr #31
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT: mov v0.h[1], w10
+; CHECK-SD-NEXT: add w10, w12, w10
+; CHECK-SD-NEXT: fmov s0, w10
+; CHECK-SD-NEXT: add w9, w11, w9
+; CHECK-SD-NEXT: add w10, w16, w13, lsr #31
+; CHECK-SD-NEXT: add x8, x15, x8, lsr #32
+; CHECK-SD-NEXT: mov v0.h[1], w9
+; CHECK-SD-NEXT: sub w9, w10, w10, lsl #3
; CHECK-SD-NEXT: asr w10, w8, #2
-; CHECK-SD-NEXT: add w9, w16, w9
+; CHECK-SD-NEXT: add w9, w14, w9
; CHECK-SD-NEXT: add w8, w10, w8, lsr #31
; CHECK-SD-NEXT: mov v0.h[2], w9
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT: add w8, w14, w8
+; CHECK-SD-NEXT: add w8, w15, w8
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
@@ -1770,32 +1763,29 @@ define <3 x i16> @sv3i16_7(<3 x i16> %d, <3 x i16> %e) {
; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493
; CHECK-SD-NEXT: smov x10, v0.h[1]
; CHECK-SD-NEXT: movk x8, #37449, lsl #16
-; CHECK-SD-NEXT: smov w12, v0.h[0]
-; CHECK-SD-NEXT: smov x11, v0.h[2]
-; CHECK-SD-NEXT: smov w13, v0.h[1]
+; CHECK-SD-NEXT: smov w11, v0.h[0]
+; CHECK-SD-NEXT: smov x13, v0.h[2]
+; CHECK-SD-NEXT: smov w12, v0.h[1]
+; CHECK-SD-NEXT: smov w14, v0.h[2]
; CHECK-SD-NEXT: smull x9, w9, w8
; CHECK-SD-NEXT: smull x10, w10, w8
-; CHECK-SD-NEXT: smull x8, w11, w8
-; CHECK-SD-NEXT: smov w11, v0.h[2]
-; CHECK-SD-NEXT: lsr x9, x9, #32
-; CHECK-SD-NEXT: lsr x10, x10, #32
-; CHECK-SD-NEXT: add w9, w9, w12
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: asr w14, w9, #2
-; CHECK-SD-NEXT: add w10, w10, w13
+; CHECK-SD-NEXT: smull x8, w13, w8
+; CHECK-SD-NEXT: add x9, x11, x9, lsr #32
+; CHECK-SD-NEXT: add x10, x12, x10, lsr #32
+; CHECK-SD-NEXT: asr w13, w9, #2
+; CHECK-SD-NEXT: add x8, x14, x8, lsr #32
; CHECK-SD-NEXT: asr w15, w10, #2
-; CHECK-SD-NEXT: add w8, w8, w11
-; CHECK-SD-NEXT: add w9, w14, w9, lsr #31
-; CHECK-SD-NEXT: asr w14, w8, #2
+; CHECK-SD-NEXT: add w9, w13, w9, lsr #31
+; CHECK-SD-NEXT: asr w13, w8, #2
; CHECK-SD-NEXT: add w10, w15, w10, lsr #31
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT: add w8, w14, w8, lsr #31
+; CHECK-SD-NEXT: add w8, w13, w8, lsr #31
; CHECK-SD-NEXT: sub w10, w10, w10, lsl #3
-; CHECK-SD-NEXT: add w9, w12, w9
+; CHECK-SD-NEXT: add w9, w11, w9
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: add w10, w13, w10
-; CHECK-SD-NEXT: add w8, w11, w8
+; CHECK-SD-NEXT: add w10, w12, w10
+; CHECK-SD-NEXT: add w8, w14, w8
; CHECK-SD-NEXT: mov v0.h[1], w10
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -2444,15 +2434,14 @@ define <3 x i32> @sv3i32_7(<3 x i32> %d, <3 x i32> %e) {
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: smull x8, w9, w8
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: add w8, w8, w9
-; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32
+; CHECK-SD-NEXT: add x8, x9, x8, lsr #32
; CHECK-SD-NEXT: asr w10, w8, #2
+; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-SD-NEXT: add w8, w10, w8, lsr #31
; CHECK-SD-NEXT: add v1.2s, v1.2s, v0.2s
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT: sshr v2.2s, v1.2s, #2
; CHECK-SD-NEXT: add w8, w9, w8
+; CHECK-SD-NEXT: sshr v2.2s, v1.2s, #2
; CHECK-SD-NEXT: usra v2.2s, v1.2s, #31
; CHECK-SD-NEXT: mls v0.2s, v2.2s, v3.2s
; CHECK-SD-NEXT: mov v0.s[2], w8
diff --git a/llvm/test/CodeGen/AArch64/srem-lkk.ll b/llvm/test/CodeGen/AArch64/srem-lkk.ll
index 1223ae3a15e7b..3cc509871eb26 100644
--- a/llvm/test/CodeGen/AArch64/srem-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/srem-lkk.ll
@@ -5,10 +5,10 @@ define i32 @fold_srem_positive_odd(i32 %x) {
; CHECK-LABEL: fold_srem_positive_odd:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #37253 // =0x9185
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: movk w8, #44150, lsl #16
; CHECK-NEXT: smull x8, w0, w8
-; CHECK-NEXT: lsr x8, x8, #32
-; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: add x8, x0, x8, lsr #32
; CHECK-NEXT: asr w9, w8, #6
; CHECK-NEXT: add w8, w9, w8, lsr #31
; CHECK-NEXT: mov w9, #95 // =0x5f
@@ -72,10 +72,10 @@ define i32 @combine_srem_sdiv(i32 %x) {
; CHECK-LABEL: combine_srem_sdiv:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #37253 // =0x9185
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: movk w8, #44150, lsl #16
; CHECK-NEXT: smull x8, w0, w8
-; CHECK-NEXT: lsr x8, x8, #32
-; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: add x8, x0, x8, lsr #32
; CHECK-NEXT: asr w9, w8, #6
; CHECK-NEXT: add w8, w9, w8, lsr #31
; CHECK-NEXT: mov w9, #95 // =0x5f