[llvm] [GlobalISel] Allow expansion of srem by constant in prelegalizer (PR #148845)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 15 06:05:17 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-globalisel
Author: None (jyli0116)
<details>
<summary>Changes</summary>
This patch allows srem by a constant to be expanded more efficiently, avoiding the need for expensive sdiv instructions. This is the last in the series of patches that fix #<!-- -->118090.
---
Patch is 56.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148845.diff
5 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+12-11)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+13-7)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+29-16)
- (modified) llvm/test/CodeGen/AArch64/rem-by-const.ll (+350-469)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll (+45-113)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 31f1197b9723b..da829046cc421 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -700,18 +700,19 @@ class CombinerHelper {
/// Given an G_UDIV \p MI or G_UREM \p MI expressing a divide by constant,
/// return an expression that implements it by multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
- MachineInstr *buildUDivorURemUsingMul(MachineInstr &MI) const;
+ MachineInstr *buildUDivOrURemUsingMul(MachineInstr &MI) const;
/// Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
- bool matchUDivorURemByConst(MachineInstr &MI) const;
- void applyUDivorURemByConst(MachineInstr &MI) const;
-
- /// Given an G_SDIV \p MI expressing a signed divide by constant, return an
- /// expression that implements it by multiplying by a magic number.
- /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
- MachineInstr *buildSDivUsingMul(MachineInstr &MI) const;
- /// Combine G_SDIV by constant into a multiply by magic constant.
- bool matchSDivByConst(MachineInstr &MI) const;
- void applySDivByConst(MachineInstr &MI) const;
+ bool matchUDivOrURemByConst(MachineInstr &MI) const;
+ void applyUDivOrURemByConst(MachineInstr &MI) const;
+
+ /// Given an G_SDIV \p MI or G_SREM \p MI expressing a signed divide by
+ /// constant, return an expression that implements it by multiplying by a
+ /// magic number. Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's
+ /// Guide".
+ MachineInstr *buildSDivOrSRemUsingMul(MachineInstr &MI) const;
+ /// Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
+ bool matchSDivOrSRemByConst(MachineInstr &MI) const;
+ void applySDivOrSRemByConst(MachineInstr &MI) const;
/// Given an G_SDIV \p MI expressing a signed divided by a pow2 constant,
/// return expressions that implements it by shifting.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 66051d756c808..fc81ab76dc72d 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1132,14 +1132,14 @@ def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
def udiv_by_const : GICombineRule<
(defs root:$root),
(match (G_UDIV $dst, $x, $y):$root,
- [{ return Helper.matchUDivorURemByConst(*${root}); }]),
- (apply [{ Helper.applyUDivorURemByConst(*${root}); }])>;
+ [{ return Helper.matchUDivOrURemByConst(*${root}); }]),
+ (apply [{ Helper.applyUDivOrURemByConst(*${root}); }])>;
def sdiv_by_const : GICombineRule<
(defs root:$root),
(match (G_SDIV $dst, $x, $y):$root,
- [{ return Helper.matchSDivByConst(*${root}); }]),
- (apply [{ Helper.applySDivByConst(*${root}); }])>;
+ [{ return Helper.matchSDivOrSRemByConst(*${root}); }]),
+ (apply [{ Helper.applySDivOrSRemByConst(*${root}); }])>;
def sdiv_by_pow2 : GICombineRule<
(defs root:$root),
@@ -1159,10 +1159,16 @@ def intdiv_combines : GICombineGroup<[udiv_by_pow2, sdiv_by_pow2,
def urem_by_const : GICombineRule<
(defs root:$root),
(match (G_UREM $dst, $x, $y):$root,
- [{ return Helper.matchUDivorURemByConst(*${root}); }]),
- (apply [{ Helper.applyUDivorURemByConst(*${root}); }])>;
+ [{ return Helper.matchUDivOrURemByConst(*${root}); }]),
+ (apply [{ Helper.applyUDivOrURemByConst(*${root}); }])>;
-def intrem_combines : GICombineGroup<[urem_by_const]>;
+def srem_by_const : GICombineRule<
+ (defs root:$root),
+ (match (G_SREM $dst, $x, $y):$root,
+ [{ return Helper.matchSDivOrSRemByConst(*${root}); }]),
+ (apply [{ Helper.applySDivOrSRemByConst(*${root}); }])>;
+
+def intrem_combines : GICombineGroup<[urem_by_const, srem_by_const]>;
def reassoc_ptradd : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3922eba55e195..e8f513ad5a7a9 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5300,7 +5300,7 @@ bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
return false;
}
-MachineInstr *CombinerHelper::buildUDivorURemUsingMul(MachineInstr &MI) const {
+MachineInstr *CombinerHelper::buildUDivOrURemUsingMul(MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
auto &UDivorRem = cast<GenericMachineInstr>(MI);
@@ -5468,7 +5468,7 @@ MachineInstr *CombinerHelper::buildUDivorURemUsingMul(MachineInstr &MI) const {
return ret;
}
-bool CombinerHelper::matchUDivorURemByConst(MachineInstr &MI) const {
+bool CombinerHelper::matchUDivOrURemByConst(MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
Register Dst = MI.getOperand(0).getReg();
@@ -5517,13 +5517,14 @@ bool CombinerHelper::matchUDivorURemByConst(MachineInstr &MI) const {
MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
}
-void CombinerHelper::applyUDivorURemByConst(MachineInstr &MI) const {
- auto *NewMI = buildUDivorURemUsingMul(MI);
+void CombinerHelper::applyUDivOrURemByConst(MachineInstr &MI) const {
+ auto *NewMI = buildUDivOrURemUsingMul(MI);
replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
}
-bool CombinerHelper::matchSDivByConst(MachineInstr &MI) const {
- assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
+bool CombinerHelper::matchSDivOrSRemByConst(MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
Register Dst = MI.getOperand(0).getReg();
Register RHS = MI.getOperand(2).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -5543,7 +5544,8 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) const {
return false;
// If the sdiv has an 'exact' flag we can use a simpler lowering.
- if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
+ if (Opcode == TargetOpcode::G_SDIV &&
+ MI.getFlag(MachineInstr::MIFlag::IsExact)) {
return matchUnaryPredicate(
MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
}
@@ -5559,23 +5561,28 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) const {
if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
!isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
return false;
+ if (Opcode == TargetOpcode::G_SREM &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
+ return false;
}
return matchUnaryPredicate(
MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
}
-void CombinerHelper::applySDivByConst(MachineInstr &MI) const {
- auto *NewMI = buildSDivUsingMul(MI);
+void CombinerHelper::applySDivOrSRemByConst(MachineInstr &MI) const {
+ auto *NewMI = buildSDivOrSRemUsingMul(MI);
replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
}
-MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) const {
- assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
- auto &SDiv = cast<GenericMachineInstr>(MI);
- Register Dst = SDiv.getReg(0);
- Register LHS = SDiv.getReg(1);
- Register RHS = SDiv.getReg(2);
+MachineInstr *CombinerHelper::buildSDivOrSRemUsingMul(MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
+ Opcode == TargetOpcode::G_SREM);
+ auto &SDivorRem = cast<GenericMachineInstr>(MI);
+ Register Dst = SDivorRem.getReg(0);
+ Register LHS = SDivorRem.getReg(1);
+ Register RHS = SDivorRem.getReg(2);
LLT Ty = MRI.getType(Dst);
LLT ScalarTy = Ty.getScalarType();
const unsigned EltBits = ScalarTy.getScalarSizeInBits();
@@ -5705,7 +5712,13 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) const {
auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
auto T = MIB.buildLShr(Ty, Q, SignShift);
T = MIB.buildAnd(Ty, T, ShiftMask);
- return MIB.buildAdd(Ty, Q, T);
+ auto ret = MIB.buildAdd(Ty, Q, T);
+
+ if (Opcode == TargetOpcode::G_SREM) {
+ auto Prod = MIB.buildMul(Ty, ret, RHS);
+ return MIB.buildSub(Ty, LHS, Prod);
+ }
+ return ret;
}
bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) const {
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index 1376f5d9a380d..b124042265d40 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -19,8 +19,13 @@ define i8 @si8_7(i8 %a, i8 %b) {
; CHECK-GI-LABEL: si8_7:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxtb w8, w0
-; CHECK-GI-NEXT: mov w9, #7 // =0x7
-; CHECK-GI-NEXT: sdiv w8, w8, w9
+; CHECK-GI-NEXT: mov w9, #-109 // =0xffffff93
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: sxth w8, w8
+; CHECK-GI-NEXT: add w8, w0, w8, asr #8
+; CHECK-GI-NEXT: sbfx w8, w8, #2, #6
+; CHECK-GI-NEXT: ubfx w9, w8, #7, #1
+; CHECK-GI-NEXT: add w8, w8, w9
; CHECK-GI-NEXT: lsl w9, w8, #3
; CHECK-GI-NEXT: sub w8, w9, w8
; CHECK-GI-NEXT: sub w0, w0, w8
@@ -45,8 +50,14 @@ define i8 @si8_100(i8 %a, i8 %b) {
; CHECK-GI-LABEL: si8_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxtb w8, w0
+; CHECK-GI-NEXT: mov w9, #41 // =0x29
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: sxth w8, w8
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: asr w8, w8, #4
+; CHECK-GI-NEXT: ubfx w9, w8, #7, #1
+; CHECK-GI-NEXT: add w8, w8, w9
; CHECK-GI-NEXT: mov w9, #100 // =0x64
-; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: msub w0, w8, w9, w0
; CHECK-GI-NEXT: ret
entry:
@@ -129,8 +140,12 @@ define i16 @si16_7(i16 %a, i16 %b) {
; CHECK-GI-LABEL: si16_7:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxth w8, w0
-; CHECK-GI-NEXT: mov w9, #7 // =0x7
-; CHECK-GI-NEXT: sdiv w8, w8, w9
+; CHECK-GI-NEXT: mov w9, #18725 // =0x4925
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: asr w8, w8, #16
+; CHECK-GI-NEXT: asr w8, w8, #1
+; CHECK-GI-NEXT: ubfx w9, w8, #15, #1
+; CHECK-GI-NEXT: add w8, w8, w9
; CHECK-GI-NEXT: lsl w9, w8, #3
; CHECK-GI-NEXT: sub w8, w9, w8
; CHECK-GI-NEXT: sub w0, w0, w8
@@ -155,8 +170,13 @@ define i16 @si16_100(i16 %a, i16 %b) {
; CHECK-GI-LABEL: si16_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxth w8, w0
+; CHECK-GI-NEXT: mov w9, #5243 // =0x147b
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: asr w8, w8, #16
+; CHECK-GI-NEXT: asr w8, w8, #3
+; CHECK-GI-NEXT: ubfx w9, w8, #15, #1
+; CHECK-GI-NEXT: add w8, w8, w9
; CHECK-GI-NEXT: mov w9, #100 // =0x64
-; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: msub w0, w8, w9, w0
; CHECK-GI-NEXT: ret
entry:
@@ -240,8 +260,13 @@ define i32 @si32_7(i32 %a, i32 %b) {
;
; CHECK-GI-LABEL: si32_7:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov w8, #7 // =0x7
-; CHECK-GI-NEXT: sdiv w8, w0, w8
+; CHECK-GI-NEXT: mov w8, #9363 // =0x2493
+; CHECK-GI-NEXT: movk w8, #37449, lsl #16
+; CHECK-GI-NEXT: smull x8, w0, w8
+; CHECK-GI-NEXT: asr x8, x8, #32
+; CHECK-GI-NEXT: add w8, w8, w0
+; CHECK-GI-NEXT: asr w8, w8, #2
+; CHECK-GI-NEXT: add w8, w8, w8, lsr #31
; CHECK-GI-NEXT: lsl w9, w8, #3
; CHECK-GI-NEXT: sub w8, w9, w8
; CHECK-GI-NEXT: sub w0, w0, w8
@@ -265,9 +290,14 @@ define i32 @si32_100(i32 %a, i32 %b) {
;
; CHECK-GI-LABEL: si32_100:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov w8, #100 // =0x64
-; CHECK-GI-NEXT: sdiv w9, w0, w8
-; CHECK-GI-NEXT: msub w0, w9, w8, w0
+; CHECK-GI-NEXT: mov w8, #34079 // =0x851f
+; CHECK-GI-NEXT: mov w9, #100 // =0x64
+; CHECK-GI-NEXT: movk w8, #20971, lsl #16
+; CHECK-GI-NEXT: smull x8, w0, w8
+; CHECK-GI-NEXT: asr x8, x8, #32
+; CHECK-GI-NEXT: asr w8, w8, #5
+; CHECK-GI-NEXT: add w8, w8, w8, lsr #31
+; CHECK-GI-NEXT: msub w0, w8, w9, w0
; CHECK-GI-NEXT: ret
entry:
%s = srem i32 %a, 100
@@ -348,8 +378,13 @@ define i64 @si64_7(i64 %a, i64 %b) {
;
; CHECK-GI-LABEL: si64_7:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov w8, #7 // =0x7
-; CHECK-GI-NEXT: sdiv x8, x0, x8
+; CHECK-GI-NEXT: mov x8, #18725 // =0x4925
+; CHECK-GI-NEXT: movk x8, #9362, lsl #16
+; CHECK-GI-NEXT: movk x8, #37449, lsl #32
+; CHECK-GI-NEXT: movk x8, #18724, lsl #48
+; CHECK-GI-NEXT: smulh x8, x0, x8
+; CHECK-GI-NEXT: asr x8, x8, #1
+; CHECK-GI-NEXT: add x8, x8, x8, lsr #63
; CHECK-GI-NEXT: lsl x9, x8, #3
; CHECK-GI-NEXT: sub x8, x9, x8
; CHECK-GI-NEXT: sub x0, x0, x8
@@ -376,9 +411,16 @@ define i64 @si64_100(i64 %a, i64 %b) {
;
; CHECK-GI-LABEL: si64_100:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov w8, #100 // =0x64
-; CHECK-GI-NEXT: sdiv x9, x0, x8
-; CHECK-GI-NEXT: msub x0, x9, x8, x0
+; CHECK-GI-NEXT: mov x8, #55051 // =0xd70b
+; CHECK-GI-NEXT: mov w9, #100 // =0x64
+; CHECK-GI-NEXT: movk x8, #28835, lsl #16
+; CHECK-GI-NEXT: movk x8, #2621, lsl #32
+; CHECK-GI-NEXT: movk x8, #41943, lsl #48
+; CHECK-GI-NEXT: smulh x8, x0, x8
+; CHECK-GI-NEXT: add x8, x8, x0
+; CHECK-GI-NEXT: asr x8, x8, #6
+; CHECK-GI-NEXT: add x8, x8, x8, lsr #63
+; CHECK-GI-NEXT: msub x0, x8, x9, x0
; CHECK-GI-NEXT: ret
entry:
%s = srem i64 %a, 100
@@ -644,25 +686,49 @@ define <2 x i8> @sv2i8_7(<2 x i8> %d, <2 x i8> %e) {
;
; CHECK-GI-LABEL: sv2i8_7:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov w8, #7 // =0x7
-; CHECK-GI-NEXT: shl v0.2s, v0.2s, #24
+; CHECK-GI-NEXT: mov w8, #65427 // =0xff93
; CHECK-GI-NEXT: fmov s1, w8
-; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-GI-NEXT: mov v1.h[1], w8
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: mov w10, v0.s[1]
; CHECK-GI-NEXT: shl v1.4h, v1.4h, #8
-; CHECK-GI-NEXT: sdiv w9, w9, w8
; CHECK-GI-NEXT: sshr v1.4h, v1.4h, #8
-; CHECK-GI-NEXT: smov w11, v1.h[1]
-; CHECK-GI-NEXT: sdiv w8, w10, w8
-; CHECK-GI-NEXT: smov w10, v1.h[0]
+; CHECK-GI-NEXT: smov w8, v1.h[0]
+; CHECK-GI-NEXT: smov w9, v1.h[1]
+; CHECK-GI-NEXT: shl v1.2s, v0.2s, #24
+; CHECK-GI-NEXT: sshr v1.2s, v1.2s, #24
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: mov w8, #8 // =0x8
+; CHECK-GI-NEXT: mov v2.s[1], w9
+; CHECK-GI-NEXT: mul v1.2s, v1.2s, v2.2s
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: mov v2.h[1], w8
+; CHECK-GI-NEXT: mov w8, #2 // =0x2
+; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: neg v2.4h, v2.4h
+; CHECK-GI-NEXT: sshl v1.4h, v1.4h, v2.4h
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: mov v2.b[1], w8
+; CHECK-GI-NEXT: mov w8, #7 // =0x7
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: add v1.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: mov v3.b[1], w8
+; CHECK-GI-NEXT: neg v2.8b, v2.8b
+; CHECK-GI-NEXT: mov w9, v1.s[1]
+; CHECK-GI-NEXT: mov v1.b[1], w9
+; CHECK-GI-NEXT: sshl v1.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT: neg v2.8b, v3.8b
+; CHECK-GI-NEXT: movi v3.2s, #7
+; CHECK-GI-NEXT: ushl v2.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT: umov w8, v1.b[0]
+; CHECK-GI-NEXT: umov w10, v1.b[1]
+; CHECK-GI-NEXT: umov w9, v2.b[0]
+; CHECK-GI-NEXT: umov w11, v2.b[1]
+; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: fmov s2, w9
-; CHECK-GI-NEXT: fmov s1, w10
-; CHECK-GI-NEXT: mov v1.s[1], w11
-; CHECK-GI-NEXT: mov v2.s[1], w8
-; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: mov v1.s[1], w10
+; CHECK-GI-NEXT: mov v2.s[1], w11
+; CHECK-GI-NEXT: add v1.2s, v1.2s, v2.2s
+; CHECK-GI-NEXT: mls v0.2s, v1.2s, v3.2s
; CHECK-GI-NEXT: ret
entry:
%s = srem <2 x i8> %d, <i8 7, i8 7>
@@ -687,25 +753,46 @@ define <2 x i8> @sv2i8_100(<2 x i8> %d, <2 x i8> %e) {
;
; CHECK-GI-LABEL: sv2i8_100:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov w8, #100 // =0x64
-; CHECK-GI-NEXT: shl v0.2s, v0.2s, #24
+; CHECK-GI-NEXT: mov w8, #41 // =0x29
; CHECK-GI-NEXT: fmov s1, w8
-; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-GI-NEXT: mov v1.h[1], w8
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: mov w10, v0.s[1]
; CHECK-GI-NEXT: shl v1.4h, v1.4h, #8
-; CHECK-GI-NEXT: sdiv w9, w9, w8
; CHECK-GI-NEXT: sshr v1.4h, v1.4h, #8
-; CHECK-GI-NEXT: smov w11, v1.h[1]
-; CHECK-GI-NEXT: sdiv w8, w10, w8
-; CHECK-GI-NEXT: smov w10, v1.h[0]
+; CHECK-GI-NEXT: smov w8, v1.h[0]
+; CHECK-GI-NEXT: smov w9, v1.h[1]
+; CHECK-GI-NEXT: shl v1.2s, v0.2s, #24
+; CHECK-GI-NEXT: sshr v1.2s, v1.2s, #24
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: mov w8, #8 // =0x8
+; CHECK-GI-NEXT: mov v2.s[1], w9
+; CHECK-GI-NEXT: mul v1.2s, v1.2s, v2.2s
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: mov v2.h[1], w8
+; CHECK-GI-NEXT: mov w8, #4 // =0x4
+; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: neg v2.4h, v2.4h
+; CHECK-GI-NEXT: mov v3.b[1], w8
+; CHECK-GI-NEXT: mov w8, #7 // =0x7
+; CHECK-GI-NEXT: sshl v1.4h, v1.4h, v2.4h
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: neg v3.8b, v3.8b
+; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: mov v2.b[1], w8
+; CHECK-GI-NEXT: sshl v1.8b, v1.8b, v3.8b
+; CHECK-GI-NEXT: neg v2.8b, v2.8b
+; CHECK-GI-NEXT: movi v3.2s, #100
+; CHECK-GI-NEXT: ushl v2.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT: umov w8, v1.b[0]
+; CHECK-GI-NEXT: umov w10, v1.b[1]
+; CHECK-GI-NEXT: umov w9, v2.b[0]
+; CHECK-GI-NEXT: umov w11, v2.b[1]
+; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: fmov s2, w9
-; CHECK-GI-NEXT: fmov s1, w10
-; CHECK-GI-NEXT: mov v1.s[1], w11
-; CHECK-GI-NEXT: mov v2.s[1], w8
-; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: mov v1.s[1], w10
+; CHECK-GI-NEXT: mov v2.s[1], w11
+; CHECK-GI-NEXT: add v1.2s, v1.2s, v2.2s
+; CHECK-GI-NEXT: mls v0.2s, v1.2s, v3.2s
; CHECK-GI-NEXT: ret
entry:
%s = srem <2 x i8> %d, <i8 100, i8 100>
@@ -872,30 +959,37 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
;
; CHECK-GI-LABEL: sv4i8_7:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: mov w8, #7 // =0x7
-; CHECK-GI-NEXT: movi v3.4h, #7
-; CHECK-GI-NEXT: fmov s2, w8
-; CHECK-GI-NEXT: shl v0.4s, v0.4s, #24
-; CHECK-GI-NEXT: mov v2.h[1], w8
-; CHECK-GI-NEXT: sshll v3.4s, v3.4h, #0
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #24
-; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: mov w10, v0.s[1]
-; CHECK-GI-NEXT: mov w11, v0.s[2]
-; CHECK-GI-NEXT: mov w12, v0.s[3]
-; CHECK-GI-NEXT: mov v3.d[1], v2.d[0]
-; CHECK-GI-NEXT: sdiv w9, w9, w8
-; CHECK-GI-NEXT: sdiv w10, w10, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: sdiv w11, w11, w8
-; CHECK-GI-NEXT: mov v1.s[1], w10
-; CHECK-GI-NEXT: sdiv w9, w12, w8
-; CHECK-GI-NEXT: mov v1.s[2], w11
-; CHECK-GI-NEXT: mov v1.s[3], w9
-; CHECK-GI-NEXT: mls v0.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: mov w8, #147 // =0x93
+; CHECK-GI-NEXT: shl v2.4h, v0.4h, #8
+; CHECK-GI-NEXT: mov w9, #7 // =0x7
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: fmov s4, w9
+; CHECK-GI-NEXT: sshr v2.4h, v2.4h, #8
+; CHECK-GI-NEXT: mov v1.b[1], w8
+; CHECK-GI-NEXT: mov v4.b[1], w9
+; CHECK-GI-NEXT: mov v1.b[2], w8
+; CHECK-GI-NEXT: mov v4.b[2], w9
+; CHECK-GI-NEXT: mov v1.b[3], w8
+; CHECK-GI-NEXT: mov w8, #2 // =0x2
+; CHECK-GI-NEXT: mov v4.b[3], w9
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: mov v3.b[1], w8
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mul v1.4h, v2.4h, v1.4h
+; CHECK-GI-NEXT: fmov d2, d0
+; CHECK-GI-NEXT: mov v3.b[2], w8
+; CHECK-GI-NEXT: ssra v2.4h, v1.4h...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/148845
More information about the llvm-commits
mailing list