[llvm] [GlobalISel] Allow expansion of urem by constant in prelegalizer (PR #145914)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 26 07:59:55 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: None (jyli0116)
<details>
<summary>Changes</summary>
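This patch adds a GlobalISel combine that expands G_UREM by a constant into the same multiply-by-magic-constant sequence already used for G_UDIV, followed by a multiply and subtract to recover the remainder, avoiding the need for expensive udiv instructions.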
---
Patch is 49.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145914.diff
7 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+7-3)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+9-1)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+61-8)
- (added) llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll (+243)
- (modified) llvm/test/CodeGen/AArch64/pr58431.ll (+5-3)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll (+27-77)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+89-506)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index c15263e0b06f8..9139425658480 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -693,18 +693,22 @@ class CombinerHelper {
/// feeding a G_AND instruction \p MI.
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const;
- /// Given an G_UDIV \p MI expressing a divide by constant, return an
- /// expression that implements it by multiplying by a magic number.
+ /// Given an G_UDIV \p MI or G_UREM \p MI expressing a divide by constant,
+ /// return an expression that implements it by multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
- MachineInstr *buildUDivUsingMul(MachineInstr &MI) const;
+ MachineInstr *buildUDivorURemUsingMul(MachineInstr &MI) const;
/// Combine G_UDIV by constant into a multiply by magic constant.
bool matchUDivByConst(MachineInstr &MI) const;
void applyUDivByConst(MachineInstr &MI) const;
+ /// Combine G_UREM by constant into a multiply by magic constant.
+ bool matchURemByConst(MachineInstr &MI) const;
+ void applyURemByConst(MachineInstr &MI) const;
/// Given an G_SDIV \p MI expressing a signed divide by constant, return an
/// expression that implements it by multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
MachineInstr *buildSDivUsingMul(MachineInstr &MI) const;
+ /// Combine G_SDIV by constant into a multiply by magic constant.
bool matchSDivByConst(MachineInstr &MI) const;
void applySDivByConst(MachineInstr &MI) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 4a92dc16c1bf4..52cbbf91849b6 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1156,6 +1156,14 @@ def udiv_by_pow2 : GICombineRule<
def intdiv_combines : GICombineGroup<[udiv_by_const, sdiv_by_const,
sdiv_by_pow2, udiv_by_pow2]>;
+def urem_by_const : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_UREM):$root,
+ [{ return Helper.matchURemByConst(*${root}); }]),
+ (apply [{ Helper.applyURemByConst(*${root}); }])>;
+
+def intrem_combines : GICombineGroup<[urem_by_const]>;
+
def reassoc_ptradd : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_PTR_ADD):$root,
@@ -2048,7 +2056,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
constant_fold_cast_op, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
- sub_add_reg, select_to_minmax,
+ intrem_combines, sub_add_reg, select_to_minmax,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
simplify_neg_minmax, combine_concat_vector,
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1e851183de0d..c511f27a5e8ce 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5288,12 +5288,13 @@ bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
return false;
}
-MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) const {
- assert(MI.getOpcode() == TargetOpcode::G_UDIV);
- auto &UDiv = cast<GenericMachineInstr>(MI);
- Register Dst = UDiv.getReg(0);
- Register LHS = UDiv.getReg(1);
- Register RHS = UDiv.getReg(2);
+MachineInstr *CombinerHelper::buildUDivorURemUsingMul(MachineInstr &MI) const {
+ unsigned opcode = MI.getOpcode();
+ assert(opcode == TargetOpcode::G_UDIV || opcode == TargetOpcode::G_UREM);
+ auto &UDivorRem = cast<GenericMachineInstr>(MI);
+ Register Dst = UDivorRem.getReg(0);
+ Register LHS = UDivorRem.getReg(1);
+ Register RHS = UDivorRem.getReg(2);
LLT Ty = MRI.getType(Dst);
LLT ScalarTy = Ty.getScalarType();
const unsigned EltBits = ScalarTy.getScalarSizeInBits();
@@ -5446,7 +5447,13 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) const {
auto IsOne = MIB.buildICmp(
CmpInst::Predicate::ICMP_EQ,
Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
- return MIB.buildSelect(Ty, IsOne, LHS, Q);
+ auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
+
+ if (opcode == TargetOpcode::G_UREM) {
+ auto Prod = MIB.buildMul(Ty, ret, RHS);
+ return MIB.buildSub(Ty, LHS, Prod);
+ }
+ return ret;
}
bool CombinerHelper::matchUDivByConst(MachineInstr &MI) const {
@@ -5494,7 +5501,53 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) const {
}
void CombinerHelper::applyUDivByConst(MachineInstr &MI) const {
- auto *NewMI = buildUDivUsingMul(MI);
+ auto *NewMI = buildUDivorURemUsingMul(MI);
+ replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+bool CombinerHelper::matchURemByConst(MachineInstr &MI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_UREM);
+ Register Dst = MI.getOperand(0).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ auto &MF = *MI.getMF();
+ AttributeList Attr = MF.getFunction().getAttributes();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
+ return false;
+
+ // Don't do this for minsize because the instruction sequence is usually
+ // larger.
+ if (MF.getFunction().hasMinSize())
+ return false;
+
+ auto *RHSDef = MRI.getVRegDef(RHS);
+ if (!isConstantOrConstantVector(*RHSDef, MRI))
+ return false;
+
+ // Don't do this if the types are not going to be legal.
+ if (LI) {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ICMP,
+ {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
+ DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
+ return false;
+ }
+
+ return matchUnaryPredicate(
+ MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
+}
+
+void CombinerHelper::applyURemByConst(MachineInstr &MI) const {
+ auto *NewMI = buildUDivorURemUsingMul(MI);
replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll
new file mode 100644
index 0000000000000..0cf827410c30c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine_urem.ll
@@ -0,0 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK-SD
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK-GI
+
+
+define i8 @test7s8(i8 %a) {
+; CHECK-SD-LABEL: test7s8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #37 // =0x25
+; CHECK-SD-NEXT: and w9, w0, #0xff
+; CHECK-SD-NEXT: mul w8, w9, w8
+; CHECK-SD-NEXT: lsr w8, w8, #8
+; CHECK-SD-NEXT: sub w9, w0, w8
+; CHECK-SD-NEXT: and w9, w9, #0xfe
+; CHECK-SD-NEXT: add w8, w8, w9, lsr #1
+; CHECK-SD-NEXT: lsr w8, w8, #2
+; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
+; CHECK-SD-NEXT: add w0, w0, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test7s8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #37 // =0x25
+; CHECK-GI-NEXT: and w9, w0, #0xff
+; CHECK-GI-NEXT: mul w8, w9, w8
+; CHECK-GI-NEXT: lsr w8, w8, #8
+; CHECK-GI-NEXT: sub w9, w0, w8
+; CHECK-GI-NEXT: ubfx w9, w9, #1, #7
+; CHECK-GI-NEXT: add w8, w9, w8
+; CHECK-GI-NEXT: ubfx w8, w8, #2, #6
+; CHECK-GI-NEXT: lsl w9, w8, #3
+; CHECK-GI-NEXT: sub w8, w9, w8
+; CHECK-GI-NEXT: sub w0, w0, w8
+; CHECK-GI-NEXT: ret
+ %r = urem i8 %a, 7
+ ret i8 %r
+}
+
+define i8 @test100s8(i8 %a) {
+; CHECK-SD-LABEL: test100s8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #41 // =0x29
+; CHECK-SD-NEXT: and w9, w0, #0xff
+; CHECK-SD-NEXT: mul w8, w9, w8
+; CHECK-SD-NEXT: mov w9, #100 // =0x64
+; CHECK-SD-NEXT: lsr w8, w8, #12
+; CHECK-SD-NEXT: msub w0, w8, w9, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test100s8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #41 // =0x29
+; CHECK-GI-NEXT: and w9, w0, #0xff
+; CHECK-GI-NEXT: mul w8, w9, w8
+; CHECK-GI-NEXT: mov w9, #100 // =0x64
+; CHECK-GI-NEXT: lsr w8, w8, #8
+; CHECK-GI-NEXT: lsr w8, w8, #4
+; CHECK-GI-NEXT: msub w0, w8, w9, w0
+; CHECK-GI-NEXT: ret
+ %r = urem i8 %a, 100
+ ret i8 %r
+}
+
+define i32 @test7s32(i32 %a) {
+; CHECK-SD-LABEL: test7s32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT: movk w8, #9362, lsl #16
+; CHECK-SD-NEXT: umull x8, w0, w8
+; CHECK-SD-NEXT: lsr x8, x8, #32
+; CHECK-SD-NEXT: sub w9, w0, w8
+; CHECK-SD-NEXT: add w8, w8, w9, lsr #1
+; CHECK-SD-NEXT: lsr w8, w8, #2
+; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
+; CHECK-SD-NEXT: add w0, w0, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test7s32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #18725 // =0x4925
+; CHECK-GI-NEXT: movk w8, #9362, lsl #16
+; CHECK-GI-NEXT: umull x8, w0, w8
+; CHECK-GI-NEXT: lsr x8, x8, #32
+; CHECK-GI-NEXT: sub w9, w0, w8
+; CHECK-GI-NEXT: add w8, w8, w9, lsr #1
+; CHECK-GI-NEXT: lsr w8, w8, #2
+; CHECK-GI-NEXT: lsl w9, w8, #3
+; CHECK-GI-NEXT: sub w8, w9, w8
+; CHECK-GI-NEXT: sub w0, w0, w8
+; CHECK-GI-NEXT: ret
+ %c = urem i32 %a, 7
+ ret i32 %c
+}
+
+define i32 @test100s32(i32 %a) {
+; CHECK-SD-LABEL: test100s32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
+; CHECK-SD-NEXT: mov w9, #100 // =0x64
+; CHECK-SD-NEXT: movk w8, #20971, lsl #16
+; CHECK-SD-NEXT: umull x8, w0, w8
+; CHECK-SD-NEXT: lsr x8, x8, #37
+; CHECK-SD-NEXT: msub w0, w8, w9, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test100s32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #34079 // =0x851f
+; CHECK-GI-NEXT: mov w9, #100 // =0x64
+; CHECK-GI-NEXT: movk w8, #20971, lsl #16
+; CHECK-GI-NEXT: umull x8, w0, w8
+; CHECK-GI-NEXT: lsr x8, x8, #32
+; CHECK-GI-NEXT: lsr w8, w8, #5
+; CHECK-GI-NEXT: msub w0, w8, w9, w0
+; CHECK-GI-NEXT: ret
+ %c = urem i32 %a, 100
+ ret i32 %c
+}
+
+define <8 x i16> @test7v8s16(<8 x i16> %a) {
+; CHECK-SD-LABEL: test7v8s16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
+; CHECK-SD-NEXT: dup v1.8h, w8
+; CHECK-SD-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT: sub v2.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: usra v1.8h, v2.8h, #1
+; CHECK-SD-NEXT: movi v2.8h, #7
+; CHECK-SD-NEXT: ushr v1.8h, v1.8h, #2
+; CHECK-SD-NEXT: mls v0.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test7v8s16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI4_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
+; CHECK-GI-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: sub v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: usra v1.8h, v2.8h, #1
+; CHECK-GI-NEXT: movi v2.8h, #7
+; CHECK-GI-NEXT: ushr v1.8h, v1.8h, #2
+; CHECK-GI-NEXT: mls v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: ret
+ %r = urem <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ret <8 x i16> %r
+}
+
+define <8 x i16> @test100v8s16(<8 x i16> %a) {
+; CHECK-SD-LABEL: test100v8s16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #5243 // =0x147b
+; CHECK-SD-NEXT: ushr v2.8h, v0.8h, #2
+; CHECK-SD-NEXT: dup v1.8h, w8
+; CHECK-SD-NEXT: umull2 v3.4s, v2.8h, v1.8h
+; CHECK-SD-NEXT: umull v1.4s, v2.4h, v1.4h
+; CHECK-SD-NEXT: movi v2.8h, #100
+; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT: ushr v1.8h, v1.8h, #1
+; CHECK-SD-NEXT: mls v0.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test100v8s16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI5_0
+; CHECK-GI-NEXT: ushr v1.8h, v0.8h, #2
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI5_0]
+; CHECK-GI-NEXT: umull2 v3.4s, v1.8h, v2.8h
+; CHECK-GI-NEXT: umull v1.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: movi v2.8h, #100
+; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT: ushr v1.8h, v1.8h, #1
+; CHECK-GI-NEXT: mls v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: ret
+ %r = urem <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
+ ret <8 x i16> %r
+}
+
+define <4 x i32> @test7v4s32(<4 x i32> %a) {
+; CHECK-SD-LABEL: test7v4s32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT: movk w8, #9362, lsl #16
+; CHECK-SD-NEXT: dup v1.4s, w8
+; CHECK-SD-NEXT: umull2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT: uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: sub v2.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: usra v1.4s, v2.4s, #1
+; CHECK-SD-NEXT: movi v2.4s, #7
+; CHECK-SD-NEXT: ushr v1.4s, v1.4s, #2
+; CHECK-SD-NEXT: mls v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test7v4s32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI6_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI6_0]
+; CHECK-GI-NEXT: umull2 v2.2d, v0.4s, v1.4s
+; CHECK-GI-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: sub v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: usra v1.4s, v2.4s, #1
+; CHECK-GI-NEXT: movi v2.4s, #7
+; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #2
+; CHECK-GI-NEXT: mls v0.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: ret
+ %r = urem <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @test100v4s32(<4 x i32> %a) {
+; CHECK-SD-LABEL: test100v4s32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
+; CHECK-SD-NEXT: movk w8, #20971, lsl #16
+; CHECK-SD-NEXT: dup v1.4s, w8
+; CHECK-SD-NEXT: umull2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT: uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: movi v2.4s, #100
+; CHECK-SD-NEXT: ushr v1.4s, v1.4s, #5
+; CHECK-SD-NEXT: mls v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test100v4s32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI7_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI7_0]
+; CHECK-GI-NEXT: umull2 v2.2d, v0.4s, v1.4s
+; CHECK-GI-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: uzp2 v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: movi v2.4s, #100
+; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #5
+; CHECK-GI-NEXT: mls v0.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: ret
+ %r = urem <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
+ ret <4 x i32> %r
+}
+
diff --git a/llvm/test/CodeGen/AArch64/pr58431.ll b/llvm/test/CodeGen/AArch64/pr58431.ll
index 88bab4af95d64..467ceb062f249 100644
--- a/llvm/test/CodeGen/AArch64/pr58431.ll
+++ b/llvm/test/CodeGen/AArch64/pr58431.ll
@@ -4,10 +4,12 @@
define i32 @f(i64 %0) {
; CHECK-LABEL: f:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #10 // =0xa
+; CHECK-NEXT: mov x8, #-7378697629483820647 // =0x9999999999999999
; CHECK-NEXT: mov w9, w0
-; CHECK-NEXT: udiv x10, x9, x8
-; CHECK-NEXT: msub x0, x10, x8, x9
+; CHECK-NEXT: mov w10, #10 // =0xa
+; CHECK-NEXT: eor x8, x8, #0x8000000000000003
+; CHECK-NEXT: umulh x8, x9, x8
+; CHECK-NEXT: msub x0, x8, x10, x9
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%2 = trunc i64 %0 to i32
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 24ec4fa48f778..6ae2f56f6ae6d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -211,91 +211,41 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) {
; CHECK-LABEL: v_urem_i32_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, 0x4996c7d8
-; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705
-; CHECK-NEXT: v_mov_b32_e32 v3, 0x12d8fb
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
-; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v2, v1, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT: v_mov_b32_e32 v1, 0xb2a50881
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
-; CHECK-NEXT: v_mul_lo_u32 v1, v1, v3
+; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; CHECK-NEXT: v_lshrrev_b32_e32 v2, 1, v2
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT: v_lshrrev_b32_e32 v1, 20, v1
+; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb
+; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, 0xffed2705, v0
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, 0xffed2705, v0
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = urem i32 %num, 1235195
ret i32 %result
}
define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_urem_v2i32_oddk_denom:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: v_mov_b32_e32 v2, 0x12d8fb
-; GISEL-NEXT: v_cvt_f32_u32_e32 v3, 0x12d8fb
-; GISEL-NEXT: v_mov_b32_e32 v4, 0xffed2705
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT: v_mul_lo_u32 v5, v3, v4
-; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v0, v3
-; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
-; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v0, v4
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0xffed2705, v1
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v0, v4
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, 0xffed2705, v1
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_urem_v2i32_oddk_denom:
-; CGP: ; %bb.0:
-; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
-; CGP-NEXT: v_mov_b32_e32 v3, 0xffed2705
-; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT: v_mul_lo_u32 v5, v2, v3
-; CGP-NEXT: v_mul_hi_u32 v5, v2, v5
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
-; CGP-NEXT: v_mul_hi_u32 v5, v0, v2
-; CGP-NEXT: v_mul_hi_u32 v2, v1, v2
-; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
-; CGP-NEXT: v_mul_lo_u32 v2, v2, v4
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v0, v3
-; CGP-NEXT: v_add_i32_e32 v5, vcc, 0xffed2705, v1
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT: v_cmp_g...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/145914
More information about the llvm-commits mailing list