[llvm] 4cf3db4 - [GlobalISel] Add sdiv exact (X, constant) -> mul combine.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 1 05:34:10 PDT 2022
Author: Amara Emerson
Date: 2022-09-01T13:34:00+01:00
New Revision: 4cf3db41daa936626fd1857ce1f962b38374f2af
URL: https://github.com/llvm/llvm-project/commit/4cf3db41daa936626fd1857ce1f962b38374f2af
DIFF: https://github.com/llvm/llvm-project/commit/4cf3db41daa936626fd1857ce1f962b38374f2af.diff
LOG: [GlobalISel] Add sdiv exact (X, constant) -> mul combine.
This port of the SDAG optimization is only for the exact sdiv case.
Differential Revision: https://reviews.llvm.org/D130517
Added:
llvm/test/CodeGen/AArch64/GlobalISel/combine-sdiv.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/CodeGen/GlobalISel/Utils.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 73edc3c379704..fa87a48ba3514 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -647,6 +647,13 @@ class CombinerHelper {
bool matchUDivByConst(MachineInstr &MI);
void applyUDivByConst(MachineInstr &MI);
+ /// Given a G_SDIV \p MI expressing a signed divide by constant, return an
+ /// expression that implements it by multiplying by a magic number.
+ /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+ MachineInstr *buildSDivUsingMul(MachineInstr &MI);
+ bool matchSDivByConst(MachineInstr &MI);
+ void applySDivByConst(MachineInstr &MI);
+
// G_UMULH x, (1 << c)) -> x >> (bitwidth - c)
bool matchUMulHToLShr(MachineInstr &MI);
void applyUMulHToLShr(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 89f08d2000215..f8313adf3f92c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -764,7 +764,13 @@ def udiv_by_const : GICombineRule<
[{ return Helper.matchUDivByConst(*${root}); }]),
(apply [{ Helper.applyUDivByConst(*${root}); }])>;
-def intdiv_combines : GICombineGroup<[udiv_by_const]>;
+def sdiv_by_const : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_SDIV):$root,
+ [{ return Helper.matchSDivByConst(*${root}); }]),
+ (apply [{ Helper.applySDivByConst(*${root}); }])>;
+
+def intdiv_combines : GICombineGroup<[udiv_by_const, sdiv_by_const]>;
def reassoc_ptradd : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 05a25bc3078eb..84a6feaa6513f 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4935,6 +4935,108 @@ void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
}
+bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
+ Register Dst = MI.getOperand(0).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ auto &MF = *MI.getMF();
+ AttributeList Attr = MF.getFunction().getAttributes();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+ return false;
+
+ // Don't do this for minsize because the instruction sequence is usually
+ // larger.
+ if (MF.getFunction().hasMinSize())
+ return false;
+
+ // If the sdiv has an 'exact' flag we can use a simpler lowering.
+ if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
+ return matchUnaryPredicate(
+ MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); });
+ }
+
+ // Don't support the general case for now.
+ return false;
+}
+
+void CombinerHelper::applySDivByConst(MachineInstr &MI) {
+ auto *NewMI = buildSDivUsingMul(MI);
+ replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
+ auto &SDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = SDiv.getReg(0);
+ Register LHS = SDiv.getReg(1);
+ Register RHS = SDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ScalarTy = Ty.getScalarType();
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+ auto &MIB = Builder;
+ MIB.setInstrAndDebugLoc(MI);
+
+ bool UseSRA = false;
+ SmallVector<Register, 16> Shifts, Factors;
+
+ auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
+ bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).hasValue();
+
+ auto BuildSDIVPattern = [&](const Constant *C) {
+ // Don't recompute inverses for each splat element.
+ if (IsSplat && !Factors.empty()) {
+ Shifts.push_back(Shifts[0]);
+ Factors.push_back(Factors[0]);
+ return true;
+ }
+
+ auto *CI = cast<ConstantInt>(C);
+ APInt Divisor = CI->getValue();
+ unsigned Shift = Divisor.countTrailingZeros();
+ if (Shift) {
+ Divisor.ashrInPlace(Shift);
+ UseSRA = true;
+ }
+
+ // Calculate the multiplicative inverse modulo BW.
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = Divisor.getBitWidth();
+ APInt Factor = Divisor.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
+ Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
+ return true;
+ };
+
+ // Collect all magic values from the build vector.
+ bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
+ (void)Matched;
+ assert(Matched && "Expected unary predicate match to succeed");
+
+ Register Shift, Factor;
+ if (Ty.isVector()) {
+ Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
+ Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
+ } else {
+ Shift = Shifts[0];
+ Factor = Factors[0];
+ }
+
+ Register Res = LHS;
+
+ if (UseSRA)
+ Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
+
+ return MIB.buildMul(Ty, Res, Factor);
+}
+
bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UMULH);
Register RHS = MI.getOperand(2).getReg();
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 15c21fe48c43e..a6a51f35ab7c9 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1077,8 +1077,8 @@ Optional<APInt> llvm::getIConstantSplatVal(const Register Reg,
return None;
}
-Optional<APInt> getIConstantSplatVal(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
+Optional<APInt> llvm::getIConstantSplatVal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
return getIConstantSplatVal(MI.getOperand(0).getReg(), MRI);
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sdiv.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sdiv.mir
new file mode 100644
index 0000000000000..e99ee84100a39
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sdiv.mir
@@ -0,0 +1,133 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+--- |
+ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+ define void @sdiv_exact() { ret void }
+ define void @sdiv_noexact() { ret void }
+ define void @sdiv_exact_minsize() #0 { ret void }
+ define void @div_v4s32() { ret void }
+ define void @div_v4s32_splat() { ret void }
+
+ attributes #0 = { minsize }
+
+...
+---
+name: sdiv_exact
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: sdiv_exact
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -991146299
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = exact G_ASHR [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[ASHR]], [[C1]]
+ ; CHECK-NEXT: $w0 = COPY [[MUL]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 104
+ %2:_(s32) = exact G_SDIV %0, %1
+ $w0 = COPY %2(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: sdiv_noexact
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: sdiv_noexact
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 104
+ ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY [[SDIV]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 104
+ %2:_(s32) = G_SDIV %0, %1
+ $w0 = COPY %2(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: sdiv_exact_minsize
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: sdiv_exact_minsize
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 104
+ ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = exact G_SDIV [[COPY]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY [[SDIV]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 104
+ %2:_(s32) = exact G_SDIV %0, %1
+ $w0 = COPY %2(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: div_v4s32
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: div_v4s32
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -991146299
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 954437177
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C2]](s32), [[C1]](s32), [[C2]](s32)
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = exact G_ASHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<4 x s32>) = G_MUL [[ASHR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: $q0 = COPY [[MUL]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<4 x s32>) = COPY $q0
+ %c1:_(s32) = G_CONSTANT i32 104
+ %c2:_(s32) = G_CONSTANT i32 72
+ %1:_(<4 x s32>) = G_BUILD_VECTOR %c1(s32), %c2(s32), %c1(s32), %c2(s32)
+ %3:_(<4 x s32>) = exact G_SDIV %0, %1
+ $q0 = COPY %3(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: div_v4s32_splat
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: div_v4s32_splat
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -991146299
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = exact G_ASHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<4 x s32>) = G_MUL [[ASHR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: $q0 = COPY [[MUL]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<4 x s32>) = COPY $q0
+ %c1:_(s32) = G_CONSTANT i32 104
+ %1:_(<4 x s32>) = G_BUILD_VECTOR %c1(s32), %c1(s32), %c1(s32), %c1(s32)
+ %3:_(<4 x s32>) = exact G_SDIV %0, %1
+ $q0 = COPY %3(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
More information about the llvm-commits
mailing list