[llvm] [CombinerHelper]: Use undef for handling divisors of one (PR #91037)
via llvm-commits
llvm-commits at lists.llvm.org
Sun May 19 16:11:38 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
Author: AtariDreams (AtariDreams)
Changes:
---
Full diff: https://github.com/llvm/llvm-project/pull/91037.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+31-21)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll (+68-27)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir (+12-11)
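
For background, `buildUDivUsingMul` rewrites an unsigned division by a constant into a multiply-high by a fixed-point reciprocal (the "magic" factor) followed by shifts; a divisor of one has no such reciprocal, which is the case this PR changes to use undef. Below is a minimal standalone sketch of that idea for 16-bit operands, assuming a Clang/GCC-style `__builtin_clz`; it is deliberately simpler than LLVM's `UnsignedDivisionByConstantInfo` (no pre-shift or NPQ fix-up) and the names are illustrative only:

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative only: for a constant divisor d >= 2, pick k = 16 + ceil(log2(d))
// and m = ceil(2^k / d).  Then for every 16-bit x,  x / d == (x * m) >> k,
// because the rounding error x * (m*d - 2^k) always stays below 2^k.
static uint16_t divByConst(uint16_t x, uint16_t d) {
  assert(d >= 2 && "d == 1 has no reciprocal; the combine keeps the dividend");
  unsigned ceilLog2 = 32 - __builtin_clz((unsigned)d - 1); // ceil(log2(d)) for d >= 2
  unsigned k = 16 + ceilLog2;                              // fraction bits of the reciprocal
  uint64_t m = (uint64_t{1} << k) / d +
               ((uint64_t{1} << k) % d != 0);              // ceil(2^k / d), the "magic" factor
  return (uint16_t)(((uint64_t)x * m) >> k);               // multiply-high plus post-shift
}

int main() {
  for (unsigned d = 2; d < 1000; ++d)
    for (unsigned x = 0; x < 65536; x += 97)
      assert(divByConst((uint16_t)x, (uint16_t)d) == x / d);
  puts("magic-multiply division agrees with plain division");
}
```

The combine performs the same expansion per vector lane, which is why each lane gets its own entry in the magic-factor and shift build-vectors in the diff below.
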
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 22eb4a3e0d7cb..644f56b6bf276 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5099,6 +5099,8 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
auto &MIB = Builder;
bool UseNPQ = false;
+ bool UsePreShift = false;
+ bool UsePostShift = false;
SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](const Constant *C) {
@@ -5111,27 +5113,30 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
// Magic algorithm doesn't work for division by 1. We need to emit a select
// at the end.
- // TODO: Use undef values for divisor of 1.
- if (!Divisor.isOne()) {
-
- // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
- // in the dividend exceeds the leading zeros for the divisor.
- UnsignedDivisionByConstantInfo magics =
- UnsignedDivisionByConstantInfo::get(
- Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
-
- Magic = std::move(magics.Magic);
-
- assert(magics.PreShift < Divisor.getBitWidth() &&
- "We shouldn't generate an undefined shift!");
- assert(magics.PostShift < Divisor.getBitWidth() &&
- "We shouldn't generate an undefined shift!");
- assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
- PreShift = magics.PreShift;
- PostShift = magics.PostShift;
- SelNPQ = magics.IsAdd;
+ if (Divisor.isOne()) {
+ PreShifts.push_back(MIB.buildUndef(ScalarShiftAmtTy).getReg(0));
+ MagicFactors.push_back(MIB.buildUndef(ScalarTy).getReg(0));
+ NPQFactors.push_back(MIB.buildUndef(ScalarTy).getReg(0));
+ PostShifts.push_back(MIB.buildUndef(ScalarShiftAmtTy).getReg(0));
+ return true;
}
+ // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
+ // in the dividend exceeds the leading zeros for the divisor.
+ UnsignedDivisionByConstantInfo magics = UnsignedDivisionByConstantInfo::get(
+ Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
+
+ Magic = std::move(magics.Magic);
+
+ assert(magics.PreShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert(magics.PostShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
+ PreShift = magics.PreShift;
+ PostShift = magics.PostShift;
+ SelNPQ = magics.IsAdd;
+
PreShifts.push_back(
MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
@@ -5143,6 +5148,8 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
PostShifts.push_back(
MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
UseNPQ |= SelNPQ;
+ UsePreShift |= PreShift != 0;
+ UsePostShift |= magics.PostShift != 0;
return true;
};
@@ -5167,7 +5174,9 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
}
Register Q = LHS;
- Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
+
+ if (UsePreShift)
+ Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
// Multiply the numerator (operand 0) by the magic value.
Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
@@ -5185,7 +5194,8 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
}
- Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
+ if (UsePostShift)
+ Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
auto One = MIB.buildConstant(Ty, 1);
auto IsOne = MIB.buildICmp(
CmpInst::Predicate::ICMP_EQ,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index c97a00ccdd455..ab2f2cb5f12a9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -169,17 +169,17 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
;
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
; GISEL: // %bb.0:
-; GISEL-NEXT: adrp x8, .LCPI4_2
-; GISEL-NEXT: adrp x9, .LCPI4_0
-; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
-; GISEL-NEXT: adrp x8, .LCPI4_1
-; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0]
-; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_1]
+; GISEL-NEXT: mov w8, #171 // =0xab
+; GISEL-NEXT: fmov s1, w8
+; GISEL-NEXT: adrp x8, .LCPI4_0
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_0]
+; GISEL-NEXT: mov w8, #7 // =0x7
; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
+; GISEL-NEXT: shl v3.16b, v3.16b, #7
; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
-; GISEL-NEXT: neg v2.16b, v3.16b
-; GISEL-NEXT: shl v3.16b, v4.16b, #7
+; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: neg v2.16b, v2.16b
; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
; GISEL-NEXT: sshr v2.16b, v3.16b, #7
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
@@ -217,25 +217,66 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
;
; GISEL-LABEL: pr38477:
; GISEL: // %bb.0:
-; GISEL-NEXT: adrp x8, .LCPI5_3
-; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
-; GISEL-NEXT: adrp x8, .LCPI5_2
-; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
-; GISEL-NEXT: adrp x8, .LCPI5_0
-; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; GISEL-NEXT: mov w8, #4957 // =0x135d
+; GISEL-NEXT: mov w9, #16385 // =0x4001
+; GISEL-NEXT: fmov s1, w8
+; GISEL-NEXT: mov w8, #57457 // =0xe071
+; GISEL-NEXT: fmov s4, w9
+; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: mov w8, #4103 // =0x1007
+; GISEL-NEXT: mov w9, #35545 // =0x8ad9
+; GISEL-NEXT: fmov s5, w9
+; GISEL-NEXT: mov w9, #2048 // =0x800
+; GISEL-NEXT: mov v1.h[1], v1.h[0]
+; GISEL-NEXT: fmov s6, w9
+; GISEL-NEXT: adrp x9, .LCPI5_0
+; GISEL-NEXT: mov v1.h[2], v2.h[0]
+; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: mov w8, #32768 // =0x8000
+; GISEL-NEXT: fmov s3, w8
+; GISEL-NEXT: mov w8, #0 // =0x0
+; GISEL-NEXT: mov v1.h[3], v2.h[0]
+; GISEL-NEXT: mov v2.h[1], v3.h[0]
+; GISEL-NEXT: mov v1.h[4], v4.h[0]
+; GISEL-NEXT: fmov s4, w8
+; GISEL-NEXT: mov w8, #6 // =0x6
+; GISEL-NEXT: mov v2.h[2], v4.h[0]
+; GISEL-NEXT: mov v1.h[5], v5.h[0]
+; GISEL-NEXT: fmov s5, w8
+; GISEL-NEXT: mov w8, #2115 // =0x843
+; GISEL-NEXT: mov v2.h[3], v4.h[0]
+; GISEL-NEXT: mov v7.h[1], v5.h[0]
+; GISEL-NEXT: mov v1.h[6], v6.h[0]
+; GISEL-NEXT: fmov s6, w8
+; GISEL-NEXT: mov w8, #12 // =0xc
+; GISEL-NEXT: mov v2.h[4], v4.h[0]
+; GISEL-NEXT: mov v7.h[2], v5.h[0]
+; GISEL-NEXT: mov v1.h[7], v6.h[0]
+; GISEL-NEXT: fmov s6, w8
+; GISEL-NEXT: mov w8, #14 // =0xe
+; GISEL-NEXT: fmov s16, w8
+; GISEL-NEXT: mov w8, #4 // =0x4
+; GISEL-NEXT: mov v2.h[5], v4.h[0]
+; GISEL-NEXT: mov v7.h[3], v6.h[0]
+; GISEL-NEXT: umull2 v6.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
-; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
-; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
-; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h
-; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
-; GISEL-NEXT: ldr d3, [x8, :lo12:.LCPI5_0]
-; GISEL-NEXT: adrp x8, .LCPI5_1
-; GISEL-NEXT: ushll v3.8h, v3.8b, #0
+; GISEL-NEXT: mov v2.h[6], v4.h[0]
+; GISEL-NEXT: mov v7.h[4], v16.h[0]
+; GISEL-NEXT: uzp2 v1.8h, v1.8h, v6.8h
+; GISEL-NEXT: mov v2.h[7], v3.h[0]
+; GISEL-NEXT: mov v7.h[5], v5.h[0]
+; GISEL-NEXT: ldr d5, [x9, :lo12:.LCPI5_0]
+; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
+; GISEL-NEXT: mov v7.h[6], v4.h[0]
+; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
+; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
+; GISEL-NEXT: fmov s3, w8
+; GISEL-NEXT: mov v7.h[7], v3.h[0]
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
-; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1]
+; GISEL-NEXT: ushll v3.8h, v5.8b, #0
; GISEL-NEXT: shl v3.8h, v3.8h, #15
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
-; GISEL-NEXT: neg v2.8h, v4.8h
+; GISEL-NEXT: neg v2.8h, v7.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
; GISEL-NEXT: sshr v2.8h, v3.8h, #15
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
@@ -257,10 +298,10 @@ define i32 @udiv_div_by_180(i32 %x)
;
; GISEL-LABEL: udiv_div_by_180:
; GISEL: // %bb.0:
-; GISEL-NEXT: uxtb w8, w0
-; GISEL-NEXT: mov w9, #5826 // =0x16c2
-; GISEL-NEXT: movk w9, #364, lsl #16
-; GISEL-NEXT: umull x8, w8, w9
+; GISEL-NEXT: mov w8, #5826 // =0x16c2
+; GISEL-NEXT: and w9, w0, #0xff
+; GISEL-NEXT: movk w8, #364, lsl #16
+; GISEL-NEXT: umull x8, w9, w8
; GISEL-NEXT: lsr x0, x8, #32
; GISEL-NEXT: // kill: def $w0 killed $w0 killed $x0
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
index 02233b9f498bd..11203925d1fe0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
@@ -228,16 +228,16 @@ body: |
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 -85
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -85
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<16 x s8>) = G_UMULH [[COPY]], [[BUILD_VECTOR]]
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<16 x s8>) = G_LSHR [[UMULH]], [[BUILD_VECTOR1]](<16 x s8>)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s1>) = G_BUILD_VECTOR [[C3]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s1>) = G_BUILD_VECTOR [[C2]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1)
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<16 x s8>) = G_SELECT [[BUILD_VECTOR2]](<16 x s1>), [[COPY]], [[LSHR]]
; CHECK-NEXT: $q0 = COPY [[SELECT]](<16 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
@@ -264,6 +264,7 @@ body: |
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 4957
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
@@ -277,9 +278,9 @@ body: |
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2048
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C4]](s16), [[C5]](s16), [[C7]](s16), [[C9]](s16), [[C10]](s16), [[C11]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C2]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C2]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C3]](s16), [[C3]](s16), [[C6]](s16), [[C8]](s16), [[C3]](s16), [[C]](s16), [[C12]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[C1]](s16), [[C4]](s16), [[C5]](s16), [[C7]](s16), [[C9]](s16), [[C10]](s16), [[C11]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[C2]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C2]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[C3]](s16), [[C3]](s16), [[C6]](s16), [[C8]](s16), [[C3]](s16), [[C]](s16), [[C12]](s16)
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR]]
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR1]]
``````````
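
The divisor-of-one lanes are safe to fill with undef because the expansion already ends in a `G_SELECT` on `divisor == 1` that returns the original dividend, so nothing the multiply/shift chain computes for those lanes is ever observed. Here is a scalar model of one lane under that assumption (illustrative only; the parameter names are invented and the NPQ fix-up path is omitted):

```cpp
#include <cstdint>

// Scalar model of one vector lane of the rewritten expansion (illustrative,
// not the GlobalISel code; magic/preShift/postShift are assumed to come from
// UnsignedDivisionByConstantInfo).  For divisor == 1 the trailing select
// returns the dividend, so the values fed through the multiply/shift chain
// for that lane are dead, which is why those lanes can now be
// G_IMPLICIT_DEF instead of dummy constants.
uint32_t udivLaneModel(uint32_t x, uint32_t divisor, uint32_t magic,
                       unsigned preShift, unsigned postShift) {
  uint32_t q = x >> preShift;                  // G_LSHR, only built if UsePreShift
  q = (uint32_t)(((uint64_t)q * magic) >> 32); // G_UMULH with the magic factor
  q >>= postShift;                             // G_LSHR, only built if UsePostShift
  return divisor == 1 ? x : q;                 // G_SELECT on (divisor == 1)
}
```

The patch additionally tracks `UsePreShift` and `UsePostShift` so the surrounding `G_LSHR`s are only built when some lane actually needs a non-zero shift.
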
https://github.com/llvm/llvm-project/pull/91037