[PATCH] D16195: [AArch64] Multiply extended 32-bit ints with `[U|S]MADDL'.
Chris Diamand via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 14 09:10:45 PST 2016
chrisdiamand_arm created this revision.
chrisdiamand_arm added reviewers: t.p.northover, jmolloy.
chrisdiamand_arm added a subscriber: llvm-commits.
Herald added subscribers: rengolin, aemerson.
During instruction selection, the AArch64 backend can recognise the
following pattern (shown for the signed case; the unsigned case uses
zext) and generate an [U|S]MADDL instruction, i.e. a multiply of two
32-bit operands with a 64-bit result:
(mul (sext i32), (sext i32))
However, when one of the operands is constant, the sign extension
gets folded into the constant in SelectionDAG::getNode(). This means
that the instruction selection sees this:
(mul (sext i32), i64)
...which doesn't match the pattern. Separate sign-extension and 64-bit
multiply instructions are generated instead, which is slower than a
single widening 32-bit multiply.
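Add patterns to match this case and generate the widening multiply
instruction, for both signed and unsigned multiplies, whenever the
constant fits in 32 bits. Constants that do not fit still use the
64-bit multiply.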
http://reviews.llvm.org/D16195
Files:
lib/Target/AArch64/AArch64InstrInfo.td
test/CodeGen/AArch64/arm64-mul.ll
Index: test/CodeGen/AArch64/arm64-mul.ll
===================================================================
--- test/CodeGen/AArch64/arm64-mul.ll
+++ test/CodeGen/AArch64/arm64-mul.ll
@@ -88,3 +88,34 @@
%tmp4 = sub i64 0, %tmp3
ret i64 %tmp4
}
+
+define i64 @t9(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK-LABEL: t9:
+; CHECK: umull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp1 = zext i32 %a to i64
+ %tmp2 = mul i64 %tmp1, 139968
+ ret i64 %tmp2
+}
+
+; Check 64-bit multiplication is used for constants > 32 bits.
+define i64 @t10(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK-LABEL: t10:
+; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ %tmp1 = sext i32 %a to i64
+ %tmp2 = mul i64 %tmp1, 2147483650 ; = 2^31 + 2
+ ret i64 %tmp2
+}
+
+; Check the sext_inreg case.
+define i64 @t11(i64 %a, i32 %b) nounwind {
+entry:
+; CHECK-LABEL: t11:
+; CHECK: smnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp1 = trunc i64 %a to i32
+ %tmp2 = sext i32 %tmp1 to i64
+ %tmp3 = mul i64 %tmp2, -2395238
+ %tmp4 = sub i64 0, %tmp3
+ ret i64 %tmp4
+}
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -528,6 +528,12 @@
return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;
+def s64imm_32bit : ImmLeaf<i64, [{
+ int64_t Imm64 = static_cast<int64_t>(Imm);
+ return Imm64 >= std::numeric_limits<int32_t>::min() &&
+ Imm64 <= std::numeric_limits<int32_t>::max();
+}]>;
+
def trunc_imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
@@ -730,10 +736,26 @@
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
(UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$N))),
+ (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$N)), XZR)>;
+def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$N))),
+ (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$N)), XZR)>;
+def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$N))),
+ (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+ (MOVi32imm (trunc_imm imm:$N)), XZR)>;
+
def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
(SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
(UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+
+def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$N)))),
+ (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$N)), XZR)>;
+def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$N)))),
+ (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$N)), XZR)>;
+def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$N)))),
+ (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+ (MOVi32imm (trunc_imm imm:$N)), XZR)>;
} // AddedComplexity = 5
def : MulAccumWAlias<"mul", MADDWrrr>;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D16195.44895.patch
Type: text/x-patch
Size: 3056 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160114/77c500c7/attachment.bin>
More information about the llvm-commits mailing list