[llvm] 081b7f6 - [AArch64] Optimize muls with operands having enough sign bits.
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 5 07:08:57 PST 2022
Author: bipmis
Date: 2022-12-05T15:08:31Z
New Revision: 081b7f6b031310b61c2fc952d9508bccb38162dd
URL: https://github.com/llvm/llvm-project/commit/081b7f6b031310b61c2fc952d9508bccb38162dd
DIFF: https://github.com/llvm/llvm-project/commit/081b7f6b031310b61c2fc952d9508bccb38162dd.diff
LOG: [AArch64] Optimize muls with operands having enough sign bits.
For muls with 64-bit operands where each operand has more than 32 sign bits, we can generate a single smull instruction on 32-bit operands.
Differential Revision: https://reviews.llvm.org/D138817
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
llvm/test/CodeGen/AArch64/aarch64-smull.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9c0c37410a3e..a6a70e07404a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -795,6 +795,12 @@ def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
}];
}
+// Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
+def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
+ return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
+ CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
+}]>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -1925,6 +1931,26 @@ def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
(s64imm_32bit:$C)))),
(SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
(MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+
+def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
+ (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
+ (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+
+def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
+ (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
+def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)),
+ (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
+
+def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
+ (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
+ (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+
+def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
+ (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
+def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
+ (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
} // AddedComplexity = 5
def : MulAccumWAlias<"mul", MADDWrrr>;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
index a10063d9e0c6..270f545a919a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -76,3 +76,831 @@ entry:
%mul = mul nsw i64 %conv, %shr
ret i64 %mul
}
+
+define i64 @smull_ldrsb_b(i8* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsb_b:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i8, i8* %x0
+ %sext = sext i8 %ext64 to i64
+ %sext4 = sext i8 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsb_b_commuted(i8* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsb_b_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smull x0, w9, w8
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i8, i8* %x0
+ %sext = sext i8 %ext64 to i64
+ %sext4 = sext i8 %x1 to i64
+ %mul = mul i64 %sext4, %sext
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsb_h(i8* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsb_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i8, i8* %x0
+ %sext = sext i8 %ext64 to i64
+ %sext4 = sext i16 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsb_w(i8* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsb_w:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: smull x0, w8, w1
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i8, i8* %x0
+ %sext = sext i8 %ext64 to i64
+ %sext4 = sext i32 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsh_b(i16* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsh_b:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %sext4 = sext i8 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsh_h(i16* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsh_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %sext4 = sext i16 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsh_h_commuted(i16* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsh_h_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smull x0, w9, w8
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %sext4 = sext i16 %x1 to i64
+ %mul = mul i64 %sext4, %sext
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsh_w(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsh_w:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: smull x0, w8, w1
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %sext4 = sext i32 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsw_b(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_b:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %sext4 = sext i8 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsw_h(i32* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsw_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %sext4 = sext i16 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsw_w(i32* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsw_w:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: smull x0, w8, w1
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %sext4 = sext i32 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsw_w_commuted(i32* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsw_w_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: smull x0, w8, w1
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %sext4 = sext i32 %x1 to i64
+ %mul = mul i64 %sext4, %sext
+ ret i64 %mul
+}
+
+define i64 @smull_sext_bb(i8 %x0, i8 %x1) {
+; CHECK-LABEL: smull_sext_bb:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtb x8, w0
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %sext = sext i8 %x0 to i64
+ %sext4 = sext i8 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsw_shift(i32* %x0, i64 %x1) {
+; CHECK-LABEL: smull_ldrsw_shift:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: sxtw x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %shl = shl i64 %x1, 32
+ %shr = ashr exact i64 %shl, 32
+ %mul = mul i64 %sext, %shr
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsh_zextw(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsh_zextw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: mov w9, w1
+; CHECK-NEXT: mul x0, x8, x9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %zext = zext i32 %x1 to i64
+ %mul = mul i64 %sext, %zext
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zexth(i32* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsw_zexth:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: and x9, x1, #0xffff
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %zext = zext i16 %x1 to i64
+ %mul = mul i64 %sext, %zext
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zextb(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_zextb:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: and x9, x1, #0xff
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %zext = zext i8 %x1 to i64
+ %mul = mul i64 %sext, %zext
+ ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zextb_commuted(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_zextb_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: and x9, x1, #0xff
+; CHECK-NEXT: smull x0, w9, w8
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %zext = zext i8 %x1 to i64
+ %mul = mul i64 %zext, %sext
+ ret i64 %mul
+}
+
+define i64 @smaddl_ldrsb_h(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsb_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smaddl x0, w8, w9, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i8, i8* %x0
+ %sext = sext i8 %ext64 to i64
+ %sext4 = sext i16 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ %add = add i64 %x2, %mul
+ ret i64 %add
+}
+
+define i64 @smaddl_ldrsb_h_commuted(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsb_h_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smaddl x0, w9, w8, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i8, i8* %x0
+ %sext = sext i8 %ext64 to i64
+ %sext4 = sext i16 %x1 to i64
+ %mul = mul i64 %sext4, %sext
+ %add = add i64 %x2, %mul
+ ret i64 %add
+}
+
+define i64 @smaddl_ldrsh_w(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsh_w:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: smaddl x0, w8, w1, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %sext4 = sext i32 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ %add = add i64 %x2, %mul
+ ret i64 %add
+}
+
+define i64 @smaddl_ldrsh_w_commuted(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsh_w_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: smaddl x0, w8, w1, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %sext4 = sext i32 %x1 to i64
+ %mul = mul i64 %sext4, %sext
+ %add = add i64 %x2, %mul
+ ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_b(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smaddl x0, w8, w9, x2
+; CHECK-NEXT: ret
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %sext2 = sext i8 %x1 to i64
+ %mul = mul i64 %sext, %sext2
+ %add = add i64 %x2, %mul
+ ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_b_commuted(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_b_commuted:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smaddl x0, w9, w8, x2
+; CHECK-NEXT: ret
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %sext2 = sext i8 %x1 to i64
+ %mul = mul i64 %sext2, %sext
+ %add = add i64 %x2, %mul
+ ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_ldrsw(i32* %x0, i32* %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_ldrsw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: ldrsw x9, [x1]
+; CHECK-NEXT: smaddl x0, w8, w9, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %ext64_2 = load i32, i32* %x1
+ %sext = sext i32 %ext64 to i64
+ %sext2 = sext i32 %ext64_2 to i64
+ %mul = mul i64 %sext, %sext2
+ %add = add i64 %x2, %mul
+ ret i64 %add
+}
+
+define i64 @smaddl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_sext_hh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxth x8, w0
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smaddl x0, w8, w9, x2
+; CHECK-NEXT: ret
+entry:
+ %sext = sext i16 %x0 to i64
+ %sext2 = sext i16 %x1 to i64
+ %mul = mul i64 %sext, %sext2
+ %add = add i64 %x2, %mul
+ ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_shift(i32* %x0, i64 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_shift:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: sxtw x9, w1
+; CHECK-NEXT: smaddl x0, w8, w9, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %shl = shl i64 %x1, 32
+ %shr = ashr exact i64 %shl, 32
+ %mul = mul i64 %sext, %shr
+ %add = add i64 %x2, %mul
+ ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_zextb(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_zextb:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: and x9, x1, #0xff
+; CHECK-NEXT: smaddl x0, w8, w9, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %zext = zext i8 %x1 to i64
+ %mul = mul i64 %sext, %zext
+ %add = add i64 %x2, %mul
+ ret i64 %add
+}
+
+define i64 @smnegl_ldrsb_h(i8* %x0, i16 %x1) {
+; CHECK-LABEL: smnegl_ldrsb_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smnegl x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i8, i8* %x0
+ %sext = sext i8 %ext64 to i64
+ %sext4 = sext i16 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ %sub = sub i64 0, %mul
+ ret i64 %sub
+}
+
+define i64 @smnegl_ldrsb_h_commuted(i8* %x0, i16 %x1) {
+; CHECK-LABEL: smnegl_ldrsb_h_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smnegl x0, w9, w8
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i8, i8* %x0
+ %sext = sext i8 %ext64 to i64
+ %sext4 = sext i16 %x1 to i64
+ %mul = mul i64 %sext4, %sext
+ %sub = sub i64 0, %mul
+ ret i64 %sub
+}
+
+define i64 @smnegl_ldrsh_w(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smnegl_ldrsh_w:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: smnegl x0, w8, w1
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %sext4 = sext i32 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ %sub = sub i64 0, %mul
+ ret i64 %sub
+}
+
+define i64 @smnegl_ldrsh_w_commuted(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smnegl_ldrsh_w_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: smnegl x0, w8, w1
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %sext4 = sext i32 %x1 to i64
+ %mul = mul i64 %sext4, %sext
+ %sub = sub i64 0, %mul
+ ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_b(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smnegl x0, w8, w9
+; CHECK-NEXT: ret
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %sext2 = sext i8 %x1 to i64
+ %mul = mul i64 %sext, %sext2
+ %sub = sub i64 0, %mul
+ ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_b_commuted(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_b_commuted:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smnegl x0, w9, w8
+; CHECK-NEXT: ret
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %sext2 = sext i8 %x1 to i64
+ %mul = mul i64 %sext2, %sext
+ %sub = sub i64 0, %mul
+ ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_ldrsw(i32* %x0, i32* %x1) {
+; CHECK-LABEL: smnegl_ldrsw_ldrsw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: ldrsw x9, [x1]
+; CHECK-NEXT: smnegl x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %ext64_2 = load i32, i32* %x1
+ %sext = sext i32 %ext64 to i64
+ %sext2 = sext i32 %ext64_2 to i64
+ %mul = mul i64 %sext, %sext2
+ %sub = sub i64 0, %mul
+ ret i64 %sub
+}
+
+define i64 @smnegl_sext_hh(i16 %x0, i16 %x1) {
+; CHECK-LABEL: smnegl_sext_hh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxth x8, w0
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smnegl x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %sext = sext i16 %x0 to i64
+ %sext2 = sext i16 %x1 to i64
+ %mul = mul i64 %sext, %sext2
+ %sub = sub i64 0, %mul
+ ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_shift(i32* %x0, i64 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_shift:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: sxtw x9, w1
+; CHECK-NEXT: smnegl x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %shl = shl i64 %x1, 32
+ %shr = ashr exact i64 %shl, 32
+ %mul = mul i64 %sext, %shr
+ %sub = sub i64 0, %mul
+ ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_zextb(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_zextb:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: and x9, x1, #0xff
+; CHECK-NEXT: smnegl x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %zext = zext i8 %x1 to i64
+ %mul = mul i64 %sext, %zext
+ %sub = sub i64 0, %mul
+ ret i64 %sub
+}
+
+define i64 @smsubl_ldrsb_h(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsb_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smsubl x0, w8, w9, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i8, i8* %x0
+ %sext = sext i8 %ext64 to i64
+ %sext4 = sext i16 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ %sub = sub i64 %x2, %mul
+ ret i64 %sub
+}
+
+define i64 @smsubl_ldrsb_h_commuted(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsb_h_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smsubl x0, w9, w8, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i8, i8* %x0
+ %sext = sext i8 %ext64 to i64
+ %sext4 = sext i16 %x1 to i64
+ %mul = mul i64 %sext4, %sext
+ %sub = sub i64 %x2, %mul
+ ret i64 %sub
+}
+
+define i64 @smsubl_ldrsh_w(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsh_w:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: smsubl x0, w8, w1, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %sext4 = sext i32 %x1 to i64
+ %mul = mul i64 %sext, %sext4
+ %sub = sub i64 %x2, %mul
+ ret i64 %sub
+}
+
+define i64 @smsubl_ldrsh_w_commuted(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsh_w_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: smsubl x0, w8, w1, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i16, i16* %x0
+ %sext = sext i16 %ext64 to i64
+ %sext4 = sext i32 %x1 to i64
+ %mul = mul i64 %sext4, %sext
+ %sub = sub i64 %x2, %mul
+ ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_b(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smsubl x0, w8, w9, x2
+; CHECK-NEXT: ret
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %sext2 = sext i8 %x1 to i64
+ %mul = mul i64 %sext, %sext2
+ %sub = sub i64 %x2, %mul
+ ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_b_commuted(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_b_commuted:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smsubl x0, w9, w8, x2
+; CHECK-NEXT: ret
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %sext2 = sext i8 %x1 to i64
+ %mul = mul i64 %sext2, %sext
+ %sub = sub i64 %x2, %mul
+ ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_ldrsw(i32* %x0, i32* %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_ldrsw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: ldrsw x9, [x1]
+; CHECK-NEXT: smsubl x0, w8, w9, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %ext64_2 = load i32, i32* %x1
+ %sext = sext i32 %ext64 to i64
+ %sext2 = sext i32 %ext64_2 to i64
+ %mul = mul i64 %sext, %sext2
+ %sub = sub i64 %x2, %mul
+ ret i64 %sub
+}
+
+define i64 @smsubl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_sext_hh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxth x8, w0
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smsubl x0, w8, w9, x2
+; CHECK-NEXT: ret
+entry:
+ %sext = sext i16 %x0 to i64
+ %sext2 = sext i16 %x1 to i64
+ %mul = mul i64 %sext, %sext2
+ %sub = sub i64 %x2, %mul
+ ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_shift(i32* %x0, i64 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_shift:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: sxtw x9, w1
+; CHECK-NEXT: smsubl x0, w8, w9, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %shl = shl i64 %x1, 32
+ %shr = ashr exact i64 %shl, 32
+ %mul = mul i64 %sext, %shr
+ %sub = sub i64 %x2, %mul
+ ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_zextb(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_zextb:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: and x9, x1, #0xff
+; CHECK-NEXT: smsubl x0, w8, w9, x2
+; CHECK-NEXT: ret
+entry:
+ %ext64 = load i32, i32* %x0
+ %sext = sext i32 %ext64 to i64
+ %zext = zext i8 %x1 to i64
+ %mul = mul i64 %sext, %zext
+ %sub = sub i64 %x2, %mul
+ ret i64 %sub
+}
+
+define i64 @smull_sext_ashr31(i32 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_sext_ashr31:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: asr x9, x1, #31
+; CHECK-NEXT: mul x0, x8, x9
+; CHECK-NEXT: ret
+entry:
+ %tmp1 = sext i32 %a to i64
+ %c = ashr i64 %b, 31
+ %tmp3 = mul i64 %tmp1, %c
+ ret i64 %tmp3
+}
+
+define i64 @smull_sext_ashr32(i32 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_sext_ashr32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: asr x8, x1, #32
+; CHECK-NEXT: smull x0, w8, w0
+; CHECK-NEXT: ret
+entry:
+ %tmp1 = sext i32 %a to i64
+ %c = ashr i64 %b, 32
+ %tmp3 = mul i64 %tmp1, %c
+ ret i64 %tmp3
+}
+
+
+define i64 @smull_ashr31_both(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_ashr31_both:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: asr x8, x0, #31
+; CHECK-NEXT: asr x9, x1, #31
+; CHECK-NEXT: mul x0, x8, x9
+; CHECK-NEXT: ret
+entry:
+ %tmp1 = ashr i64 %a, 31
+ %c = ashr i64 %b, 31
+ %tmp3 = mul i64 %tmp1, %c
+ ret i64 %tmp3
+}
+
+define i64 @smull_ashr32_both(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_ashr32_both:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: asr x8, x0, #32
+; CHECK-NEXT: asr x9, x1, #32
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
+entry:
+ %tmp1 = ashr i64 %a, 32
+ %c = ashr i64 %b, 32
+ %tmp3 = mul i64 %tmp1, %c
+ ret i64 %tmp3
+}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 7f058dd7e56d..bdc156f8b05d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -137,8 +137,8 @@ define <2 x i64> @smull_zext_v2i32_v2i64(<2 x i16>* %A, <2 x i32>* %B) nounwind
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: mov x10, v0.d[1]
-; CHECK-NEXT: mul x8, x8, x9
-; CHECK-NEXT: mul x9, x11, x10
+; CHECK-NEXT: smull x8, w8, w9
+; CHECK-NEXT: smull x9, w11, w10
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: ret
@@ -162,8 +162,8 @@ define <2 x i64> @smull_zext_and_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounw
; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: mov x8, v1.d[1]
; CHECK-NEXT: mov x11, v0.d[1]
-; CHECK-NEXT: mul x9, x10, x9
-; CHECK-NEXT: mul x8, x11, x8
+; CHECK-NEXT: smull x9, w10, w9
+; CHECK-NEXT: smull x8, w11, w8
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: mov v0.d[1], x8
; CHECK-NEXT: ret
More information about the llvm-commits
mailing list