[llvm] 081b7f6 - [AArch64] Optimize muls with operands having enough sign bits.

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 5 07:08:57 PST 2022


Author: bipmis
Date: 2022-12-05T15:08:31Z
New Revision: 081b7f6b031310b61c2fc952d9508bccb38162dd

URL: https://github.com/llvm/llvm-project/commit/081b7f6b031310b61c2fc952d9508bccb38162dd
DIFF: https://github.com/llvm/llvm-project/commit/081b7f6b031310b61c2fc952d9508bccb38162dd.diff

LOG: [AArch64] Optimize muls with operands having enough sign bits.

For muls with 64-bit operands where each operand has more than 32 sign bits, we can instead generate a single smull instruction on the 32-bit operands.

Differential Revision: https://reviews.llvm.org/D138817

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
    llvm/test/CodeGen/AArch64/aarch64-smull.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9c0c37410a3e..a6a70e07404a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -795,6 +795,12 @@ def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
   }];
 }
 
+// Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
+def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
+  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
+         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
+}]>;
+
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
@@ -1925,6 +1931,26 @@ def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
                                     (s64imm_32bit:$C)))),
           (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                      (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+
+def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
+          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
+          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+
+def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
+          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
+def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)),
+          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
+
+def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
+          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
+          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+
+def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
+          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
+def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
+          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
 } // AddedComplexity = 5
 
 def : MulAccumWAlias<"mul", MADDWrrr>;

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
index a10063d9e0c6..270f545a919a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -76,3 +76,831 @@ entry:
   %mul = mul nsw i64 %conv, %shr
   ret i64 %mul
 }
+
+define i64 @smull_ldrsb_b(i8* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsb_b:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsb_b_commuted(i8* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsb_b_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w9, w8
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsb_h(i8* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsb_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsb_w(i8* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsb_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_b(i16* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsh_b:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_h(i16* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsh_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_h_commuted(i16* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsh_h_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w9, w8
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_w(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsh_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_b(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_b:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_h(i32* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsw_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_w(i32* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsw_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_w_commuted(i32* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsw_w_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  ret i64 %mul
+}
+
+define i64 @smull_sext_bb(i8 %x0, i8 %x1) {
+; CHECK-LABEL: smull_sext_bb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtb x8, w0
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %sext = sext i8 %x0 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_shift(i32* %x0, i64 %x1) {
+; CHECK-LABEL: smull_ldrsw_shift:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %shl = shl i64 %x1, 32
+  %shr = ashr exact i64 %shl, 32
+  %mul = mul i64 %sext, %shr
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_zextw(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsh_zextw:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    mov w9, w1
+; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %zext = zext i32 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zexth(i32* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsw_zexth:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xffff
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i16 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zextb(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_zextb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zextb_commuted(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_zextb_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smull x0, w9, w8
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul i64 %zext, %sext
+  ret i64 %mul
+}
+
+define i64 @smaddl_ldrsb_h(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsb_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsb_h_commuted(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsb_h_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smaddl x0, w9, w8, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsh_w(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsh_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smaddl x0, w8, w1, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsh_w_commuted(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsh_w_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smaddl x0, w8, w1, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_b(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_b_commuted(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_b_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smaddl x0, w9, w8, x2
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext2, %sext
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_ldrsw(i32* %x0, i32* %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_ldrsw:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    ldrsw x9, [x1]
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %ext64_2 = load i32, i32* %x1
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i32 %ext64_2 to i64
+  %mul = mul i64 %sext, %sext2
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_sext_hh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxth x8, w0
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %sext = sext i16 %x0 to i64
+  %sext2 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_shift(i32* %x0, i64 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_shift:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %shl = shl i64 %x1, 32
+  %shr = ashr exact i64 %shl, 32
+  %mul = mul i64 %sext, %shr
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_zextb(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_zextb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smnegl_ldrsb_h(i8* %x0, i16 %x1) {
+; CHECK-LABEL: smnegl_ldrsb_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsb_h_commuted(i8* %x0, i16 %x1) {
+; CHECK-LABEL: smnegl_ldrsb_h_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smnegl x0, w9, w8
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsh_w(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smnegl_ldrsh_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smnegl x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsh_w_commuted(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smnegl_ldrsh_w_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smnegl x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_b(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_b_commuted(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_b_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smnegl x0, w9, w8
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext2, %sext
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_ldrsw(i32* %x0, i32* %x1) {
+; CHECK-LABEL: smnegl_ldrsw_ldrsw:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    ldrsw x9, [x1]
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %ext64_2 = load i32, i32* %x1
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i32 %ext64_2 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_sext_hh(i16 %x0, i16 %x1) {
+; CHECK-LABEL: smnegl_sext_hh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxth x8, w0
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %sext = sext i16 %x0 to i64
+  %sext2 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_shift(i32* %x0, i64 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_shift:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %shl = shl i64 %x1, 32
+  %shr = ashr exact i64 %shl, 32
+  %mul = mul i64 %sext, %shr
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_zextb(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_zextb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsb_h(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsb_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsb_h_commuted(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsb_h_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smsubl x0, w9, w8, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsh_w(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsh_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smsubl x0, w8, w1, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsh_w_commuted(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsh_w_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smsubl x0, w8, w1, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_b(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_b_commuted(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_b_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smsubl x0, w9, w8, x2
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext2, %sext
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_ldrsw(i32* %x0, i32* %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_ldrsw:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    ldrsw x9, [x1]
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %ext64_2 = load i32, i32* %x1
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i32 %ext64_2 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_sext_hh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxth x8, w0
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %sext = sext i16 %x0 to i64
+  %sext2 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_shift(i32* %x0, i64 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_shift:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %shl = shl i64 %x1, 32
+  %shr = ashr exact i64 %shl, 32
+  %mul = mul i64 %sext, %shr
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_zextb(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_zextb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smull_sext_ashr31(i32 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_sext_ashr31:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    asr x9, x1, #31
+; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    ret
+entry:
+  %tmp1 = sext i32 %a to i64
+  %c = ashr i64 %b, 31
+  %tmp3 = mul i64 %tmp1, %c
+  ret i64 %tmp3
+}
+
+define i64 @smull_sext_ashr32(i32 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_sext_ashr32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    asr x8, x1, #32
+; CHECK-NEXT:    smull x0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %tmp1 = sext i32 %a to i64
+  %c = ashr i64 %b, 32
+  %tmp3 = mul i64 %tmp1, %c
+  ret i64 %tmp3
+}
+
+
+define i64 @smull_ashr31_both(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_ashr31_both:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    asr x8, x0, #31
+; CHECK-NEXT:    asr x9, x1, #31
+; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    ret
+entry:
+  %tmp1 = ashr i64 %a, 31
+  %c = ashr i64 %b, 31
+  %tmp3 = mul i64 %tmp1, %c
+  ret i64 %tmp3
+}
+
+define i64 @smull_ashr32_both(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_ashr32_both:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    asr x8, x0, #32
+; CHECK-NEXT:    asr x9, x1, #32
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %tmp1 = ashr i64 %a, 32
+  %c = ashr i64 %b, 32
+  %tmp3 = mul i64 %tmp1, %c
+  ret i64 %tmp3
+}

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 7f058dd7e56d..bdc156f8b05d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -137,8 +137,8 @@ define <2 x i64> @smull_zext_v2i32_v2i64(<2 x i16>* %A, <2 x i32>* %B) nounwind
 ; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mov x10, v0.d[1]
-; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    mul x9, x11, x10
+; CHECK-NEXT:    smull x8, w8, w9
+; CHECK-NEXT:    smull x9, w11, w10
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    mov v0.d[1], x9
 ; CHECK-NEXT:    ret
@@ -162,8 +162,8 @@ define <2 x i64> @smull_zext_and_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounw
 ; CHECK-NEXT:    fmov x10, d0
 ; CHECK-NEXT:    mov x8, v1.d[1]
 ; CHECK-NEXT:    mov x11, v0.d[1]
-; CHECK-NEXT:    mul x9, x10, x9
-; CHECK-NEXT:    mul x8, x11, x8
+; CHECK-NEXT:    smull x9, w10, w9
+; CHECK-NEXT:    smull x8, w11, w8
 ; CHECK-NEXT:    fmov d0, x9
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret


        


More information about the llvm-commits mailing list