[llvm] 454997d - [AArch64] Optimize muls with operands having enough zero bits.

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 20 06:34:57 PST 2022


Author: bipmis
Date: 2022-12-20T14:34:17Z
New Revision: 454997d3965936ebc813d4bea5e3a639fb0e7cb4

URL: https://github.com/llvm/llvm-project/commit/454997d3965936ebc813d4bea5e3a639fb0e7cb4
DIFF: https://github.com/llvm/llvm-project/commit/454997d3965936ebc813d4bea5e3a639fb0e7cb4.diff

LOG: [AArch64] Optimize muls with operands having enough zero bits.

For a 64-bit mul whose operands each have their top 32 bits known to be zero, we can generate a single umull instruction on the 32-bit sub-registers of the operands.

Differential Revision: https://reviews.llvm.org/D139411

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
    llvm/test/CodeGen/AArch64/addcarry-crash.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6bfeae06e3f1..01f21e38e839 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1954,6 +1954,26 @@ def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
           (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
 def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
           (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
+
+def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)),
+          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))),
+          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+
+def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)),
+          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
+def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)),
+          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
+
+def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))),
+          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
+          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+
+def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))),
+          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
+def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
+          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
 } // AddedComplexity = 5
 
 def : MulAccumWAlias<"mul", MADDWrrr>;

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
index 71db09f77062..44d31d9b4424 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -911,7 +911,7 @@ define i64 @umull_ldrb_h(ptr %x0, i16 %x1) {
 ; CHECK-NEXT:    ldrb w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xffff
-; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    umull x0, w8, w9
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i8, ptr %x0
@@ -927,7 +927,7 @@ define i64 @umull_ldrb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-NEXT:    ldrb w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xffff
-; CHECK-NEXT:    smull x0, w9, w8
+; CHECK-NEXT:    umull x0, w9, w8
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i8, ptr %x0
@@ -941,8 +941,7 @@ define i64 @umull_ldrh_w(ptr %x0, i32 %x1) {
 ; CHECK-LABEL: umull_ldrh_w:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    mov w9, w1
-; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    umull x0, w8, w1
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i16, ptr %x0
@@ -958,7 +957,7 @@ define i64 @umull_ldr_b(ptr %x0, i8 %x1) {
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xff
-; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    umull x0, w8, w9
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i32, ptr %x0
@@ -972,8 +971,7 @@ define i64 @umull_ldr2_w(ptr %x0, i32 %x1) {
 ; CHECK-LABEL: umull_ldr2_w:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    mov w9, w1
-; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    umull x0, w8, w1
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -988,7 +986,7 @@ define i64 @umull_ldr2_ldr2(ptr %x0, ptr %x1) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    umull x0, w8, w9
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1004,7 +1002,7 @@ define i64 @umull_ldr2_d(ptr %x0, i64 %x1) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    and x9, x1, #0xffffffff
-; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    umull x0, w8, w9
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1020,7 +1018,7 @@ define i64 @umaddl_ldrb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-NEXT:    ldrb w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xffff
-; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    umaddl x0, w8, w9, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i8, ptr %x0
@@ -1037,7 +1035,7 @@ define i64 @umaddl_ldrb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-NEXT:    ldrb w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xffff
-; CHECK-NEXT:    smaddl x0, w9, w8, x2
+; CHECK-NEXT:    umaddl x0, w9, w8, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i8, ptr %x0
@@ -1052,8 +1050,7 @@ define i64 @umaddl_ldrh_w(ptr %x0, i32 %x1, i64 %x2) {
 ; CHECK-LABEL: umaddl_ldrh_w:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    mov w9, w1
-; CHECK-NEXT:    madd x0, x8, x9, x2
+; CHECK-NEXT:    umaddl x0, w8, w1, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i16, ptr %x0
@@ -1070,7 +1067,7 @@ define i64 @umaddl_ldr_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xff
-; CHECK-NEXT:    madd x0, x8, x9, x2
+; CHECK-NEXT:    umaddl x0, w8, w9, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i32, ptr %x0
@@ -1085,8 +1082,7 @@ define i64 @umaddl_ldr2_w(ptr %x0, i32 %x1, i64 %x2) {
 ; CHECK-LABEL: umaddl_ldr2_w:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    mov w9, w1
-; CHECK-NEXT:    madd x0, x8, x9, x2
+; CHECK-NEXT:    umaddl x0, w8, w1, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1102,7 +1098,7 @@ define i64 @umaddl_ldr2_ldr2(ptr %x0, ptr %x1, i64 %x2) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    madd x0, x8, x9, x2
+; CHECK-NEXT:    umaddl x0, w8, w9, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1119,7 +1115,7 @@ define i64 @umaddl_ldr2_d(ptr %x0, i64 %x1, i64 %x2) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    and x9, x1, #0xffffffff
-; CHECK-NEXT:    madd x0, x8, x9, x2
+; CHECK-NEXT:    umaddl x0, w8, w9, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1136,7 +1132,7 @@ define i64 @umnegl_ldrb_h(ptr %x0, i16 %x1) {
 ; CHECK-NEXT:    ldrb w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xffff
-; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    umnegl x0, w8, w9
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i8, ptr %x0
@@ -1153,7 +1149,7 @@ define i64 @umnegl_ldrb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-NEXT:    ldrb w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xffff
-; CHECK-NEXT:    smnegl x0, w9, w8
+; CHECK-NEXT:    umnegl x0, w9, w8
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i8, ptr %x0
@@ -1168,8 +1164,7 @@ define i64 @umnegl_ldrh_w(ptr %x0, i32 %x1) {
 ; CHECK-LABEL: umnegl_ldrh_w:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    mov w9, w1
-; CHECK-NEXT:    mneg x0, x8, x9
+; CHECK-NEXT:    umnegl x0, w8, w1
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i16, ptr %x0
@@ -1186,7 +1181,7 @@ define i64 @umnegl_ldr_b(ptr %x0, i8 %x1) {
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xff
-; CHECK-NEXT:    mneg x0, x8, x9
+; CHECK-NEXT:    umnegl x0, w8, w9
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i32, ptr %x0
@@ -1201,8 +1196,7 @@ define i64 @umnegl_ldr2_w(ptr %x0, i32 %x1) {
 ; CHECK-LABEL: umnegl_ldr2_w:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    mov w9, w1
-; CHECK-NEXT:    mneg x0, x8, x9
+; CHECK-NEXT:    umnegl x0, w8, w1
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1218,7 +1212,7 @@ define i64 @umnegl_ldr2_ldr2(ptr %x0, ptr %x1) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    mneg x0, x8, x9
+; CHECK-NEXT:    umnegl x0, w8, w9
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1235,7 +1229,7 @@ define i64 @umnegl_ldr2_d(ptr %x0, i64 %x1) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    and x9, x1, #0xffffffff
-; CHECK-NEXT:    mneg x0, x8, x9
+; CHECK-NEXT:    umnegl x0, w8, w9
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1252,7 +1246,7 @@ define i64 @umsubl_ldrb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-NEXT:    ldrb w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xffff
-; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    umsubl x0, w8, w9, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i8, ptr %x0
@@ -1269,7 +1263,7 @@ define i64 @umsubl_ldrb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-NEXT:    ldrb w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xffff
-; CHECK-NEXT:    smsubl x0, w9, w8, x2
+; CHECK-NEXT:    umsubl x0, w9, w8, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i8, ptr %x0
@@ -1284,8 +1278,7 @@ define i64 @umsubl_ldrh_w(ptr %x0, i32 %x1, i64 %x2) {
 ; CHECK-LABEL: umsubl_ldrh_w:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    mov w9, w1
-; CHECK-NEXT:    msub x0, x8, x9, x2
+; CHECK-NEXT:    umsubl x0, w8, w1, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i16, ptr %x0
@@ -1302,7 +1295,7 @@ define i64 @umsubl_ldr_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x9, x1, #0xff
-; CHECK-NEXT:    msub x0, x8, x9, x2
+; CHECK-NEXT:    umsubl x0, w8, w9, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i32, ptr %x0
@@ -1317,8 +1310,7 @@ define i64 @umsubl_ldr2_w(ptr %x0, i32 %x1, i64 %x2) {
 ; CHECK-LABEL: umsubl_ldr2_w:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    mov w9, w1
-; CHECK-NEXT:    msub x0, x8, x9, x2
+; CHECK-NEXT:    umsubl x0, w8, w1, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1334,7 +1326,7 @@ define i64 @umsubl_ldr2_ldr2(ptr %x0, ptr %x1, i64 %x2) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    msub x0, x8, x9, x2
+; CHECK-NEXT:    umsubl x0, w8, w9, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1351,7 +1343,7 @@ define i64 @umsubl_ldr2_d(ptr %x0, i64 %x1, i64 %x2) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    and x9, x1, #0xffffffff
-; CHECK-NEXT:    msub x0, x8, x9, x2
+; CHECK-NEXT:    umsubl x0, w8, w9, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0
@@ -1365,10 +1357,9 @@ entry:
 define i64 @umull_ldr2_w_cc1(ptr %x0, i32 %x1) {
 ; CHECK-LABEL: umull_ldr2_w_cc1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr x9, [x0]
-; CHECK-NEXT:    mov w8, w1
-; CHECK-NEXT:    and x9, x9, #0x7fffffff
-; CHECK-NEXT:    mul x0, x9, x8
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    and x8, x8, #0x7fffffff
+; CHECK-NEXT:    umull x0, w8, w1
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i64, ptr %x0

diff  --git a/llvm/test/CodeGen/AArch64/addcarry-crash.ll b/llvm/test/CodeGen/AArch64/addcarry-crash.ll
index 0df9131f3c55..4d07e048c13e 100644
--- a/llvm/test/CodeGen/AArch64/addcarry-crash.ll
+++ b/llvm/test/CodeGen/AArch64/addcarry-crash.ll
@@ -8,7 +8,7 @@ define i64 @foo(ptr nocapture readonly %ptr, i64 %a, i64 %b, i64 %c) local_unnam
 ; CHECK-NEXT:    ldr w8, [x0, #4]
 ; CHECK-NEXT:    lsr x9, x1, #32
 ; CHECK-NEXT:    cmn x3, x2
-; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    umull x8, w8, w9
 ; CHECK-NEXT:    cinc x0, x8, hs
 ; CHECK-NEXT:    ret
 entry:


        


More information about the llvm-commits mailing list