[llvm] 7a81782 - [AArch64][CodeGen] Fold the mov and lsl into ubfiz
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 9 08:50:54 PDT 2022
Author: zhongyunde
Date: 2022-09-09T23:50:29+08:00
New Revision: 7a8178258516bb3a209dd90b7fbf5525f97151e4
URL: https://github.com/llvm/llvm-project/commit/7a8178258516bb3a209dd90b7fbf5525f97151e4
DIFF: https://github.com/llvm/llvm-project/commit/7a8178258516bb3a209dd90b7fbf5525f97151e4.diff
LOG: [AArch64][CodeGen] Fold the mov and lsl into ubfiz
Fix the issue exposed by D132322; depends on D132939.
Reviewed By: efriedma, paulwalker-arm
Differential Revision: https://reviews.llvm.org/D132325
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
llvm/test/CodeGen/AArch64/select_cc.ll
llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll
llvm/test/CodeGen/AArch64/tbl-loops.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a951bfafd8f31..85a174e13147e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7349,6 +7349,11 @@ def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
(i64 (i64shift_a imm0_63:$imm)),
(i64 (i64shift_sext_i32 imm0_63:$imm)))>;
+def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
+ (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
+ (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
+
// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
index 2f8b6273ca9ff..107cc4f15ccaa 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -303,13 +303,12 @@ entry:
; CHECK: sub sp, sp, #16
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40]
-; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
-; CHECK: mov w9, w0
+; CHECK: ubfiz x9, x0, #2, #32
+; CHECK: add x9, x9, #15
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56]
+; CHECK: and x9, x9, #0x7fffffff0
; CHECK: mov x10, sp
-; CHECK: lsl x9, x9, #2
-; CHECK: add x9, x9, #15
-; CHECK: and x9, x9, #0x7fffffff0
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through frame pointer
@@ -350,15 +349,14 @@ entry:
; Check correctness of cfi pseudo-instructions
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
-; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
-; CHECK: mov w9, w0
-; CHECK: mov x10, sp
-; CHECK: lsl x9, x9, #2
+; CHECK: ubfiz x9, x0, #2, #32
; CHECK: add x9, x9, #15
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
-; CHECK: mov sp, x[[VLASPTMP]]
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through frame pointer
; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-4]
; Check correct accessing of the VLA variable through the base pointer
@@ -411,14 +409,13 @@ entry:
; CHECK: mov x19, sp
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #56]
-; CHECK: ldr d[[DARG:[0-9]+]], [x29, #72]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
-; CHECK: mov w9, w0
+; CHECK: ubfiz x9, x0, #2, #32
+; CHECK: add x9, x9, #15
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #72]
+; CHECK: and x9, x9, #0x7fffffff0
; CHECK: mov x10, sp
-; CHECK: lsl x9, x9, #2
-; CHECK: add x9, x9, #15
-; CHECK: and x9, x9, #0x7fffffff0
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
@@ -459,14 +456,13 @@ entry:
; CHECK-MACHO: mov x19, sp
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
-; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
-; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: ubfiz x9, x0, #2, #32
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
; CHECK-MACHO: mov x10, sp
-; CHECK-MACHO: lsl x9, x9, #2
-; CHECK-MACHO: add x9, x9, #15
-; CHECK-MACHO: and x9, x9, #0x7fffffff0
; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK-MACHO: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
@@ -512,14 +508,13 @@ entry:
; CHECK: mov x19, sp
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40]
-; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
-; CHECK: mov w9, w0
+; CHECK: ubfiz x9, x0, #2, #32
+; CHECK: add x9, x9, #15
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56]
+; CHECK: and x9, x9, #0x7fffffff0
; CHECK: mov x10, sp
-; CHECK: lsl x9, x9, #2
-; CHECK: add x9, x9, #15
-; CHECK: and x9, x9, #0x7fffffff0
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
@@ -547,14 +542,13 @@ entry:
; CHECK-MACHO: mov x19, sp
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
-; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
-; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: ubfiz x9, x0, #2, #32
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
; CHECK-MACHO: mov x10, sp
-; CHECK-MACHO: lsl x9, x9, #2
-; CHECK-MACHO: add x9, x9, #15
-; CHECK-MACHO: and x9, x9, #0x7fffffff0
; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK-MACHO: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
@@ -598,14 +592,13 @@ entry:
; CHECK: mov x19, sp
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40]
-; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
-; CHECK: mov w9, w0
+; CHECK: ubfiz x9, x0, #2, #32
+; CHECK: add x9, x9, #15
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56]
+; CHECK: and x9, x9, #0x7fffffff0
; CHECK: mov x10, sp
-; CHECK: lsl x9, x9, #2
-; CHECK: add x9, x9, #15
-; CHECK: and x9, x9, #0x7fffffff0
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
@@ -633,14 +626,13 @@ entry:
; CHECK-MACHO: mov x19, sp
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
-; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
-; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: ubfiz x9, x0, #2, #32
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
; CHECK-MACHO: mov x10, sp
-; CHECK-MACHO: lsl x9, x9, #2
-; CHECK-MACHO: add x9, x9, #15
-; CHECK-MACHO: and x9, x9, #0x7fffffff0
; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK-MACHO: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index ecfcef1c27149..a41f27efdee13 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -137,13 +137,13 @@ define void @matrix_mul_double_shuffle(i32 %N, i32* nocapture %C, i16* nocapture
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: dup v0.4h, w8
; CHECK-NEXT: and x8, x0, #0xfffffff8
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 def $x0
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w9, [x2], #16
; CHECK-NEXT: subs x8, x8, #8
; CHECK-NEXT: dup v1.4h, w9
-; CHECK-NEXT: mov w9, w0
-; CHECK-NEXT: lsl x9, x9, #2
+; CHECK-NEXT: ubfiz x9, x0, #2, #32
; CHECK-NEXT: add w0, w0, #8
; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: str q1, [x1, x9]
diff --git a/llvm/test/CodeGen/AArch64/select_cc.ll b/llvm/test/CodeGen/AArch64/select_cc.ll
index 1444eecb946c4..e69df568d996b 100644
--- a/llvm/test/CodeGen/AArch64/select_cc.ll
+++ b/llvm/test/CodeGen/AArch64/select_cc.ll
@@ -6,7 +6,7 @@ define i64 @select_ogt_float(float %a, float %b) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: lsl x0, x8, #2
+; CHECK-NEXT: ubfiz x0, x8, #2, #32
; CHECK-NEXT: ret
entry:
%cc = fcmp ogt float %a, %b
@@ -19,7 +19,7 @@ define i64 @select_ule_float_inverse(float %a, float %b) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: lsl x0, x8, #2
+; CHECK-NEXT: ubfiz x0, x8, #2, #32
; CHECK-NEXT: ret
entry:
%cc = fcmp ule float %a, %b
@@ -32,7 +32,7 @@ define i64 @select_eq_i32(i32 %a, i32 %b) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: lsl x0, x8, #2
+; CHECK-NEXT: ubfiz x0, x8, #2, #32
; CHECK-NEXT: ret
entry:
%cc = icmp eq i32 %a, %b
@@ -45,7 +45,7 @@ define i64 @select_ne_i32_inverse(i32 %a, i32 %b) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: lsl x0, x8, #2
+; CHECK-NEXT: ubfiz x0, x8, #2, #32
; CHECK-NEXT: ret
entry:
%cc = icmp ne i32 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll b/llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll
index 6a120d9e4d13e..6dbfcf8a39ff6 100644
--- a/llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll
+++ b/llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll
@@ -86,12 +86,13 @@ declare void @llvm.stackrestore(i8*)
; VLA allocation
-; CHECK: mov [[X2:x[0-9]+]], sp
-; CHECK: mov [[SAVE:x[0-9]+]], sp
-; CHECK: add [[X1:x[0-9]+]], [[X1]], #15
-; CHECK: and [[X1]], [[X1]], #0x7fffffff0
+; CHECK: ubfiz x8, x0, #2, #32
+; CHECK: mov x9, sp
+; CHECK: add x8, x8, #15
+; CHECK: mov [[SAVE:x[0-9]+]], sp
+; CHECK: and [[X1:x[0-9]+]], [[X1]], #0x7fffffff0
; Saving the SP via llvm.stacksave()
-; CHECK: sub [[X2]], [[X2]], [[X1]]
+; CHECK: sub [[X1]], [[X2:x[0-9]+]], [[X1]]
; The next instruction comes from llvm.stackrestore()
; CHECK: mov sp, [[SAVE]]
diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index 946128cc18c0b..8d015abeed56e 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -151,7 +151,7 @@ define void @loop2(i8* noalias nocapture noundef writeonly %dst, float* nocaptur
; CHECK-NEXT: cmp w8, #2
; CHECK-NEXT: b.ls .LBB1_4
; CHECK-NEXT: // %bb.2: // %vector.memcheck
-; CHECK-NEXT: lsl x9, x8, #1
+; CHECK-NEXT: ubfiz x9, x8, #1, #32
; CHECK-NEXT: add x9, x9, #2
; CHECK-NEXT: add x10, x1, x9, lsl #2
; CHECK-NEXT: cmp x10, x0
@@ -535,7 +535,7 @@ define void @loop4(i8* noalias nocapture noundef writeonly %dst, float* nocaptur
; CHECK-NEXT: cmp w8, #2
; CHECK-NEXT: b.ls .LBB3_4
; CHECK-NEXT: // %bb.2: // %vector.memcheck
-; CHECK-NEXT: lsl x9, x8, #2
+; CHECK-NEXT: ubfiz x9, x8, #2, #32
; CHECK-NEXT: add x9, x9, #4
; CHECK-NEXT: add x10, x1, x9, lsl #2
; CHECK-NEXT: cmp x10, x0