[llvm-branch-commits] [llvm] 63dce70 - [ARM] Handle any extend whilst lowering addw/addl/subw/subl

David Green via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jan 6 03:30:54 PST 2021


Author: David Green
Date: 2021-01-06T11:26:39Z
New Revision: 63dce70b794eb99ebbfdeed3ca9aafca2b8fe5c4

URL: https://github.com/llvm/llvm-project/commit/63dce70b794eb99ebbfdeed3ca9aafca2b8fe5c4
DIFF: https://github.com/llvm/llvm-project/commit/63dce70b794eb99ebbfdeed3ca9aafca2b8fe5c4.diff

LOG: [ARM] Handle any extend whilst lowering addw/addl/subw/subl

As in a9b6440edd, use zanyext to treat any_extends as zero extends
during lowering, so that addw/addl/subw/subl nodes are still created.

Differential Revision: https://reviews.llvm.org/D93835
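
For context, zanyext is a pattern fragment that matches either a zero extend or
an any extend, so the same vaddl/vaddw/vsubl/vsubw patterns can select both. A
minimal sketch of what such a fragment looks like, assuming the usual PatFrags
form (not quoted from the actual source):

    def zanyext : PatFrags<(ops node:$op),
                           [(zext node:$op),
                            (anyext node:$op)]>;

Treating an any_extend as a zero extend is safe because its high bits are
undefined, so the lowering is free to zero them. In the tests below this shows
up where the add/sub result is truncated and re-extended: only the low bits of
the operands matter, so the operand extends end up as any_extends, which
previously blocked these patterns and forced separate vmovl + vadd/vsub
sequences.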

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMInstrNEON.td
    llvm/test/CodeGen/ARM/vadd.ll
    llvm/test/CodeGen/ARM/vsub.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index bb30dbd3a5c9..a8c0d05d91c4 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -4197,10 +4197,10 @@ def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
 defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                             "vaddl", "s", add, sext, 1>;
 defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
-                            "vaddl", "u", add, zext, 1>;
+                            "vaddl", "u", add, zanyext, 1>;
 //   VADDW    : Vector Add Wide (Q = Q + D)
 defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
-defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
+defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
 //   VHADD    : Vector Halving Add
 defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
@@ -5045,10 +5045,10 @@ def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
 defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                             "vsubl", "s", sub, sext, 0>;
 defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
-                            "vsubl", "u", sub, zext, 0>;
+                            "vsubl", "u", sub, zanyext, 0>;
 //   VSUBW    : Vector Subtract Wide (Q = Q - D)
 defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
-defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
+defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>;
 //   VHSUB    : Vector Halving Subtract
 defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,

diff --git a/llvm/test/CodeGen/ARM/vadd.ll b/llvm/test/CodeGen/ARM/vadd.ll
index 5f0ddd17c8c7..282108244e5c 100644
--- a/llvm/test/CodeGen/ARM/vadd.ll
+++ b/llvm/test/CodeGen/ARM/vadd.ll
@@ -224,9 +224,7 @@ define <2 x i64> @vaddlu32(<2 x i32> %A, <2 x i32> %B) {
 define <8 x i16> @vaddla8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: vaddla8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u8 q8, d1
-; CHECK-NEXT:    vmovl.u8 q9, d0
-; CHECK-NEXT:    vadd.i16 q0, q9, q8
+; CHECK-NEXT:    vaddl.u8 q0, d0, d1
 ; CHECK-NEXT:    vbic.i16 q0, #0xff00
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <8 x i8> %A to <8 x i16>
@@ -239,11 +237,9 @@ define <8 x i16> @vaddla8(<8 x i8> %A, <8 x i8> %B) {
 define <4 x i32> @vaddla16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: vaddla16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u16 q8, d1
-; CHECK-NEXT:    vmovl.u16 q9, d0
-; CHECK-NEXT:    vmov.i32 q10, #0xffff
-; CHECK-NEXT:    vadd.i32 q8, q9, q8
-; CHECK-NEXT:    vand q0, q8, q10
+; CHECK-NEXT:    vmov.i32 q8, #0xffff
+; CHECK-NEXT:    vaddl.u16 q9, d0, d1
+; CHECK-NEXT:    vand q0, q9, q8
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <4 x i16> %A to <4 x i32>
   %tmp4 = zext <4 x i16> %B to <4 x i32>
@@ -255,11 +251,9 @@ define <4 x i32> @vaddla16(<4 x i16> %A, <4 x i16> %B) {
 define <2 x i64> @vaddla32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: vaddla32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u32 q8, d1
-; CHECK-NEXT:    vmovl.u32 q9, d0
-; CHECK-NEXT:    vmov.i64 q10, #0xffffffff
-; CHECK-NEXT:    vadd.i64 q8, q9, q8
-; CHECK-NEXT:    vand q0, q8, q10
+; CHECK-NEXT:    vmov.i64 q8, #0xffffffff
+; CHECK-NEXT:    vaddl.u32 q9, d0, d1
+; CHECK-NEXT:    vand q0, q9, q8
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <2 x i32> %A to <2 x i64>
   %tmp4 = zext <2 x i32> %B to <2 x i64>
@@ -331,8 +325,7 @@ define <2 x i64> @vaddwu32(<2 x i64> %A, <2 x i32> %B) {
 define <8 x i16> @vaddwa8(<8 x i16> %A, <8 x i8> %B) {
 ; CHECK-LABEL: vaddwa8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u8 q8, d2
-; CHECK-NEXT:    vadd.i16 q0, q0, q8
+; CHECK-NEXT:    vaddw.u8 q0, q0, d2
 ; CHECK-NEXT:    vbic.i16 q0, #0xff00
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <8 x i8> %B to <8 x i16>
@@ -344,10 +337,9 @@ define <8 x i16> @vaddwa8(<8 x i16> %A, <8 x i8> %B) {
 define <4 x i32> @vaddwa16(<4 x i32> %A, <4 x i16> %B) {
 ; CHECK-LABEL: vaddwa16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u16 q8, d2
-; CHECK-NEXT:    vmov.i32 q9, #0xffff
-; CHECK-NEXT:    vadd.i32 q8, q0, q8
-; CHECK-NEXT:    vand q0, q8, q9
+; CHECK-NEXT:    vmov.i32 q8, #0xffff
+; CHECK-NEXT:    vaddw.u16 q9, q0, d2
+; CHECK-NEXT:    vand q0, q9, q8
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <4 x i16> %B to <4 x i32>
   %tmp4 = add <4 x i32> %A, %tmp3
@@ -358,10 +350,9 @@ define <4 x i32> @vaddwa16(<4 x i32> %A, <4 x i16> %B) {
 define <2 x i64> @vaddwa32(<2 x i64> %A, <2 x i32> %B) {
 ; CHECK-LABEL: vaddwa32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u32 q8, d2
-; CHECK-NEXT:    vmov.i64 q9, #0xffffffff
-; CHECK-NEXT:    vadd.i64 q8, q0, q8
-; CHECK-NEXT:    vand q0, q8, q9
+; CHECK-NEXT:    vmov.i64 q8, #0xffffffff
+; CHECK-NEXT:    vaddw.u32 q9, q0, d2
+; CHECK-NEXT:    vand q0, q9, q8
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <2 x i32> %B to <2 x i64>
   %tmp4 = add <2 x i64> %A, %tmp3

diff --git a/llvm/test/CodeGen/ARM/vsub.ll b/llvm/test/CodeGen/ARM/vsub.ll
index 8743fcc47889..b5b0a1026ae7 100644
--- a/llvm/test/CodeGen/ARM/vsub.ll
+++ b/llvm/test/CodeGen/ARM/vsub.ll
@@ -224,9 +224,7 @@ define <2 x i64> @vsublu32(<2 x i32> %A, <2 x i32> %B) {
 define <8 x i16> @vsubla8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: vsubla8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u8 q8, d1
-; CHECK-NEXT:    vmovl.u8 q9, d0
-; CHECK-NEXT:    vsub.i16 q0, q9, q8
+; CHECK-NEXT:    vsubl.u8 q0, d0, d1
 ; CHECK-NEXT:    vbic.i16 q0, #0xff00
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <8 x i8> %A to <8 x i16>
@@ -239,11 +237,9 @@ define <8 x i16> @vsubla8(<8 x i8> %A, <8 x i8> %B) {
 define <4 x i32> @vsubla16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: vsubla16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u16 q8, d1
-; CHECK-NEXT:    vmovl.u16 q9, d0
-; CHECK-NEXT:    vmov.i32 q10, #0xffff
-; CHECK-NEXT:    vsub.i32 q8, q9, q8
-; CHECK-NEXT:    vand q0, q8, q10
+; CHECK-NEXT:    vmov.i32 q8, #0xffff
+; CHECK-NEXT:    vsubl.u16 q9, d0, d1
+; CHECK-NEXT:    vand q0, q9, q8
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <4 x i16> %A to <4 x i32>
   %tmp4 = zext <4 x i16> %B to <4 x i32>
@@ -255,11 +251,9 @@ define <4 x i32> @vsubla16(<4 x i16> %A, <4 x i16> %B) {
 define <2 x i64> @vsubla32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: vsubla32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u32 q8, d1
-; CHECK-NEXT:    vmovl.u32 q9, d0
-; CHECK-NEXT:    vmov.i64 q10, #0xffffffff
-; CHECK-NEXT:    vsub.i64 q8, q9, q8
-; CHECK-NEXT:    vand q0, q8, q10
+; CHECK-NEXT:    vmov.i64 q8, #0xffffffff
+; CHECK-NEXT:    vsubl.u32 q9, d0, d1
+; CHECK-NEXT:    vand q0, q9, q8
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <2 x i32> %A to <2 x i64>
   %tmp4 = zext <2 x i32> %B to <2 x i64>
@@ -331,8 +325,7 @@ define <2 x i64> @vsubwu32(<2 x i64> %A, <2 x i32> %B) {
 define <8 x i16> @vsubwa8(<8 x i16> %A, <8 x i8> %B) {
 ; CHECK-LABEL: vsubwa8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u8 q8, d2
-; CHECK-NEXT:    vsub.i16 q0, q0, q8
+; CHECK-NEXT:    vsubw.u8 q0, q0, d2
 ; CHECK-NEXT:    vbic.i16 q0, #0xff00
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <8 x i8> %B to <8 x i16>
@@ -344,10 +337,9 @@ define <8 x i16> @vsubwa8(<8 x i16> %A, <8 x i8> %B) {
 define <4 x i32> @vsubwa16(<4 x i32> %A, <4 x i16> %B) {
 ; CHECK-LABEL: vsubwa16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u16 q8, d2
-; CHECK-NEXT:    vmov.i32 q9, #0xffff
-; CHECK-NEXT:    vsub.i32 q8, q0, q8
-; CHECK-NEXT:    vand q0, q8, q9
+; CHECK-NEXT:    vmov.i32 q8, #0xffff
+; CHECK-NEXT:    vsubw.u16 q9, q0, d2
+; CHECK-NEXT:    vand q0, q9, q8
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <4 x i16> %B to <4 x i32>
   %tmp4 = sub <4 x i32> %A, %tmp3
@@ -358,10 +350,9 @@ define <4 x i32> @vsubwa16(<4 x i32> %A, <4 x i16> %B) {
 define <2 x i64> @vsubwa32(<2 x i64> %A, <2 x i32> %B) {
 ; CHECK-LABEL: vsubwa32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmovl.u32 q8, d2
-; CHECK-NEXT:    vmov.i64 q9, #0xffffffff
-; CHECK-NEXT:    vsub.i64 q8, q0, q8
-; CHECK-NEXT:    vand q0, q8, q9
+; CHECK-NEXT:    vmov.i64 q8, #0xffffffff
+; CHECK-NEXT:    vsubw.u32 q9, q0, d2
+; CHECK-NEXT:    vand q0, q9, q8
 ; CHECK-NEXT:    bx lr
   %tmp3 = zext <2 x i32> %B to <2 x i64>
   %tmp4 = sub <2 x i64> %A, %tmp3

More information about the llvm-branch-commits mailing list