[llvm] aaca8e2 - [AArch64] Don't recreate nodes in tryCombineLongOpWithDup

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 26 14:41:22 PDT 2023


Author: David Green
Date: 2023-06-26T22:41:18+01:00
New Revision: aaca8e2c3489652ebdf27712ab396984fa577c1e

URL: https://github.com/llvm/llvm-project/commit/aaca8e2c3489652ebdf27712ab396984fa577c1e
DIFF: https://github.com/llvm/llvm-project/commit/aaca8e2c3489652ebdf27712ab396984fa577c1e.diff

LOG: [AArch64] Don't recreate nodes in tryCombineLongOpWithDup

If we don't find a node with either operand through
isEssentiallyExtractHighSubvector, there is little point
recreating the node with the same operands. Returning
SDValue better communicates that no changes were made.

This fixes #63491 by not recreating uabd nodes with swapped
operands. As noted in the ticket there are other fixes that
might be useful to make too, but this should prevent the
infinite combine.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/abd-combine.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f1a649b9e647a..96521bb75435d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18586,7 +18586,8 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
     LHS = tryExtendDUPToExtractHigh(LHS, DAG);
     if (!LHS.getNode())
       return SDValue();
-  }
+  } else
+    return SDValue();
 
   if (IID == Intrinsic::not_intrinsic)
     return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);

diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index a7e0c26fd7a15..e6891f2d53cbd 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -260,7 +260,7 @@ define <8 x i16> @abdu_i_const_bothhigh() {
 define <8 x i16> @abdu_i_const_onehigh() {
 ; CHECK-LABEL: abdu_i_const_onehigh:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32765
+; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
 ; CHECK-NEXT:    dup v0.8h, w8
 ; CHECK-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -480,7 +480,7 @@ define <8 x i16> @abds_i_const_bothhigh() {
 define <8 x i16> @abds_i_const_onehigh() {
 ; CHECK-LABEL: abds_i_const_onehigh:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32765
+; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
 ; CHECK-NEXT:    dup v0.8h, w8
 ; CHECK-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -527,7 +527,33 @@ define <8 x i16> @abds_i_reassoc(<8 x i16> %src1) {
   ret <8 x i16> %result
 }
 
+define <1 x i64> @recursive() {
+; CHECK-LABEL: recursive:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8b, #1
+; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT:    uabd v2.8b, v0.8b, v1.8b
+; CHECK-NEXT:    uabdl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    dup v1.8b, v2.b[0]
+; CHECK-NEXT:    saddlp v0.1d, v0.2s
+; CHECK-NEXT:    orr v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    ret
+  %1 = tail call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> zeroinitializer, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+  %2 = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  %3 = zext <8 x i8> %2 to <8 x i16>
+  %4 = bitcast <8 x i16> %3 to <4 x i32>
+  %5 = shufflevector <4 x i32> %4, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
+  %6 = shufflevector <8 x i8> %2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %7 = bitcast <16 x i8> %6 to <2 x i64>
+  %8 = shufflevector <2 x i64> %7, <2 x i64> zeroinitializer, <1 x i32> zeroinitializer
+  %9 = tail call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %5)
+  %10 = or <1 x i64> %8, %9
+  ret <1 x i64> %10
+}
 
+declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>)
+declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>)
+declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
 declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
 declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
 declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)


        


More information about the llvm-commits mailing list