[PATCH] D150345: [AArch64] Handle vector with two different values with efficient vector mask

JinGu Kang via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu May 11 00:29:57 PDT 2023


jaykang10 created this revision.
jaykang10 added reviewers: dmgreen, efriedma, t.p.northover.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
jaykang10 requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Following @dmgreen's comment, when we lower BUILD_VECTOR to VECTOR_SHUFFLE, we can generate a more efficient vector mask.


https://reviews.llvm.org/D150345

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/build-vector-two-dup.ll


Index: llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
===================================================================
--- llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
+++ llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
@@ -98,12 +98,10 @@
 define <8 x i8> @test6(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
 ; CHECK-LABEL: test6:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ld1r { v0.8b }, [x1]
-; CHECK-NEXT:    adrp x8, .LCPI5_0
-; CHECK-NEXT:    ld1r { v1.8b }, [x0]
-; CHECK-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT:    tbl v0.8b, { v1.16b }, v0.8b
+; CHECK-NEXT:    ld1r { v1.8b }, [x1]
+; CHECK-NEXT:    ld1r { v0.8b }, [x0]
+; CHECK-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i8, ptr %a, align 1
@@ -119,12 +117,10 @@
 define <8 x i8> @test7(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
 ; CHECK-LABEL: test7:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ld1r { v0.8b }, [x0]
-; CHECK-NEXT:    adrp x8, .LCPI6_0
-; CHECK-NEXT:    ld1r { v1.8b }, [x1]
-; CHECK-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI6_0]
-; CHECK-NEXT:    tbl v0.8b, { v1.16b }, v0.8b
+; CHECK-NEXT:    ld1r { v1.8b }, [x0]
+; CHECK-NEXT:    ld1r { v0.8b }, [x1]
+; CHECK-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i8, ptr %a, align 1
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12653,13 +12653,13 @@
       }
     }
 
-    // Let's try to generate two DUPs and VECTOR_SHUFFLE. For example,
+    // Let's try to generate VECTOR_SHUFFLE. For example,
     //
     //  t24: v8i8 = BUILD_VECTOR t25, t25, t25, t25, t26, t26, t26, t26
     //  ==>
-    //    t28: v8i8 = AArch64ISD::DUP t25
-    //    t30: v8i8 = AArch64ISD::DUP t26
-    //  t31: v8i8 = vector_shuffle<0,0,0,0,8,8,8,8> t28, t30
+    //    t27: v8i8 = BUILD_VECTOR t26, t26, t26, t26, t26, t26, t26, t26
+    //    t28: v8i8 = BUILD_VECTOR t25, t25, t25, t25, t25, t25, t25, t25
+    //  t29: v8i8 = vector_shuffle<0,1,2,3,12,13,14,15> t27, t28
     if (NumElts >= 8) {
       SmallVector<int, 16> MaskVec;
       // Build mask for VECTOR_SHUFLLE.
@@ -12667,17 +12667,17 @@
       for (unsigned i = 0; i < NumElts; ++i) {
         SDValue Val = Op.getOperand(i);
         if (FirstLaneVal == Val)
-          MaskVec.push_back(0);
+          MaskVec.push_back(i);
         else
-          MaskVec.push_back(NumElts);
+          MaskVec.push_back(i + NumElts);
       }
 
       SmallVector<SDValue, 8> Ops1(NumElts, Vals[0]);
       SmallVector<SDValue, 8> Ops2(NumElts, Vals[1]);
-      SDValue DUP1 = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops1), DAG);
-      SDValue DUP2 = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops2), DAG);
+      SDValue VEC1 = DAG.getBuildVector(VT, dl, Ops1);
+      SDValue VEC2 = DAG.getBuildVector(VT, dl, Ops2);
       SDValue VECTOR_SHUFFLE =
-          DAG.getVectorShuffle(VT, dl, DUP1, DUP2, MaskVec);
+          DAG.getVectorShuffle(VT, dl, VEC1, VEC2, MaskVec);
       return VECTOR_SHUFFLE;
     }
   }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D150345.521218.patch
Type: text/x-patch
Size: 3418 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230511/e6d404f5/attachment.bin>


More information about the llvm-commits mailing list