[llvm] 85bbc05 - [AArch64][SVE][InstCombine] Move last{a,b} before binop if one operand is a splat value

Usman Nadeem via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 9 14:51:47 PDT 2021


Author: Usman Nadeem
Date: 2021-08-09T14:48:41-07:00
New Revision: 85bbc05154ba400b99045ade2864526a887e81aa

URL: https://github.com/llvm/llvm-project/commit/85bbc05154ba400b99045ade2864526a887e81aa
DIFF: https://github.com/llvm/llvm-project/commit/85bbc05154ba400b99045ade2864526a887e81aa.diff

LOG: [AArch64][SVE][InstCombine] Move last{a,b} before binop if one operand is a splat value

Move the last{a,b} operation before the binary instruction, applying it to the
binop's operands, when one of those operands is a splat value. This essentially
converts the binop into a scalar operation.

Example:
    // If x and/or y is a splat value:
    lastX (binop (x, y)) --> binop(lastX(x), lastX(y))
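
As a concrete sketch (mirroring the new lastb_binop_RHS_splat_sdiv test below;
the value names here are illustrative), IR of the form

    %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
    %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
    %binop = sdiv <vscale x 16 x i8> %vector, %splat
    %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)

is folded into a single lastb feeding a scalar sdiv (the lastb of the splat
operand folds to the scalar via the existing lastX(splat(X)) --> X rule):

    %last.scalar = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vector)
    %binop.scalar = sdiv i8 %last.scalar, %scalar

The transform only fires when the binop has a single use, so the vector binop
is not duplicated if its result is still needed elsewhere (see the
lastb_binop_nochange test).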

Differential Revision: https://reviews.llvm.org/D106932

Change-Id: I93ff5302f9a7972405ee0d3854cf115f072e99c0

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 063c37dcb8b17..c6a833cbb02f1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -544,14 +544,34 @@ static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
 
 static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
                                                   IntrinsicInst &II) {
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
   Value *Pg = II.getArgOperand(0);
   Value *Vec = II.getArgOperand(1);
-  bool IsAfter = II.getIntrinsicID() == Intrinsic::aarch64_sve_lasta;
+  auto IntrinsicID = II.getIntrinsicID();
+  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
 
   // lastX(splat(X)) --> X
   if (auto *SplatVal = getSplatValue(Vec))
     return IC.replaceInstUsesWith(II, SplatVal);
 
+  // If x and/or y is a splat value then:
+  // lastX (binop (x, y)) --> binop(lastX(x), lastX(y))
+  Value *LHS, *RHS;
+  if (match(Vec, m_OneUse(m_BinOp(m_Value(LHS), m_Value(RHS))))) {
+    if (isSplatValue(LHS) || isSplatValue(RHS)) {
+      auto *OldBinOp = cast<BinaryOperator>(Vec);
+      auto OpC = OldBinOp->getOpcode();
+      auto *NewLHS =
+          Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, LHS});
+      auto *NewRHS =
+          Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, RHS});
+      auto *NewBinOp = BinaryOperator::CreateWithCopiedFlags(
+          OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), &II);
+      return IC.replaceInstUsesWith(II, NewBinOp);
+    }
+  }
+
   auto *C = dyn_cast<Constant>(Pg);
   if (IsAfter && C && C->isNullValue()) {
     // The intrinsic is extracting lane 0 so use an extract instead.

diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll
index 58c65bd9f24ad..fcb958b43b0c5 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll
@@ -147,7 +147,8 @@ define i8 @lastb_extractelement_invalid_predicate_pattern(<vscale x 16 x i8> %v)
 ; Return the splatted value irrespective of the predicate.
 define i8 @lasta_splat(<vscale x 16 x i1> %pg, i8 %a) #0 {
 ; OPT-LABEL: @lasta_splat(
-; OPT-NEXT:    ret i8 %a
+; OPT-NEXT:    ret i8 [[A:%.*]]
+;
   %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %a, i32 0
   %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
   %last = tail call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %splat)
@@ -156,15 +157,155 @@ define i8 @lasta_splat(<vscale x 16 x i1> %pg, i8 %a) #0 {
 
 define i8 @lastb_splat(<vscale x 16 x i1> %pg, i8 %a) #0 {
 ; OPT-LABEL: @lastb_splat(
-; OPT-NEXT:    ret i8 %a
+; OPT-NEXT:    ret i8 [[A:%.*]]
+;
   %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %a, i32 0
   %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
   %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %splat)
   ret i8 %last
 }
 
+; Check that we move the lastb before the binary operation so that the new binary op is scalar.
+define i8 @lastb_binop_RHS_splat_sdiv(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
+; OPT-LABEL: @lastb_binop_RHS_splat_sdiv(
+; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
+; OPT-NEXT:    [[BINOP1:%.*]] = sdiv i8 [[TMP1]], [[SCALAR:%.*]]
+; OPT-NEXT:    ret i8 [[BINOP1]]
+;
+  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %binop = sdiv <vscale x 16 x i8> %vector, %splat
+  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
+  ret i8 %last
+}
+
+define i8 @lastb_binop_RHS_splat_sdiv_exact(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
+; OPT-LABEL: @lastb_binop_RHS_splat_sdiv_exact(
+; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
+; OPT-NEXT:    [[BINOP1:%.*]] = sdiv exact i8 [[TMP1]], [[SCALAR:%.*]]
+; OPT-NEXT:    ret i8 [[BINOP1]]
+;
+  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %binop = sdiv exact <vscale x 16 x i8> %vector, %splat
+  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
+  ret i8 %last
+}
+
+define float @lastb_binop_RHS_splat_fdiv_float_fast(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
+; OPT-LABEL: @lastb_binop_RHS_splat_fdiv_float_fast(
+; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
+; OPT-NEXT:    [[BINOP1:%.*]] = fdiv fast float [[TMP1]], [[SCALAR:%.*]]
+; OPT-NEXT:    ret float [[BINOP1]]
+;
+  %splat_insert = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
+  %splat = shufflevector <vscale x 4 x float> %splat_insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %binop = fdiv fast <vscale x 4 x float> %vector, %splat
+  %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %binop)
+  ret float %last
+}
+
+define float @lastb_binop_RHS_splat_fdiv_float(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
+; OPT-LABEL: @lastb_binop_RHS_splat_fdiv_float(
+; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
+; OPT-NEXT:    [[BINOP1:%.*]] = fdiv float [[TMP1]], [[SCALAR:%.*]]
+; OPT-NEXT:    ret float [[BINOP1]]
+;
+  %splat_insert = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
+  %splat = shufflevector <vscale x 4 x float> %splat_insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %binop = fdiv <vscale x 4 x float> %vector, %splat
+  %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %binop)
+  ret float %last
+}
+
+define i8 @lastb_binop_LHS_splat_sdiv(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
+; OPT-LABEL: @lastb_binop_LHS_splat_sdiv(
+; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
+; OPT-NEXT:    [[BINOP1:%.*]] = sdiv i8 [[SCALAR:%.*]], [[TMP1]]
+; OPT-NEXT:    ret i8 [[BINOP1]]
+;
+  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %binop = sdiv <vscale x 16 x i8> %splat, %vector
+  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
+  ret i8 %last
+}
+
+define i8 @lastb_binop_LHS_splat_sdiv_exact(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
+; OPT-LABEL: @lastb_binop_LHS_splat_sdiv_exact(
+; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
+; OPT-NEXT:    [[BINOP1:%.*]] = sdiv exact i8 [[SCALAR:%.*]], [[TMP1]]
+; OPT-NEXT:    ret i8 [[BINOP1]]
+;
+  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %binop = sdiv exact <vscale x 16 x i8> %splat, %vector
+  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
+  ret i8 %last
+}
+
+define float @lastb_binop_LHS_splat_fdiv_float_fast(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
+; OPT-LABEL: @lastb_binop_LHS_splat_fdiv_float_fast(
+; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
+; OPT-NEXT:    [[BINOP1:%.*]] = fdiv fast float [[SCALAR:%.*]], [[TMP1]]
+; OPT-NEXT:    ret float [[BINOP1]]
+;
+  %splat_insert = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
+  %splat = shufflevector <vscale x 4 x float> %splat_insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %binop = fdiv fast <vscale x 4 x float> %splat, %vector
+  %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %binop)
+  ret float %last
+}
+
+define float @lastb_binop_LHS_splat_fdiv_float(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
+; OPT-LABEL: @lastb_binop_LHS_splat_fdiv_float(
+; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
+; OPT-NEXT:    [[BINOP1:%.*]] = fdiv float [[SCALAR:%.*]], [[TMP1]]
+; OPT-NEXT:    ret float [[BINOP1]]
+;
+  %splat_insert = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
+  %splat = shufflevector <vscale x 4 x float> %splat_insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %binop = fdiv <vscale x 4 x float> %splat, %vector
+  %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %binop)
+  ret float %last
+}
+
+define i8 @lastb_binop_LHS_RHS_splat_sdiv(<vscale x 16 x i1> %pg, i8 %scalar1, i8 %scalar2) #0 {
+; OPT-LABEL: @lastb_binop_LHS_RHS_splat_sdiv(
+; OPT-NEXT:    [[BINOP1:%.*]] = sdiv i8 [[SCALAR1:%.*]], [[SCALAR2:%.*]]
+; OPT-NEXT:    ret i8 [[BINOP1]]
+;
+  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar1, i8 0
+  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %splat_insert2 = insertelement <vscale x 16 x i8> poison, i8 %scalar2, i8 0
+  %splat2 = shufflevector <vscale x 16 x i8> %splat_insert2, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %binop = sdiv <vscale x 16 x i8> %splat, %splat2
+  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
+  ret i8 %last
+}
+
+; Check that we don't do anything as the binary op has multiple uses.
+define i8 @lastb_binop_nochange(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
+; OPT-LABEL: @lastb_binop_nochange(
+; OPT-NEXT:    [[SPLAT_INSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[SCALAR:%.*]], i32 0
+; OPT-NEXT:    [[SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_INSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; OPT-NEXT:    [[BINOP:%.*]] = sdiv <vscale x 16 x i8> [[VECTOR:%.*]], [[SPLAT]]
+; OPT-NEXT:    [[LAST:%.*]] = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[BINOP]])
+; OPT-NEXT:    call void @use(<vscale x 16 x i8> [[BINOP]])
+; OPT-NEXT:    ret i8 [[LAST]]
+;
+  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %binop = sdiv <vscale x 16 x i8> %vector, %splat
+  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
+  call void @use(<vscale x 16 x i8> %binop)
+  ret i8 %last
+}
+
+declare void @use(<vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
 declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
 declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
 
 attributes #0 = { "target-features"="+sve" }


        

