[llvm] 5d814b3 - Revert "[AArch64][SVE2] Change the cost of extends with S/URHADD to 0"

Kerry McLaughlin via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 14 03:52:43 PDT 2023


Author: Kerry McLaughlin
Date: 2023-08-14T10:44:13Z
New Revision: 5d814b3848265da8c10ca29d5e55f0637c5b50ef

URL: https://github.com/llvm/llvm-project/commit/5d814b3848265da8c10ca29d5e55f0637c5b50ef
DIFF: https://github.com/llvm/llvm-project/commit/5d814b3848265da8c10ca29d5e55f0637c5b50ef.diff

LOG: Revert "[AArch64][SVE2] Change the cost of extends with S/URHADD to 0"

This reverts commit dda2cd2505301aa626fcd3e8dea2a447227d00ca.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Removed: 
    llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd-costs.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd.ll


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7310e95220d622..8bc9a0a1b78a64 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2044,72 +2044,6 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
   return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
 }
 
-// Where SVE2 is enabled, we can combine an add of 1, add & shift right by 1
-// to a single s/urhadd instruction. Some extends can be folded into the
-// instruction and will be 'free', e.g.
-//    %ld1 = load i8, ptr %a
-//    %zext1 = zext i8 %ld1 to i16
-//    %ld2 = load i8, ptr %b
-//    %zext2 = zext i8 %ld2 to i16
-//    %add1 = add nuw nsw i16 %zext1, 1
-//    %add2 = add nuw nsw i16 %add1, %zext2
-//    %shr = lshr i16 %add2, 1
-//    %trunc = trunc i16 %shr to i8
-//
-bool isExtShiftRightAdd(const Instruction *I, const Instruction *Ext, Type *Dst,
-                        Type *Src) {
-  // Check that the cast is doubling the source type.
-  if ((Src->getScalarSizeInBits() != Dst->getScalarSizeInBits() / 2) ||
-      I->getOpcode() != Instruction::Add || !I->hasOneUser())
-    return false;
-
-  // Check for the add/shift/trunc pattern if I is an add of a constant.
-  auto Op1 = dyn_cast<ConstantInt>(I->getOperand(1));
-  if (!Op1) {
-    // Otherwise, get the other operand and look for the same pattern
-    // if this is an add.
-    auto *Op = I->getOperand(0) == Ext ? I->getOperand(1) : I->getOperand(0);
-
-    I = dyn_cast<Instruction>(Op);
-    if (!I || I->getOpcode() != Instruction::Add || !I->hasOneUser())
-      return false;
-
-    Op1 = dyn_cast<ConstantInt>(I->getOperand(1));
-  }
-
-  if (!Op1)
-    return false;
-
-  auto ExtVal = isa<ZExtInst>(Ext) ? Op1->getZExtValue() : Op1->getSExtValue();
-  if (ExtVal != 1)
-    return false;
-
-  // The add should only have one user, a right shift of 1.
-  auto *Add = cast<Instruction>(*I->user_begin());
-  if (Add->getOpcode() != Instruction::Add || !Add->hasOneUser())
-    return false;
-
-  auto *LShr = cast<Instruction>(*Add->user_begin());
-  if (LShr->getOpcode() != Instruction::LShr || !LShr->hasOneUser())
-    return false;
-
-  auto *LShrOp1 = dyn_cast<ConstantInt>(LShr->getOperand(1));
-  ExtVal = isa<ZExtInst>(Ext) ? LShrOp1->getZExtValue()
-                              : LShrOp1->getSExtValue();
-  if (!LShrOp1 || LShrOp1->getZExtValue() != 1)
-    return false;
-
-  // Ensure the only user of the shift is a trunc which is casting
-  // back to the original element type.
-  auto *Trunc = cast<Instruction>(*LShr->user_begin());
-  if (Trunc->getOpcode() != Instruction::Trunc ||
-      Src->getScalarSizeInBits() !=
-          cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
-    return false;
-
-  return true;
-}
-
 InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                                  Type *Src,
                                                  TTI::CastContextHint CCH,
@@ -2134,11 +2068,6 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       } else // Others are free so long as isWideningInstruction returned true.
         return 0;
     }
-
-    // The cast will be free for the SVE2 s/urhadd instructions
-    if (ST->hasSVE2() && (isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
-        isExtShiftRightAdd(SingleUser, I, Dst, Src))
-      return 0;
   }
 
   // TODO: Allow non-throughput costs that aren't binary.

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd-costs.ll
deleted file mode 100644
index d1296951f4867c..00000000000000
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd-costs.ll
+++ /dev/null
@@ -1,237 +0,0 @@
-; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve2 -sve-tail-folding=simple -debug-only=loop-vectorize -S 2>%t < %s
-; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST
-
-target triple = "aarch64-unknown-linux-gnu"
-
-; SRHADD
-
-define void @srhadd_i8_zext_i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction:   %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction:   %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction:   %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction:   %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction:   %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction:   %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 16 For instruction:   %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 16 For instruction:   %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction:   %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction:   %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction:   %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction:   %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction:   %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction:   %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction:   %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction:   %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 16 For instruction:   %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 16 For instruction:   %sext2 = sext i8 %ld2 to i16
-
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
-  %ld1 = load i8, ptr %arrayidx1
-  %sext1 = sext i8 %ld1 to i16
-  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
-  %ld2 = load i8, ptr %arrayidx2
-  %sext2 = sext i8 %ld2 to i16
-  %add1 = add nuw nsw i16 %sext1, 1
-  %add2 = add nuw nsw i16 %add1, %sext2
-  %shr = lshr i16 %add2, 1
-  %trunc = trunc i16 %shr to i8
-  %arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv
-  store i8 %trunc, ptr %arrayidx3
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
-  br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
-  ret void
-}
-
-define void @srhadd_i16_zext_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction:   %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction:   %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction:   %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction:   %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction:   %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction:   %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction:   %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction:   %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction:   %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction:   %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction:   %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction:   %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction:   %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction:   %sext2 = sext i16 %ld2 to i32
-
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx1 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
-  %ld1 = load i16, ptr %arrayidx1
-  %sext1 = sext i16 %ld1 to i32
-  %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
-  %ld2 = load i16, ptr %arrayidx2
-  %sext2 = sext i16 %ld2 to i32
-  %add1 = add nuw nsw i32 %sext1, 1
-  %add2 = add nuw nsw i32 %add1, %sext2
-  %shr = lshr i32 %add2, 1
-  %trunc = trunc i32 %shr to i16
-  %arrayidx3 = getelementptr inbounds i16, ptr %dst, i64 %indvars.iv
-  store i16 %trunc, ptr %arrayidx3
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
-  br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
-  ret void
-}
-
-; URHADD
-
-define void @urhadd_i8_zext_i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction:   %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction:   %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction:   %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction:   %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction:   %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction:   %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 16 For instruction:   %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 16 For instruction:   %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction:   %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction:   %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction:   %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction:   %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction:   %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction:   %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction:   %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction:   %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 16 For instruction:   %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 16 For instruction:   %zext2 = zext i8 %ld2 to i16
-
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
-  %ld1 = load i8, ptr %arrayidx1
-  %zext1 = zext i8 %ld1 to i16
-  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
-  %ld2 = load i8, ptr %arrayidx2
-  %zext2 = zext i8 %ld2 to i16
-  %add1 = add nuw nsw i16 %zext1, 1
-  %add2 = add nuw nsw i16 %add1, %zext2
-  %shr = lshr i16 %add2, 1
-  %trunc = trunc i16 %shr to i8
-  %arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv
-  store i8 %trunc, ptr %arrayidx3
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
-  br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
-  ret void
-}
-
-define void @urhadd_i16_zext_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction:   %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction:   %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction:   %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction:   %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction:   %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction:   %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction:   %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction:   %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction:   %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction:   %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction:   %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction:   %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction:   %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction:   %zext2 = zext i16 %ld2 to i32
-
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx1 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
-  %ld1 = load i16, ptr %arrayidx1
-  %zext1 = zext i16 %ld1 to i32
-  %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
-  %ld2 = load i16, ptr %arrayidx2
-  %zext2 = zext i16 %ld2 to i32
-  %add1 = add nuw nsw i32 %zext1, 1
-  %add2 = add nuw nsw i32 %add1, %zext2
-  %shr = lshr i32 %add2, 1
-  %trunc = trunc i32 %shr to i16
-  %arrayidx3 = getelementptr inbounds i16, ptr %dst, i64 %indvars.iv
-  store i16 %trunc, ptr %arrayidx3
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
-  br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
-  ret void
-}

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd.ll
deleted file mode 100644
index 5fac0775214585..00000000000000
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd.ll
+++ /dev/null
@@ -1,129 +0,0 @@
-; RUN: opt -passes=loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve2 -sve-tail-folding=simple -S < %s | FileCheck %s
-
-; SRHADD
-
-define void @srhadd_i8_zext_i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-; CHECK-LABEL: @srhadd_i8_zext_i16(
-; CHECK: trunc <vscale x 16 x i16> {{.*}} to <vscale x 16 x i8>
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
-  %ld1 = load i8, ptr %arrayidx1
-  %sext1 = sext i8 %ld1 to i16
-  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
-  %ld2 = load i8, ptr %arrayidx2
-  %sext2 = sext i8 %ld2 to i16
-  %add1 = add nuw nsw i16 %sext1, 1
-  %add2 = add nuw nsw i16 %add1, %sext2
-  %shr = lshr i16 %add2, 1
-  %trunc = trunc i16 %shr to i8
-  %arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv
-  store i8 %trunc, ptr %arrayidx3
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
-  br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
-  ret void
-}
-
-define void @srhadd_i16_zext_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-; CHECK-LABEL: @srhadd_i16_zext_i32(
-; CHECK: trunc <vscale x 8 x i32> {{.*}} to <vscale x 8 x i16>
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx1 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
-  %ld1 = load i16, ptr %arrayidx1
-  %sext1 = sext i16 %ld1 to i32
-  %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
-  %ld2 = load i16, ptr %arrayidx2
-  %sext2 = sext i16 %ld2 to i32
-  %add1 = add nuw nsw i32 %sext1, 1
-  %add2 = add nuw nsw i32 %add1, %sext2
-  %shr = lshr i32 %add2, 1
-  %trunc = trunc i32 %shr to i16
-  %arrayidx3 = getelementptr inbounds i16, ptr %dst, i64 %indvars.iv
-  store i16 %trunc, ptr %arrayidx3
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
-  br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
-  ret void
-}
-
-; URHADD
-
-define void @urhadd_i8_zext_i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-; CHECK-LABEL: @urhadd_i8_zext_i16(
-; CHECK: trunc <vscale x 16 x i16> {{.*}} to <vscale x 16 x i8>
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
-  %ld1 = load i8, ptr %arrayidx1
-  %zext1 = zext i8 %ld1 to i16
-  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
-  %ld2 = load i8, ptr %arrayidx2
-  %zext2 = zext i8 %ld2 to i16
-  %add1 = add nuw nsw i16 %zext1, 1
-  %add2 = add nuw nsw i16 %add1, %zext2
-  %shr = lshr i16 %add2, 1
-  %trunc = trunc i16 %shr to i8
-  %arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv
-  store i8 %trunc, ptr %arrayidx3
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
-  br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
-  ret void
-}
-
-define void @urhadd_i16_zext_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-; CHECK-LABEL: @urhadd_i16_zext_i32(
-; CHECK: trunc <vscale x 8 x i32> {{.*}} to <vscale x 8 x i16>
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx1 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
-  %ld1 = load i16, ptr %arrayidx1
-  %zext1 = zext i16 %ld1 to i32
-  %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
-  %ld2 = load i16, ptr %arrayidx2
-  %zext2 = zext i16 %ld2 to i32
-  %add1 = add nuw nsw i32 %zext1, 1
-  %add2 = add nuw nsw i32 %add1, %zext2
-  %shr = lshr i32 %add2, 1
-  %trunc = trunc i32 %shr to i16
-  %arrayidx3 = getelementptr inbounds i16, ptr %dst, i64 %indvars.iv
-  store i16 %trunc, ptr %arrayidx3
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
-  br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
-  ret void
-}


        


More information about the llvm-commits mailing list