[llvm] 5d814b3 - Revert "[AArch64][SVE2] Change the cost of extends with S/URHADD to 0"
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 14 03:52:43 PDT 2023
Author: Kerry McLaughlin
Date: 2023-08-14T10:44:13Z
New Revision: 5d814b3848265da8c10ca29d5e55f0637c5b50ef
URL: https://github.com/llvm/llvm-project/commit/5d814b3848265da8c10ca29d5e55f0637c5b50ef
DIFF: https://github.com/llvm/llvm-project/commit/5d814b3848265da8c10ca29d5e55f0637c5b50ef.diff
LOG: Revert "[AArch64][SVE2] Change the cost of extends with S/URHADD to 0"
This reverts commit dda2cd2505301aa626fcd3e8dea2a447227d00ca.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Removed:
llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd-costs.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd.ll
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7310e95220d622..8bc9a0a1b78a64 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2044,72 +2044,6 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
}
-// Where SVE2 is enabled, we can combine an add of 1, add & shift right by 1
-// to a single s/urhadd instruction. Some extends can be folded into the
-// instruction and will be 'free', e.g.
-// %ld1 = load i8, ptr %a
-// %zext1 = zext i8 %ld1 to i16
-// %ld2 = load i8, ptr %b
-// %zext2 = zext i8 %ld2 to i16
-// %add1 = add nuw nsw i16 %zext1, 1
-// %add2 = add nuw nsw i16 %add1, %zext2
-// %shr = lshr i16 %add2, 1
-// %trunc = trunc i16 %shr to i8
-//
-bool isExtShiftRightAdd(const Instruction *I, const Instruction *Ext, Type *Dst,
- Type *Src) {
- // Check that the cast is doubling the source type.
- if ((Src->getScalarSizeInBits() != Dst->getScalarSizeInBits() / 2) ||
- I->getOpcode() != Instruction::Add || !I->hasOneUser())
- return false;
-
- // Check for the add/shift/trunc pattern if I is an add of a constant.
- auto Op1 = dyn_cast<ConstantInt>(I->getOperand(1));
- if (!Op1) {
- // Otherwise, get the other operand and look for the same pattern
- // if this is an add.
- auto *Op = I->getOperand(0) == Ext ? I->getOperand(1) : I->getOperand(0);
-
- I = dyn_cast<Instruction>(Op);
- if (!I || I->getOpcode() != Instruction::Add || !I->hasOneUser())
- return false;
-
- Op1 = dyn_cast<ConstantInt>(I->getOperand(1));
- }
-
- if (!Op1)
- return false;
-
- auto ExtVal = isa<ZExtInst>(Ext) ? Op1->getZExtValue() : Op1->getSExtValue();
- if (ExtVal != 1)
- return false;
-
- // The add should only have one user, a right shift of 1.
- auto *Add = cast<Instruction>(*I->user_begin());
- if (Add->getOpcode() != Instruction::Add || !Add->hasOneUser())
- return false;
-
- auto *LShr = cast<Instruction>(*Add->user_begin());
- if (LShr->getOpcode() != Instruction::LShr || !LShr->hasOneUser())
- return false;
-
- auto *LShrOp1 = dyn_cast<ConstantInt>(LShr->getOperand(1));
- ExtVal = isa<ZExtInst>(Ext) ? LShrOp1->getZExtValue()
- : LShrOp1->getSExtValue();
- if (!LShrOp1 || LShrOp1->getZExtValue() != 1)
- return false;
-
- // Ensure the only user of the shift is a trunc which is casting
- // back to the original element type.
- auto *Trunc = cast<Instruction>(*LShr->user_begin());
- if (Trunc->getOpcode() != Instruction::Trunc ||
- Src->getScalarSizeInBits() !=
- cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
- return false;
-
- return true;
-}
-
InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src,
TTI::CastContextHint CCH,
@@ -2134,11 +2068,6 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
} else // Others are free so long as isWideningInstruction returned true.
return 0;
}
-
- // The cast will be free for the SVE2 s/urhadd instructions
- if (ST->hasSVE2() && (isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
- isExtShiftRightAdd(SingleUser, I, Dst, Src))
- return 0;
}
// TODO: Allow non-throughput costs that aren't binary.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd-costs.ll
deleted file mode 100644
index d1296951f4867c..00000000000000
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd-costs.ll
+++ /dev/null
@@ -1,237 +0,0 @@
-; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve2 -sve-tail-folding=simple -debug-only=loop-vectorize -S 2>%t < %s
-; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST
-
-target triple = "aarch64-unknown-linux-gnu"
-
-; SRHADD
-
-define void @srhadd_i8_zext_i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction: %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction: %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction: %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction: %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction: %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction: %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 16 For instruction: %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 16 For instruction: %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %sext2 = sext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 16 For instruction: %sext1 = sext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 16 For instruction: %sext2 = sext i8 %ld2 to i16
-
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
- %ld1 = load i8, ptr %arrayidx1
- %sext1 = sext i8 %ld1 to i16
- %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
- %ld2 = load i8, ptr %arrayidx2
- %sext2 = sext i8 %ld2 to i16
- %add1 = add nuw nsw i16 %sext1, 1
- %add2 = add nuw nsw i16 %add1, %sext2
- %shr = lshr i16 %add2, 1
- %trunc = trunc i16 %shr to i8
- %arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv
- store i8 %trunc, ptr %arrayidx3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %n
- br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
- br label %for.end
-
-for.end:
- ret void
-}
-
-define void @srhadd_i16_zext_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction: %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction: %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction: %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction: %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction: %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction: %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %sext2 = sext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %sext1 = sext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %sext2 = sext i16 %ld2 to i32
-
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx1 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
- %ld1 = load i16, ptr %arrayidx1
- %sext1 = sext i16 %ld1 to i32
- %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
- %ld2 = load i16, ptr %arrayidx2
- %sext2 = sext i16 %ld2 to i32
- %add1 = add nuw nsw i32 %sext1, 1
- %add2 = add nuw nsw i32 %add1, %sext2
- %shr = lshr i32 %add2, 1
- %trunc = trunc i32 %shr to i16
- %arrayidx3 = getelementptr inbounds i16, ptr %dst, i64 %indvars.iv
- store i16 %trunc, ptr %arrayidx3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %n
- br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
- br label %for.end
-
-for.end:
- ret void
-}
-
-; URHADD
-
-define void @urhadd_i8_zext_i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction: %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction: %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction: %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction: %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction: %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction: %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 16 For instruction: %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 16 For instruction: %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %zext2 = zext i8 %ld2 to i16
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 16 For instruction: %zext1 = zext i8 %ld1 to i16
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 16 For instruction: %zext2 = zext i8 %ld2 to i16
-
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
- %ld1 = load i8, ptr %arrayidx1
- %zext1 = zext i8 %ld1 to i16
- %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
- %ld2 = load i8, ptr %arrayidx2
- %zext2 = zext i8 %ld2 to i16
- %add1 = add nuw nsw i16 %zext1, 1
- %add2 = add nuw nsw i16 %add1, %zext2
- %shr = lshr i16 %add2, 1
- %trunc = trunc i16 %shr to i8
- %arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv
- store i8 %trunc, ptr %arrayidx3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %n
- br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
- br label %for.end
-
-for.end:
- ret void
-}
-
-define void @urhadd_i16_zext_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction: %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 2 For instruction: %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction: %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 4 For instruction: %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction: %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF 8 For instruction: %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %zext2 = zext i16 %ld2 to i32
-
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %zext1 = zext i16 %ld1 to i32
-; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %zext2 = zext i16 %ld2 to i32
-
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx1 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
- %ld1 = load i16, ptr %arrayidx1
- %zext1 = zext i16 %ld1 to i32
- %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
- %ld2 = load i16, ptr %arrayidx2
- %zext2 = zext i16 %ld2 to i32
- %add1 = add nuw nsw i32 %zext1, 1
- %add2 = add nuw nsw i32 %add1, %zext2
- %shr = lshr i32 %add2, 1
- %trunc = trunc i32 %shr to i16
- %arrayidx3 = getelementptr inbounds i16, ptr %dst, i64 %indvars.iv
- store i16 %trunc, ptr %arrayidx3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %n
- br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
- br label %for.end
-
-for.end:
- ret void
-}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd.ll
deleted file mode 100644
index 5fac0775214585..00000000000000
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-ext-rhadd.ll
+++ /dev/null
@@ -1,129 +0,0 @@
-; RUN: opt -passes=loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve2 -sve-tail-folding=simple -S < %s | FileCheck %s
-
-; SRHADD
-
-define void @srhadd_i8_zext_i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-; CHECK-LABEL: @srhadd_i8_zext_i16(
-; CHECK: trunc <vscale x 16 x i16> {{.*}} to <vscale x 16 x i8>
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
- %ld1 = load i8, ptr %arrayidx1
- %sext1 = sext i8 %ld1 to i16
- %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
- %ld2 = load i8, ptr %arrayidx2
- %sext2 = sext i8 %ld2 to i16
- %add1 = add nuw nsw i16 %sext1, 1
- %add2 = add nuw nsw i16 %add1, %sext2
- %shr = lshr i16 %add2, 1
- %trunc = trunc i16 %shr to i8
- %arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv
- store i8 %trunc, ptr %arrayidx3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %n
- br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
- br label %for.end
-
-for.end:
- ret void
-}
-
-define void @srhadd_i16_zext_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-; CHECK-LABEL: @srhadd_i16_zext_i32(
-; CHECK: trunc <vscale x 8 x i32> {{.*}} to <vscale x 8 x i16>
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx1 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
- %ld1 = load i16, ptr %arrayidx1
- %sext1 = sext i16 %ld1 to i32
- %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
- %ld2 = load i16, ptr %arrayidx2
- %sext2 = sext i16 %ld2 to i32
- %add1 = add nuw nsw i32 %sext1, 1
- %add2 = add nuw nsw i32 %add1, %sext2
- %shr = lshr i32 %add2, 1
- %trunc = trunc i32 %shr to i16
- %arrayidx3 = getelementptr inbounds i16, ptr %dst, i64 %indvars.iv
- store i16 %trunc, ptr %arrayidx3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %n
- br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
- br label %for.end
-
-for.end:
- ret void
-}
-
-; URHADD
-
-define void @urhadd_i8_zext_i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-; CHECK-LABEL: @urhadd_i8_zext_i16(
-; CHECK: trunc <vscale x 16 x i16> {{.*}} to <vscale x 16 x i8>
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
- %ld1 = load i8, ptr %arrayidx1
- %zext1 = zext i8 %ld1 to i16
- %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
- %ld2 = load i8, ptr %arrayidx2
- %zext2 = zext i8 %ld2 to i16
- %add1 = add nuw nsw i16 %zext1, 1
- %add2 = add nuw nsw i16 %add1, %zext2
- %shr = lshr i16 %add2, 1
- %trunc = trunc i16 %shr to i8
- %arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv
- store i8 %trunc, ptr %arrayidx3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %n
- br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
- br label %for.end
-
-for.end:
- ret void
-}
-
-define void @urhadd_i16_zext_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %dst, i64 %n) {
-; CHECK-LABEL: @urhadd_i16_zext_i32(
-; CHECK: trunc <vscale x 8 x i32> {{.*}} to <vscale x 8 x i16>
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx1 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
- %ld1 = load i16, ptr %arrayidx1
- %zext1 = zext i16 %ld1 to i32
- %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
- %ld2 = load i16, ptr %arrayidx2
- %zext2 = zext i16 %ld2 to i32
- %add1 = add nuw nsw i32 %zext1, 1
- %add2 = add nuw nsw i32 %add1, %zext2
- %shr = lshr i32 %add2, 1
- %trunc = trunc i32 %shr to i16
- %arrayidx3 = getelementptr inbounds i16, ptr %dst, i64 %indvars.iv
- store i16 %trunc, ptr %arrayidx3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %n
- br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
- br label %for.end
-
-for.end:
- ret void
-}
More information about the llvm-commits mailing list