[llvm] [SCEV] Add SafeWrap flag to AddRecs (PR #118483)
JĂșlio De Bastiani via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 04:47:31 PST 2024
https://github.com/OtherRandomUser created https://github.com/llvm/llvm-project/pull/118483
Added a flag to signal that an AddRec is expected to wrap. This allows vectorization of a few masking operations that would otherwise be rejected due to no-wrap predicates, such as:
```
float arr1[8192];
float arr2[8192];
float arr3[8192];
float arr4[8192];
float arr5[8192];
float arr6[8192];
float arr7[8192];
float arr8[8192];
float arr9[8192];
void test() {
  for(int i = 0; i < 8192; i += 1) {
    int ind = i & 0x7f;
    arr1[ind] = i;
    arr2[ind] = i;
    arr3[ind] = i;
    arr4[ind] = i;
    arr5[ind] = i;
    arr6[ind] = i;
    arr7[ind] = i;
    arr8[ind] = i;
    arr9[ind] = i;
  }
}
```
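For the loop above, SCEV models the masked index as a zero-extend of a truncated AddRec, and the truncation is precisely what wraps every 128 iterations. Roughly, with this patch applied (illustrative sketch; <sw> is the printed form of the new flag, matching the updated tests in the diff):
```
  %and = and i32 %i, 127
; print<scalar-evolution> would show something like:
;   --> (zext i7 {0,+,1}<sw><%loop> to i32)
```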
There was also one last failing test, Transforms/LoopVectorize/X86/multi-exit-cost.ll. Adding the safe-wrap flag in this case prevents any vectorization, but the vectorization done before this change doesn't make much sense to me. Am I missing something?
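On the API side, the flag is exposed through a new hasSafeWrap() accessor on SCEVNAryExpr. A minimal sketch of the kind of guard this enables, mirroring the change to SCEVPredicateRewriter in the diff (the helper name is hypothetical):
```cpp
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;

// Hypothetical helper: an AddRec carrying FlagSafeWrap wraps by
// construction (e.g. a truncated, masked induction variable), so a
// no-wrap predicate assumed for it could never hold at runtime.
static bool worthAddingWrapPredicate(const SCEV *S) {
  if (const auto *AR = dyn_cast<SCEVAddRecExpr>(S))
    return !AR->hasSafeWrap(); // accessor added by this patch
  return true;
}
```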
From 03de1c86bf636075ecd8dfd5f5574b9947277394 Mon Sep 17 00:00:00 2001
From: JĂșlio De Bastiani <julio.bastiani@expertisesolutions.com.br>
Date: Thu, 21 Nov 2024 16:47:18 -0300
Subject: [PATCH] [SCEV] Add SafeWrap flag to AddRecs
Added a flag to signal that an AddRec is expected to wrap. This allows
vectorization of a few masking operations that would otherwise be
rejected due to no-wrap predicates, such as:
```
float arr1[8192];
float arr2[8192];
float arr3[8192];
float arr4[8192];
float arr5[8192];
float arr6[8192];
float arr7[8192];
float arr8[8192];
float arr9[8192];
void test() {
  for(int i = 0; i < 8192; i += 1) {
    int ind = i & 0x7f;
    arr1[ind] = i;
    arr2[ind] = i;
    arr3[ind] = i;
    arr4[ind] = i;
    arr5[ind] = i;
    arr6[ind] = i;
    arr7[ind] = i;
    arr8[ind] = i;
    arr9[ind] = i;
  }
}
```
---
llvm/include/llvm/Analysis/ScalarEvolution.h | 27 +++--
.../Analysis/ScalarEvolutionExpressions.h | 4 +
llvm/lib/Analysis/ScalarEvolution.cpp | 49 ++++----
llvm/test/Analysis/ScalarEvolution/pr87798.ll | 4 +-
.../ScalarEvolution/shift-recurrences.ll | 2 +-
.../ScalarEvolution/solve-quadratic-i1.ll | 4 +-
.../ScalarEvolution/solve-quadratic.ll | 4 +-
.../IndVarSimplify/shrunk-constant.ll | 2 +-
.../LoopVectorize/RISCV/safe-wrap.ll | 112 ++++++++++++++++++
.../X86/x86_fp80-vector-store.ll | 3 +-
10 files changed, 171 insertions(+), 40 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/safe-wrap.ll
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index b20c6a13cb6bd7..586b6232647c1e 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -124,11 +124,12 @@ class SCEV : public FoldingSetNode {
/// at runtime. A SCEV being defined does not require the existence of any
/// instruction within the defined scope.
enum NoWrapFlags {
- FlagAnyWrap = 0, // No guarantee.
- FlagNW = (1 << 0), // No self-wrap.
- FlagNUW = (1 << 1), // No unsigned wrap.
- FlagNSW = (1 << 2), // No signed wrap.
- NoWrapMask = (1 << 3) - 1
+ FlagAnyWrap = 0, // No guarantee.
+ FlagSafeWrap = (1 << 0), // Expected to wrap.
+ FlagNW = (1 << 1), // No self-wrap.
+ FlagNUW = (1 << 2), // No unsigned wrap.
+ FlagNSW = (1 << 3), // No signed wrap.
+ NoWrapMask = (1 << 4) - 1
};
explicit SCEV(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
@@ -562,7 +563,9 @@ class ScalarEvolution {
const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
const SCEV *getLosslessPtrToIntExpr(const SCEV *Op, unsigned Depth = 0);
const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty);
- const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
+ const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
+ unsigned Depth = 0);
const SCEV *getVScale(Type *Ty);
const SCEV *getElementCount(Type *Ty, ElementCount EC);
const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
@@ -707,13 +710,17 @@ class ScalarEvolution {
/// Return a SCEV corresponding to a conversion of the input value to the
/// specified type. If the type must be extended, it is zero extended.
- const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
- unsigned Depth = 0);
+ const SCEV *
+ getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
+ unsigned Depth = 0);
/// Return a SCEV corresponding to a conversion of the input value to the
/// specified type. If the type must be extended, it is sign extended.
- const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty,
- unsigned Depth = 0);
+ const SCEV *
+ getTruncateOrSignExtend(const SCEV *V, Type *Ty,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
+ unsigned Depth = 0);
/// Return a SCEV corresponding to a conversion of the input value to the
/// specified type. If the type must be extended, it is zero extended. The
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 6eb1aca1cf76ad..6a894bd9dcb4f4 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -233,6 +233,10 @@ class SCEVNAryExpr : public SCEV {
bool hasNoSelfWrap() const { return getNoWrapFlags(FlagNW) != FlagAnyWrap; }
+ bool hasSafeWrap() const {
+ return getNoWrapFlags(FlagSafeWrap) != FlagAnyWrap;
+ }
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEV *S) {
return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 882e938e69c0c2..3c6fb4498b9d64 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -317,6 +317,9 @@ void SCEV::print(raw_ostream &OS) const {
if (AR->hasNoSelfWrap() &&
!AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
OS << "nw><";
+ if (AR->hasSafeWrap() &&
+ !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW | FlagNW)))
+ OS << "sw><";
AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ">";
return;
@@ -1158,6 +1161,7 @@ const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty) {
}
const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
+ SCEV::NoWrapFlags Flags,
unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
"This is not a truncating conversion!");
@@ -1180,15 +1184,15 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
// trunc(trunc(x)) --> trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
- return getTruncateExpr(ST->getOperand(), Ty, Depth + 1);
+ return getTruncateExpr(ST->getOperand(), Ty, Flags, Depth + 1);
// trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
- return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1);
+ return getTruncateOrSignExtend(SS->getOperand(), Ty, Flags, Depth + 1);
// trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
- return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1);
+ return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Flags, Depth + 1);
if (Depth > MaxCastDepth) {
SCEV *S =
@@ -1208,7 +1212,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
unsigned numTruncs = 0;
for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
++i) {
- const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1);
+ const SCEV *S =
+ getTruncateExpr(CommOp->getOperand(i), Ty, Flags, Depth + 1);
if (!isa<SCEVIntegralCastExpr>(CommOp->getOperand(i)) &&
isa<SCEVTruncateExpr>(S))
numTruncs++;
@@ -1232,8 +1237,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : AddRec->operands())
- Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1));
- return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
+ Operands.push_back(getTruncateExpr(Op, Ty, Flags, Depth + 1));
+ return getAddRecExpr(Operands, AddRec->getLoop(), Flags);
}
// Return zero if truncating to known zeros.
@@ -1632,7 +1637,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
CR.zextOrTrunc(NewBits)))
- return getTruncateOrZeroExtend(X, Ty, Depth);
+ return getTruncateOrZeroExtend(X, Ty, SCEV::FlagAnyWrap, Depth);
}
// If the input value is a chrec scev, and we can prove that the value
@@ -1669,10 +1674,10 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
- const SCEV *CastedMaxBECount =
- getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
+ const SCEV *CastedMaxBECount = getTruncateOrZeroExtend(
+ MaxBECount, Start->getType(), SCEV::FlagAnyWrap, Depth);
const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
- CastedMaxBECount, MaxBECount->getType(), Depth);
+ CastedMaxBECount, MaxBECount->getType(), SCEV::FlagAnyWrap, Depth);
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
@@ -1973,7 +1978,7 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).signExtend(NewBits).contains(
CR.sextOrTrunc(NewBits)))
- return getTruncateOrSignExtend(X, Ty, Depth);
+ return getTruncateOrSignExtend(X, Ty, SCEV::FlagAnyWrap, Depth);
}
if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
@@ -2044,10 +2049,10 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
- const SCEV *CastedMaxBECount =
- getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
+ const SCEV *CastedMaxBECount = getTruncateOrZeroExtend(
+ MaxBECount, Start->getType(), SCEV::FlagAnyWrap, Depth);
const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
- CastedMaxBECount, MaxBECount->getType(), Depth);
+ CastedMaxBECount, MaxBECount->getType(), SCEV::FlagAnyWrap, Depth);
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
@@ -4714,6 +4719,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
}
const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
+ SCEV::NoWrapFlags Flags,
unsigned Depth) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
@@ -4721,11 +4727,12 @@ const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
- return getTruncateExpr(V, Ty, Depth);
+ return getTruncateExpr(V, Ty, Flags, Depth);
return getZeroExtendExpr(V, Ty, Depth);
}
const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty,
+ SCEV::NoWrapFlags Flags,
unsigned Depth) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
@@ -4733,7 +4740,7 @@ const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty,
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
- return getTruncateExpr(V, Ty, Depth);
+ return getTruncateExpr(V, Ty, Flags, Depth);
return getSignExtendExpr(V, Ty, Depth);
}
@@ -7837,8 +7844,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
ShiftedLHS = getUDivExpr(LHS, MulCount);
return getMulExpr(
getZeroExtendExpr(
- getTruncateExpr(ShiftedLHS,
- IntegerType::get(getContext(), BitWidth - LZ - TZ)),
+ getTruncateExpr(
+ ShiftedLHS,
+ IntegerType::get(getContext(), BitWidth - LZ - TZ),
+ SCEV::FlagSafeWrap, 0),
BO->LHS->getType()),
MulCount);
}
@@ -14790,7 +14799,7 @@ class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
const SCEV *Operand = visit(Expr->getOperand());
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
- if (AR && AR->getLoop() == L && AR->isAffine()) {
+ if (AR && AR->getLoop() == L && AR->isAffine() && !AR->hasSafeWrap()) {
// This couldn't be folded because the operand didn't have the nuw
// flag. Add the nusw flag as an assumption that we could make.
const SCEV *Step = AR->getStepRecurrence(SE);
@@ -14806,7 +14815,7 @@ class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
const SCEV *Operand = visit(Expr->getOperand());
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
- if (AR && AR->getLoop() == L && AR->isAffine()) {
+ if (AR && AR->getLoop() == L && AR->isAffine() && !AR->hasSafeWrap()) {
// This couldn't be folded because the operand didn't have the nsw
// flag. Add the nssw flag as an assumption that we could make.
const SCEV *Step = AR->getStepRecurrence(SE);
diff --git a/llvm/test/Analysis/ScalarEvolution/pr87798.ll b/llvm/test/Analysis/ScalarEvolution/pr87798.ll
index acd445993e47bc..c019c375cbb184 100644
--- a/llvm/test/Analysis/ScalarEvolution/pr87798.ll
+++ b/llvm/test/Analysis/ScalarEvolution/pr87798.ll
@@ -25,11 +25,11 @@ define i32 @pr87798() {
; CHECK-NEXT: %add4 = add i32 %mul, %phi
; CHECK-NEXT: --> {0,+,0,+,2,+,5,+,3}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
; CHECK-NEXT: %and = and i32 %phi, 1
-; CHECK-NEXT: --> (zext i1 {false,+,false,+,false,+,false,+,true}<%bb1> to i32) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT: --> (zext i1 {false,+,false,+,false,+,false,+,true}<sw><%bb1> to i32) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %bb1: Computable }
; CHECK-NEXT: %add5 = add i32 %phi3, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%bb1> U: [1,2) S: [1,2) Exits: 1 LoopDispositions: { %bb1: Computable }
; CHECK-NEXT: %phi9 = phi i32 [ %and, %bb1 ]
-; CHECK-NEXT: --> (zext i1 {false,+,false,+,false,+,false,+,true}<%bb1> to i32) U: [0,2) S: [0,2) --> 0 U: [0,1) S: [0,1)
+; CHECK-NEXT: --> (zext i1 {false,+,false,+,false,+,false,+,true}<sw><%bb1> to i32) U: [0,2) S: [0,2) --> 0 U: [0,1) S: [0,1)
; CHECK-NEXT: %zext = zext i32 %phi9 to i64
; CHECK-NEXT: --> poison U: full-set S: full-set
; CHECK-NEXT: Determining loop execution counts for: @pr87798
diff --git a/llvm/test/Analysis/ScalarEvolution/shift-recurrences.ll b/llvm/test/Analysis/ScalarEvolution/shift-recurrences.ll
index 6cd709bfff68f3..ed9220f9e001a2 100644
--- a/llvm/test/Analysis/ScalarEvolution/shift-recurrences.ll
+++ b/llvm/test/Analysis/ScalarEvolution/shift-recurrences.ll
@@ -323,7 +323,7 @@ define void @test_shl6(i1 %c) {
; CHECK-NEXT: %iv.next = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,6) S: [1,6) Exits: 5 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %shiftamt = and i64 %iv, 1
-; CHECK-NEXT: --> (zext i1 {false,+,true}<%loop> to i64) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> (zext i1 {false,+,true}<sw><%loop> to i64) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.shl.next = shl i64 %iv.shl, %shiftamt
; CHECK-NEXT: --> %iv.shl.next U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: 16 LoopDispositions: { %loop: Variant }
; CHECK-NEXT: Determining loop execution counts for: @test_shl6
diff --git a/llvm/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll b/llvm/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll
index fa4e5fb0ac433e..bcf156bb949411 100644
--- a/llvm/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll
+++ b/llvm/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll
@@ -15,7 +15,7 @@ define void @f0() {
; CHECK-NEXT: %v3 = add nsw i16 %v1, %v0
; CHECK-NEXT: --> {3,+,3,+,1}<%b1> U: full-set S: full-set Exits: 6 LoopDispositions: { %b1: Computable }
; CHECK-NEXT: %v4 = and i16 %v3, 1
-; CHECK-NEXT: --> (zext i1 {true,+,true,+,true}<%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable }
+; CHECK-NEXT: --> (zext i1 {true,+,true,+,true}<sw><%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable }
; CHECK-NEXT: Determining loop execution counts for: @f0
; CHECK-NEXT: Loop %b1: backedge-taken count is i6 1
; CHECK-NEXT: Loop %b1: constant max backedge-taken count is i6 1
@@ -54,7 +54,7 @@ define void @f1() #0 {
; CHECK-NEXT: %v3 = add i16 %v0, %v2
; CHECK-NEXT: --> {3,+,4,+,1}<%b1> U: full-set S: full-set Exits: 12 LoopDispositions: { %b1: Computable }
; CHECK-NEXT: %v4 = and i16 %v3, 1
-; CHECK-NEXT: --> (zext i1 {true,+,false,+,true}<%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable }
+; CHECK-NEXT: --> (zext i1 {true,+,false,+,true}<sw><%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable }
; CHECK-NEXT: %v6 = add nuw nsw i32 %v1, 1
; CHECK-NEXT: --> {4,+,1}<nuw><nsw><%b1> U: [4,7) S: [4,7) Exits: 6 LoopDispositions: { %b1: Computable }
; CHECK-NEXT: %v7 = phi i32 [ %v1, %b1 ]
diff --git a/llvm/test/Analysis/ScalarEvolution/solve-quadratic.ll b/llvm/test/Analysis/ScalarEvolution/solve-quadratic.ll
index fd02ef672a969e..bafe0606d8cd9b 100644
--- a/llvm/test/Analysis/ScalarEvolution/solve-quadratic.ll
+++ b/llvm/test/Analysis/ScalarEvolution/solve-quadratic.ll
@@ -41,7 +41,7 @@
; {14,+,14,+,14} -> X=0, Y=14, Z=14
;
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test01'
-; CHECK: {{.*}}GetQuadraticEquation{{.*}}: analyzing quadratic addrec: {-2,+,-2,+,-2}<%loop>
+; CHECK: {{.*}}GetQuadraticEquation{{.*}}: analyzing quadratic addrec: {-2,+,-2,+,-2}<sw><%loop>
; CHECK: {{.*}}GetQuadraticEquation{{.*}}: addrec coeff bw: 4
; CHECK: {{.*}}GetQuadraticEquation{{.*}}: equation -2x^2 + -2x + -4, coeff bw: 5, multiplied by 2
; CHECK: {{.*}}SolveQuadraticAddRecExact{{.*}}: solving for unsigned overflow
@@ -117,7 +117,7 @@ exit:
; {17,+,-1,+,2} -> X=-3, Y=20, Z=2
;
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test03':
-; CHECK: {{.*}}GetQuadraticEquation{{.*}}: analyzing quadratic addrec: {1,+,-1,+,2}<%loop>
+; CHECK: {{.*}}GetQuadraticEquation{{.*}}: analyzing quadratic addrec: {1,+,-1,+,2}<sw><%loop>
; CHECK: {{.*}}GetQuadraticEquation{{.*}}: addrec coeff bw: 4
; CHECK: {{.*}}GetQuadraticEquation{{.*}}: equation 2x^2 + -4x + 2, coeff bw: 5, multiplied by 2
; CHECK: {{.*}}SolveQuadraticAddRecExact{{.*}}: solving for unsigned overflow
diff --git a/llvm/test/Transforms/IndVarSimplify/shrunk-constant.ll b/llvm/test/Transforms/IndVarSimplify/shrunk-constant.ll
index 8ec8ec2c9e1f2c..5fcff4b98629b2 100644
--- a/llvm/test/Transforms/IndVarSimplify/shrunk-constant.ll
+++ b/llvm/test/Transforms/IndVarSimplify/shrunk-constant.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -passes='print<scalar-evolution>' 2>&1 | FileCheck %s
-; CHECK: --> (1 + (zext i4 {-8,+,-8}<%loop> to i32))<nuw><nsw>
+; CHECK: --> (1 + (zext i4 {-8,+,-8}<sw><%loop> to i32))<nuw><nsw>
define fastcc void @foo() nounwind {
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-wrap.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-wrap.ll
new file mode 100644
index 00000000000000..5f9275ba72cae5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-wrap.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=loop-vectorize,simplifycfg,instcombine -force-vector-interleave=1 -force-vector-width=4 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+
+@arr1 = dso_local local_unnamed_addr global [8192 x float] zeroinitializer, align 4
+@arr2 = dso_local local_unnamed_addr global [8192 x float] zeroinitializer, align 4
+@arr3 = dso_local local_unnamed_addr global [8192 x float] zeroinitializer, align 4
+@arr4 = dso_local local_unnamed_addr global [8192 x float] zeroinitializer, align 4
+@arr5 = dso_local local_unnamed_addr global [8192 x float] zeroinitializer, align 4
+@arr6 = dso_local local_unnamed_addr global [8192 x float] zeroinitializer, align 4
+@arr7 = dso_local local_unnamed_addr global [8192 x float] zeroinitializer, align 4
+@arr8 = dso_local local_unnamed_addr global [8192 x float] zeroinitializer, align 4
+@arr9 = dso_local local_unnamed_addr global [8192 x float] zeroinitializer, align 4
+
+define dso_local noundef float @loop3(i32 noundef signext %zero) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local noundef float @loop3(
+; CHECK-SAME: i32 noundef signext [[ZERO:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[VEC_IND]], splat (i32 127)
+; CHECK-NEXT: [[TMP1:%.*]] = uitofp nneg <4 x i32> [[VEC_IND]] to <4 x float>
+; CHECK-NEXT: [[TMP2:%.*]] = zext nneg <4 x i32> [[TMP0]] to <4 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [8192 x float], ptr @arr1, i64 0, <4 x i64> [[TMP2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> [[TMP1]], <4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true)), !tbaa [[TBAA9:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [8192 x float], ptr @arr2, i64 0, <4 x i64> [[TMP2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> [[TMP1]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true)), !tbaa [[TBAA9]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [8192 x float], ptr @arr3, i64 0, <4 x i64> [[TMP2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> [[TMP1]], <4 x ptr> [[TMP5]], i32 4, <4 x i1> splat (i1 true)), !tbaa [[TBAA9]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [8192 x float], ptr @arr4, i64 0, <4 x i64> [[TMP2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> [[TMP1]], <4 x ptr> [[TMP6]], i32 4, <4 x i1> splat (i1 true)), !tbaa [[TBAA9]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [8192 x float], ptr @arr5, i64 0, <4 x i64> [[TMP2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> [[TMP1]], <4 x ptr> [[TMP7]], i32 4, <4 x i1> splat (i1 true)), !tbaa [[TBAA9]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [8192 x float], ptr @arr6, i64 0, <4 x i64> [[TMP2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> [[TMP1]], <4 x ptr> [[TMP8]], i32 4, <4 x i1> splat (i1 true)), !tbaa [[TBAA9]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [8192 x float], ptr @arr7, i64 0, <4 x i64> [[TMP2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> [[TMP1]], <4 x ptr> [[TMP9]], i32 4, <4 x i1> splat (i1 true)), !tbaa [[TBAA9]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [8192 x float], ptr @arr8, i64 0, <4 x i64> [[TMP2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> [[TMP1]], <4 x ptr> [[TMP10]], i32 4, <4 x i1> splat (i1 true)), !tbaa [[TBAA9]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [8192 x float], ptr @arr9, i64 0, <4 x i64> [[TMP2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> [[TMP1]], <4 x ptr> [[TMP11]], i32 4, <4 x i1> splat (i1 true)), !tbaa [[TBAA9]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8192
+; CHECK-NEXT: br i1 [[TMP12]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: ret float 0.000000e+00
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret float 0.000000e+00
+
+for.body: ; preds = %entry, %for.body
+ %i.044 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %and = and i32 %i.044, 127
+ %conv = uitofp nneg i32 %i.044 to float
+ %idxprom = zext nneg i32 %and to i64
+ %arrayidx = getelementptr inbounds [8192 x float], ptr @arr1, i64 0, i64 %idxprom
+ store float %conv, ptr %arrayidx, align 4, !tbaa !9
+ %arrayidx3 = getelementptr inbounds [8192 x float], ptr @arr2, i64 0, i64 %idxprom
+ store float %conv, ptr %arrayidx3, align 4, !tbaa !9
+ %arrayidx6 = getelementptr inbounds [8192 x float], ptr @arr3, i64 0, i64 %idxprom
+ store float %conv, ptr %arrayidx6, align 4, !tbaa !9
+ %arrayidx9 = getelementptr inbounds [8192 x float], ptr @arr4, i64 0, i64 %idxprom
+ store float %conv, ptr %arrayidx9, align 4, !tbaa !9
+ %arrayidx12 = getelementptr inbounds [8192 x float], ptr @arr5, i64 0, i64 %idxprom
+ store float %conv, ptr %arrayidx12, align 4, !tbaa !9
+ %arrayidx15 = getelementptr inbounds [8192 x float], ptr @arr6, i64 0, i64 %idxprom
+ store float %conv, ptr %arrayidx15, align 4, !tbaa !9
+ %arrayidx18 = getelementptr inbounds [8192 x float], ptr @arr7, i64 0, i64 %idxprom
+ store float %conv, ptr %arrayidx18, align 4, !tbaa !9
+ %arrayidx21 = getelementptr inbounds [8192 x float], ptr @arr8, i64 0, i64 %idxprom
+ store float %conv, ptr %arrayidx21, align 4, !tbaa !9
+ %arrayidx24 = getelementptr inbounds [8192 x float], ptr @arr9, i64 0, i64 %idxprom
+ store float %conv, ptr %arrayidx24, align 4, !tbaa !9
+ %add = add nuw nsw i32 %i.044, 1
+ %exitcond.not = icmp eq i32 %add, 8192
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nofree noinline norecurse nosync nounwind memory(write, argmem: none, inaccessiblemem: none) uwtable vscale_range(4,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+zba,+zbb,+zbs,+zicond,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl256b,+zvl32b,+zvl64b,-b,-e,-experimental-smctr,-experimental-smmpm,-experimental-smnpm,-experimental-ssctr,-experimental-ssnpm,-experimental-sspm,-experimental-supm,-experimental-zacas,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smepmp,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xventanamatmul,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zalrsc,-zama16b,-zawrs,-zbc,-zbkb,-zbkc,-zbkx,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
+
+!llvm.module.flags = !{!0, !1, !2, !4, !5, !6, !7}
+!llvm.ident = !{!8}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"target-abi", !"lp64d"}
+!2 = !{i32 6, !"riscv-isa", !3}
+!3 = !{!"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicond1p0_zicsr2p0_zifencei2p0_zmmul1p0_zba1p0_zbb1p0_zbs1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0"}
+!4 = !{i32 8, !"PIC Level", i32 2}
+!5 = !{i32 7, !"PIE Level", i32 2}
+!6 = !{i32 7, !"uwtable", i32 2}
+!7 = !{i32 8, !"SmallDataLimit", i32 0}
+!8 = !{!"clang version 20.0.0git (git at github.com:expertisesolutions/llvm.git 18fcedf2e278ec646fddb9b2d24a5abe98cdb7ed)"}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"float", !11, i64 0}
+!11 = !{!"omnipotent char", !12, i64 0}
+!12 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+; CHECK: [[META10]] = !{!"float", [[META11:![0-9]+]], i64 0}
+; CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0}
+; CHECK: [[META12]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META14:![0-9]+]], [[META15:![0-9]+]]}
+; CHECK: [[META14]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META15]] = !{!"llvm.loop.unroll.runtime.disable"}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
index 921cf4246f7259..15cb3e5b062536 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -33,8 +33,7 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 %indvars.iv
store x86_fp80 %conv, ptr %arrayidx, align 16
%indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body