[llvm] ac8c720 - [IR] Allow constant folding (insertelement <vscale x 2 x i32> zeroinitializer, i32 0, i32 0).
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 15 17:44:48 PDT 2022
Author: Craig Topper
Date: 2022-04-15T17:44:32-07:00
New Revision: ac8c720d4863b2b520f31b156ee5e55aff2e1cde
URL: https://github.com/llvm/llvm-project/commit/ac8c720d4863b2b520f31b156ee5e55aff2e1cde
DIFF: https://github.com/llvm/llvm-project/commit/ac8c720d4863b2b520f31b156ee5e55aff2e1cde.diff
LOG: [IR] Allow constant folding (insertelement <vscale x 2 x i32> zeroinitializer, i32 0, i32 0).
Most of insertelement constant folding is blocked if the vector type
is scalable. I believe we can make an exception for inserting null
into an all zeros vector.
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D123413
Added:
Modified:
llvm/lib/IR/ConstantFold.cpp
llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
Removed:
################################################################################
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index e38cd820da29f..9cbbe2bf5df8c 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -683,6 +683,11 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
if (isa<UndefValue>(Idx))
return PoisonValue::get(Val->getType());
+ // Inserting null into all zeros is still all zeros.
+ // TODO: This is true for undef and poison splats too.
+ if (isa<ConstantAggregateZero>(Val) && Elt->isNullValue())
+ return Val;
+
ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
if (!CIdx) return nullptr;
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
index ec98a13233c05..6e9ecf5e3d93b 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
@@ -50,3 +50,19 @@ define i64 @extract_undef_index_from_nonzero_vec() {
%E = extractelement <2 x i64> <i64 -1, i64 -1>, i64 undef
ret i64 %E
}
+
+define <vscale x 2 x i32> @insertelement_scalable_null() {
+; CHECK-LABEL: @insertelement_scalable_null(
+; CHECK-NEXT: ret <vscale x 2 x i32> zeroinitializer
+;
+ %vec = insertelement <vscale x 2 x i32> zeroinitializer, i32 0, i32 0
+ ret <vscale x 2 x i32> %vec
+}
+
+define <vscale x 2 x float> @insertelement_scalable_null_fp() {
+; CHECK-LABEL: @insertelement_scalable_null_fp(
+; CHECK-NEXT: ret <vscale x 2 x float> zeroinitializer
+;
+ %vec = insertelement <vscale x 2 x float> zeroinitializer, float 0.0, i32 1
+ ret <vscale x 2 x float> %vec
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 6949b63394cb2..3b9aa409b960a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -251,7 +251,7 @@ define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ insertelement (<vscale x 2 x i32> zeroinitializer, i32 0, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
More information about the llvm-commits
mailing list