[llvm] ac8c720 - [IR] Allow constant folding (insertelement <vscale x 2 x i32> zeroinitializer, i32 0, i32 0).

Fri Apr 15 17:44:48 PDT 2022

Author: Craig Topper
Date: 2022-04-15T17:44:32-07:00
New Revision: ac8c720d4863b2b520f31b156ee5e55aff2e1cde

URL: https://github.com/llvm/llvm-project/commit/ac8c720d4863b2b520f31b156ee5e55aff2e1cde
DIFF: https://github.com/llvm/llvm-project/commit/ac8c720d4863b2b520f31b156ee5e55aff2e1cde.diff

LOG: [IR] Allow constant folding (insertelement <vscale x 2 x i32> zeroinitializer, i32 0, i32 0).

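Most insertelement constant folding is blocked if the vector type
is scalable. I believe we can make an exception for inserting null
into an all-zeros vector: whichever lane is written, the result is
still all zeros, so the fold does not need to know the element count.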

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D123413

Added: 
    

Modified: 
    llvm/lib/IR/ConstantFold.cpp
    llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index e38cd820da29f..9cbbe2bf5df8c 100644

--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -683,6 +683,11 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
   if (isa<UndefValue>(Idx))
     return PoisonValue::get(Val->getType());
 
+  // Inserting null into all zeros is still all zeros.
+  // TODO: This is true for undef and poison splats too.
+  if (isa<ConstantAggregateZero>(Val) && Elt->isNullValue())
+    return Val;
+
   ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
   if (!CIdx) return nullptr;
 

diff  --git a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
index ec98a13233c05..6e9ecf5e3d93b 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
@@ -50,3 +50,19 @@ define i64 @extract_undef_index_from_nonzero_vec() {
   %E = extractelement <2 x i64> <i64 -1, i64 -1>, i64 undef
   ret i64 %E
 }
+
+define <vscale x 2 x i32> @insertelement_scalable_null() {
+; CHECK-LABEL: @insertelement_scalable_null(
+; CHECK-NEXT:    ret <vscale x 2 x i32> zeroinitializer
+;
+  %vec = insertelement <vscale x 2 x i32> zeroinitializer, i32 0, i32 0
+  ret <vscale x 2 x i32> %vec
+}
+
+define <vscale x 2 x float> @insertelement_scalable_null_fp() {
+; CHECK-LABEL: @insertelement_scalable_null_fp(
+; CHECK-NEXT:    ret <vscale x 2 x float> zeroinitializer
+;
+  %vec = insertelement <vscale x 2 x float> zeroinitializer, float 0.0, i32 1
+  ret <vscale x 2 x float> %vec
+}

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 6949b63394cb2..3b9aa409b960a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -251,7 +251,7 @@ define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ insertelement (<vscale x 2 x i32> zeroinitializer, i32 0, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()