[llvm] 6e50474 - [Scalarizer] InsertElement handling w/ variable insert index (PR46524)

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 6 03:20:11 PDT 2020


Author: Roman Lebedev
Date: 2020-07-06T13:19:32+03:00
New Revision: 6e504745813259067f5b0ad696bec3a3d22ab044

URL: https://github.com/llvm/llvm-project/commit/6e504745813259067f5b0ad696bec3a3d22ab044
DIFF: https://github.com/llvm/llvm-project/commit/6e504745813259067f5b0ad696bec3a3d22ab044.diff

LOG: [Scalarizer] InsertElement handling w/ variable insert index (PR46524)

Summary:
I'm interested in taking the original C++ input,
for which we are currently stuck with an alloca,
and producing roughly the lower IR,
with neither an alloca nor any vector ops:
https://godbolt.org/z/cRRWaJ

For that, as an intermediate step, I'd like to somehow perform scalarization.
As per @arsenm's suggestion, I'm trying to see if the scalarizer can help me
avoid reinventing the wheel.

I'm not sure whether it is intentional that variable-index inserts are currently not handled.
If it is, and it is supposed to stay that way, I guess I could guard this behind an option.

See [[ https://bugs.llvm.org/show_bug.cgi?id=46524 | PR46524 ]].

Reviewers: bjope, cameron.mcinally, arsenm, jdoerfert

Reviewed By: jdoerfert

Subscribers: arphaman, uabelho, wdng, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82961

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/Scalarizer.cpp
    llvm/test/Transforms/Scalarizer/variable-insertelement.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 5cc4d795d767..0327d3932135 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -51,6 +51,11 @@ using namespace llvm;
 
 #define DEBUG_TYPE "scalarizer"
 
+static cl::opt<bool> ScalarizeVariableInsertExtract(
+    "scalarize-variable-insert-extract", cl::init(true), cl::Hidden,
+    cl::desc("Allow the scalarizer pass to scalarize "
+             "insertelement/extractelement with variable index"));
+
 // This is disabled by default because having separate loads and stores
 // makes it more likely that the -combiner-alias-analysis limits will be
 // reached.
@@ -760,7 +765,15 @@ bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
     for (unsigned I = 0; I < NumElems; ++I)
       Res[I] = CI->getValue().getZExtValue() == I ? NewElt : Op0[I];
   } else {
-    return false;
+    if (!ScalarizeVariableInsertExtract)
+      return false;
+
+    for (unsigned I = 0; I < NumElems; ++I) {
+      Res[I] = Builder.CreateSelect(
+          Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
+                               InsIdx->getName() + ".is." + Twine(I)),
+          NewElt, Op0[I], IEI.getName() + ".i" + Twine(I));
+    }
   }
 
   gather(&IEI, Res);

diff  --git a/llvm/test/Transforms/Scalarizer/variable-insertelement.ll b/llvm/test/Transforms/Scalarizer/variable-insertelement.ll
index fc2955fc1ae4..aeec2ddea2ea 100644
--- a/llvm/test/Transforms/Scalarizer/variable-insertelement.ll
+++ b/llvm/test/Transforms/Scalarizer/variable-insertelement.ll
@@ -1,50 +1,82 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck --check-prefixes=ALL %s
+; RUN: opt %s -scalarizer -dce -S | FileCheck --check-prefixes=ALL,DEFAULT %s
+; RUN: opt %s -scalarizer -scalarize-variable-insert-extract=false -dce -S | FileCheck --check-prefixes=ALL,OFF %s
+; RUN: opt %s -scalarizer -scalarize-variable-insert-extract=true -dce -S | FileCheck --check-prefixes=ALL,DEFAULT,ON %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 ; Test that variable inserts are scalarized.
 define <4 x i32> @f1(<4 x i32> %src, i32 %val, i32 %index) {
-; ALL-LABEL: @f1(
-; ALL-NEXT:    [[RES:%.*]] = insertelement <4 x i32> [[SRC:%.*]], i32 [[VAL:%.*]], i32 [[INDEX:%.*]]
-; ALL-NEXT:    ret <4 x i32> [[RES]]
+; DEFAULT-LABEL: @f1(
+; DEFAULT-NEXT:    [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; DEFAULT-NEXT:    [[SRC_I0:%.*]] = extractelement <4 x i32> [[SRC:%.*]], i32 0
+; DEFAULT-NEXT:    [[RES_I0:%.*]] = select i1 [[INDEX_IS_0]], i32 [[VAL:%.*]], i32 [[SRC_I0]]
+; DEFAULT-NEXT:    [[INDEX_IS_1:%.*]] = icmp eq i32 [[INDEX]], 1
+; DEFAULT-NEXT:    [[SRC_I1:%.*]] = extractelement <4 x i32> [[SRC]], i32 1
+; DEFAULT-NEXT:    [[RES_I1:%.*]] = select i1 [[INDEX_IS_1]], i32 [[VAL]], i32 [[SRC_I1]]
+; DEFAULT-NEXT:    [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2
+; DEFAULT-NEXT:    [[SRC_I2:%.*]] = extractelement <4 x i32> [[SRC]], i32 2
+; DEFAULT-NEXT:    [[RES_I2:%.*]] = select i1 [[INDEX_IS_2]], i32 [[VAL]], i32 [[SRC_I2]]
+; DEFAULT-NEXT:    [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3
+; DEFAULT-NEXT:    [[SRC_I3:%.*]] = extractelement <4 x i32> [[SRC]], i32 3
+; DEFAULT-NEXT:    [[RES_I3:%.*]] = select i1 [[INDEX_IS_3]], i32 [[VAL]], i32 [[SRC_I3]]
+; DEFAULT-NEXT:    [[RES_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[RES_I0]], i32 0
+; DEFAULT-NEXT:    [[RES_UPTO1:%.*]] = insertelement <4 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
+; DEFAULT-NEXT:    [[RES_UPTO2:%.*]] = insertelement <4 x i32> [[RES_UPTO1]], i32 [[RES_I2]], i32 2
+; DEFAULT-NEXT:    [[RES:%.*]] = insertelement <4 x i32> [[RES_UPTO2]], i32 [[RES_I3]], i32 3
+; DEFAULT-NEXT:    ret <4 x i32> [[RES]]
+;
+; OFF-LABEL: @f1(
+; OFF-NEXT:    [[RES:%.*]] = insertelement <4 x i32> [[SRC:%.*]], i32 [[VAL:%.*]], i32 [[INDEX:%.*]]
+; OFF-NEXT:    ret <4 x i32> [[RES]]
 ;
   %res = insertelement <4 x i32> %src, i32 %val, i32 %index
   ret <4 x i32> %res
 }
 
 define void @f2(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
-; ALL-LABEL: @f2(
-; ALL-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x i32>* [[DEST:%.*]] to i32*
-; ALL-NEXT:    [[DEST_I1:%.*]] = getelementptr i32, i32* [[DEST_I0]], i32 1
-; ALL-NEXT:    [[DEST_I2:%.*]] = getelementptr i32, i32* [[DEST_I0]], i32 2
-; ALL-NEXT:    [[DEST_I3:%.*]] = getelementptr i32, i32* [[DEST_I0]], i32 3
-; ALL-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
-; ALL-NEXT:    [[VAL0_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16
-; ALL-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
-; ALL-NEXT:    [[VAL0_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
-; ALL-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
-; ALL-NEXT:    [[VAL0_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
-; ALL-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
-; ALL-NEXT:    [[VAL0_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
-; ALL-NEXT:    [[VAL0_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL0_I0]], i32 0
-; ALL-NEXT:    [[VAL0_UPTO1:%.*]] = insertelement <4 x i32> [[VAL0_UPTO0]], i32 [[VAL0_I1]], i32 1
-; ALL-NEXT:    [[VAL0_UPTO2:%.*]] = insertelement <4 x i32> [[VAL0_UPTO1]], i32 [[VAL0_I2]], i32 2
-; ALL-NEXT:    [[VAL0:%.*]] = insertelement <4 x i32> [[VAL0_UPTO2]], i32 [[VAL0_I3]], i32 3
-; ALL-NEXT:    [[VAL1:%.*]] = insertelement <4 x i32> [[VAL0]], i32 1, i32 [[INDEX:%.*]]
-; ALL-NEXT:    [[VAL1_I0:%.*]] = extractelement <4 x i32> [[VAL1]], i32 0
-; ALL-NEXT:    [[VAL2_I0:%.*]] = shl i32 1, [[VAL1_I0]]
-; ALL-NEXT:    [[VAL1_I1:%.*]] = extractelement <4 x i32> [[VAL1]], i32 1
-; ALL-NEXT:    [[VAL2_I1:%.*]] = shl i32 2, [[VAL1_I1]]
-; ALL-NEXT:    [[VAL1_I2:%.*]] = extractelement <4 x i32> [[VAL1]], i32 2
-; ALL-NEXT:    [[VAL2_I2:%.*]] = shl i32 3, [[VAL1_I2]]
-; ALL-NEXT:    [[VAL1_I3:%.*]] = extractelement <4 x i32> [[VAL1]], i32 3
-; ALL-NEXT:    [[VAL2_I3:%.*]] = shl i32 4, [[VAL1_I3]]
-; ALL-NEXT:    store i32 [[VAL2_I0]], i32* [[DEST_I0]], align 16
-; ALL-NEXT:    store i32 [[VAL2_I1]], i32* [[DEST_I1]], align 4
-; ALL-NEXT:    store i32 [[VAL2_I2]], i32* [[DEST_I2]], align 8
-; ALL-NEXT:    store i32 [[VAL2_I3]], i32* [[DEST_I3]], align 4
-; ALL-NEXT:    ret void
+; DEFAULT-LABEL: @f2(
+; DEFAULT-NEXT:    [[VAL0:%.*]] = load <4 x i32>, <4 x i32>* [[SRC:%.*]], align 16
+; DEFAULT-NEXT:    [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; DEFAULT-NEXT:    [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0
+; DEFAULT-NEXT:    [[VAL1_I0:%.*]] = select i1 [[INDEX_IS_0]], i32 1, i32 [[VAL0_I0]]
+; DEFAULT-NEXT:    [[INDEX_IS_1:%.*]] = icmp eq i32 [[INDEX]], 1
+; DEFAULT-NEXT:    [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1
+; DEFAULT-NEXT:    [[VAL1_I1:%.*]] = select i1 [[INDEX_IS_1]], i32 1, i32 [[VAL0_I1]]
+; DEFAULT-NEXT:    [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2
+; DEFAULT-NEXT:    [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2
+; DEFAULT-NEXT:    [[VAL1_I2:%.*]] = select i1 [[INDEX_IS_2]], i32 1, i32 [[VAL0_I2]]
+; DEFAULT-NEXT:    [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3
+; DEFAULT-NEXT:    [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i32 3
+; DEFAULT-NEXT:    [[VAL1_I3:%.*]] = select i1 [[INDEX_IS_3]], i32 1, i32 [[VAL0_I3]]
+; DEFAULT-NEXT:    [[VAL2_I0:%.*]] = shl i32 1, [[VAL1_I0]]
+; DEFAULT-NEXT:    [[VAL2_I1:%.*]] = shl i32 2, [[VAL1_I1]]
+; DEFAULT-NEXT:    [[VAL2_I2:%.*]] = shl i32 3, [[VAL1_I2]]
+; DEFAULT-NEXT:    [[VAL2_I3:%.*]] = shl i32 4, [[VAL1_I3]]
+; DEFAULT-NEXT:    [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL2_I0]], i32 0
+; DEFAULT-NEXT:    [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1
+; DEFAULT-NEXT:    [[VAL2_UPTO2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO1]], i32 [[VAL2_I2]], i32 2
+; DEFAULT-NEXT:    [[VAL2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO2]], i32 [[VAL2_I3]], i32 3
+; DEFAULT-NEXT:    store <4 x i32> [[VAL2]], <4 x i32>* [[DEST:%.*]], align 16
+; DEFAULT-NEXT:    ret void
+;
+; OFF-LABEL: @f2(
+; OFF-NEXT:    [[VAL0:%.*]] = load <4 x i32>, <4 x i32>* [[SRC:%.*]], align 16
+; OFF-NEXT:    [[VAL1:%.*]] = insertelement <4 x i32> [[VAL0]], i32 1, i32 [[INDEX:%.*]]
+; OFF-NEXT:    [[VAL1_I0:%.*]] = extractelement <4 x i32> [[VAL1]], i32 0
+; OFF-NEXT:    [[VAL2_I0:%.*]] = shl i32 1, [[VAL1_I0]]
+; OFF-NEXT:    [[VAL1_I1:%.*]] = extractelement <4 x i32> [[VAL1]], i32 1
+; OFF-NEXT:    [[VAL2_I1:%.*]] = shl i32 2, [[VAL1_I1]]
+; OFF-NEXT:    [[VAL1_I2:%.*]] = extractelement <4 x i32> [[VAL1]], i32 2
+; OFF-NEXT:    [[VAL2_I2:%.*]] = shl i32 3, [[VAL1_I2]]
+; OFF-NEXT:    [[VAL1_I3:%.*]] = extractelement <4 x i32> [[VAL1]], i32 3
+; OFF-NEXT:    [[VAL2_I3:%.*]] = shl i32 4, [[VAL1_I3]]
+; OFF-NEXT:    [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL2_I0]], i32 0
+; OFF-NEXT:    [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1
+; OFF-NEXT:    [[VAL2_UPTO2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO1]], i32 [[VAL2_I2]], i32 2
+; OFF-NEXT:    [[VAL2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO2]], i32 [[VAL2_I3]], i32 3
+; OFF-NEXT:    store <4 x i32> [[VAL2]], <4 x i32>* [[DEST:%.*]], align 16
+; OFF-NEXT:    ret void
 ;
   %val0 = load <4 x i32> , <4 x i32> *%src
   %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index


        


More information about the llvm-commits mailing list