[PATCH] D26556: [InstCombine] don't widen most selects by hoisting an extend

Fri Nov 11 12:28:37 PST 2016

spatel created this revision.
spatel added reviewers: efriedma, mkuper, majnemer.
spatel added a subscriber: llvm-commits.
Herald added a subscriber: mcrosier.

This is related to the discussion in PR28160:
https://llvm.org/bugs/show_bug.cgi?id=28160

...but it's not the same example. It will help PR30773 more directly:
https://llvm.org/bugs/show_bug.cgi?id=30773

...because we handle selects with a constant operand on a different path than selects with two variables. Assuming this is the right thing to do, the next step will be to allow shrinking selects by sinking an extend after the select. That's easy because we already do that transform in InstCombiner::foldSelectExtConst(), but it's artificially limited to i1 types currently.

An example of the AVX2 codegen improvement from this patch using one of the vector regression tests:

Sext before:

  define <4 x i64> @g1vec(<4 x i32> %a, <4 x i1> %cmp) {
    %ext = sext <4 x i32> %a to <4 x i64>
    %sel = select <4 x i1> %cmp, <4 x i64> %ext, <4 x i64> <i64 42, i64 42, i64 42, i64 42>
    ret <4 x i64> %ext
  }
  
  vpslld	$31, %xmm1, %xmm1
  vpmovsxdq	%xmm1, %ymm1
  vpmovsxdq	%xmm0, %ymm0
  vbroadcastsd	LCPI1_0(%rip), %ymm2
  vblendvpd	%ymm1, %ymm0, %ymm2, %ymm0
  retq

Sext after:

  define <4 x i64> @g1vec(<4 x i32> %a, <4 x i1> %cmp) {
    %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
    %ext = sext <4 x i32> %sel to <4 x i64>
    ret <4 x i64> %ext
  }
  
  vpslld	$31, %xmm1, %xmm1
  vbroadcastss	LCPI1_0(%rip), %xmm2  <-- smaller load
  vblendvps	%xmm1, %xmm0, %xmm2, %xmm0 <-- smaller select
  vpmovsxdq	%xmm0, %ymm0
  retq

The check for a leading trunc is to avoid regressing this test in test/Transforms/InstCombine/sext.ll:

  define i32 @test8(i8 %a, i32 %f, i1 %p, i32* %z) {
  ; CHECK-LABEL: @test8(
  ; CHECK-NEXT:    [[D:%.*]] = lshr i32 %f, 24
  ; CHECK-NEXT:    [[N:%.*]] = select i1 %p, i32 [[D]], i32 0
  ; CHECK-NEXT:    ret i32 [[N]]
  ;
    %d = lshr i32 %f, 24
    %e = select i1 %p, i32 %d, i32 0
    %s = trunc i32 %e to i16
    %n = sext i16 %s to i32
    ret i32 %n
  }


https://reviews.llvm.org/D26556

Files:
  lib/Transforms/InstCombine/InstructionCombining.cpp
  test/Transforms/InstCombine/select-bitext.ll


Index: test/Transforms/InstCombine/select-bitext.ll
===================================================================

--- test/Transforms/InstCombine/select-bitext.ll
+++ test/Transforms/InstCombine/select-bitext.ll
@@ -5,8 +5,8 @@
 
 define i64 @sel_sext(i32 %a, i1 %cmp) {
 ; CHECK-LABEL: @sel_sext(
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 %a to i64
-; CHECK-NEXT:    [[EXT:%.*]] = select i1 %cmp, i64 [[TMP1]], i64 42
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 %cmp, i32 %a, i32 42
+; CHECK-NEXT:    [[EXT:%.*]] = sext i32 [[SEL]] to i64
 ; CHECK-NEXT:    ret i64 [[EXT]]
 ;
   %sel = select i1 %cmp, i32 %a, i32 42
@@ -16,8 +16,8 @@
 
 define <4 x i64> @sel_sext_vec(<4 x i32> %a, <4 x i1> %cmp) {
 ; CHECK-LABEL: @sel_sext_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = sext <4 x i32> %a to <4 x i64>
-; CHECK-NEXT:    [[EXT:%.*]] = select <4 x i1> %cmp, <4 x i64> [[TMP1]], <4 x i64> <i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[EXT:%.*]] = sext <4 x i32> [[SEL]] to <4 x i64>
 ; CHECK-NEXT:    ret <4 x i64> [[EXT]]
 ;
   %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
@@ -27,8 +27,8 @@
 
 define i64 @sel_zext(i32 %a, i1 %cmp) {
 ; CHECK-LABEL: @sel_zext(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 %a to i64
-; CHECK-NEXT:    [[EXT:%.*]] = select i1 %cmp, i64 [[TMP1]], i64 42
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 %cmp, i32 %a, i32 42
+; CHECK-NEXT:    [[EXT:%.*]] = zext i32 [[SEL]] to i64
 ; CHECK-NEXT:    ret i64 [[EXT]]
 ;
   %sel = select i1 %cmp, i32 %a, i32 42
@@ -38,8 +38,8 @@
 
 define <4 x i64> @sel_zext_vec(<4 x i32> %a, <4 x i1> %cmp) {
 ; CHECK-LABEL: @sel_zext_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i32> %a to <4 x i64>
-; CHECK-NEXT:    [[EXT:%.*]] = select <4 x i1> %cmp, <4 x i64> [[TMP1]], <4 x i64> <i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[EXT:%.*]] = zext <4 x i32> [[SEL]] to <4 x i64>
 ; CHECK-NEXT:    ret <4 x i64> [[EXT]]
 ;
   %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -780,8 +780,8 @@
 
 /// Given an instruction with a select as one operand and a constant as the
 /// other operand, try to fold the binary operator into the select arguments.
-/// This also works for Cast instructions, which obviously do not have a second
-/// operand.
+/// This also works for some Cast instructions, which obviously do not have a
+/// second operand.
 Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
   // Don't modify shared select instructions.
   if (!SI->hasOneUse())
@@ -796,6 +796,20 @@
   if (SI->getType()->getScalarType()->isIntegerTy(1))
     return nullptr;
 
+  // If Op is an extend and this is not a select of constants, do not grow the
+  // select operand sizes by pulling the extend ahead of the select unless
+  // there's also a truncate before the select that will allow folding with the
+  // extend. This is particularly important for vectors because we want to use
+  // the narrowest operations possible for a given number of vector lanes.
+  auto isNonTruncVariable = [](Value *V) {
+    return !isa<Constant>(V) && !match(V, m_Trunc(m_Value()));
+  };
+
+  unsigned SrcWidth = SI->getType()->getScalarSizeInBits();
+  unsigned DstWidth = Op.getType()->getScalarSizeInBits();
+  if (SrcWidth < DstWidth && (isNonTruncVariable(TV) || isNonTruncVariable(FV)))
+    return nullptr;
+
   // If it's a bitcast involving vectors, make sure it has the same number of
   // elements on both sides.
   if (auto *BC = dyn_cast<BitCastInst>(&Op)) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D26556.77644.patch
Type: text/x-patch
Size: 3995 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161111/bb0b99ce/attachment.bin>