[llvm-commits] [llvm] r155362 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineShifts.cpp test/Transforms/InstCombine/2010-11-01-lshr-mask.ll test/Transforms/InstCombine/apint-shift.ll test/Transforms/InstCombine/cast.ll test/Transforms/InstCombine/shift.ll test/Transforms/PhaseOrdering/basic.ll test/Transforms/PhaseOrdering/scev.ll

Mon Apr 23 10:39:52 PDT 2012

Author: stoklund
Date: Mon Apr 23 12:39:52 2012
New Revision: 155362

URL: http://llvm.org/viewvc/llvm-project?rev=155362&view=rev
Log:
Reapply r155136 after fixing PR12599.

Original commit message:

Defer some shl transforms to DAGCombine.

The shl instruction is used to represent multiplication by a constant
power of two as well as bitwise left shifts. Some InstCombine
transformations would turn an shl instruction into a bit mask operation,
making it difficult for later analysis passes to recognize the
constsnt multiplication.

Disable those shl transformations, deferring them to DAGCombine time.
An 'shl X, C' instruction is now treated mostly the same was as 'mul X, C'.

These transformations are deferred:

  (X >>? C) << C   --> X & (-1 << C)  (When X >> C has multiple uses)
  (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)   (When C2 > C1)
  (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)  (When C1 > C2)

The corresponding exact transformations are preserved, just like
div-exact + mul:

  (X >>?,exact C) << C   --> X
  (X >>?,exact C1) << C2 --> X << (C2-C1)
  (X >>?,exact C1) << C2 --> X >>?,exact (C1-C2)

The disabled transformations could also prevent the instruction selector
from recognizing rotate patterns in hash functions and cryptographic
primitives. I have a test case for that, but it is too fragile.

Added:
    llvm/trunk/test/Transforms/PhaseOrdering/scev.ll
Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp
    llvm/trunk/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
    llvm/trunk/test/Transforms/InstCombine/apint-shift.ll
    llvm/trunk/test/Transforms/InstCombine/cast.ll
    llvm/trunk/test/Transforms/InstCombine/shift.ll
    llvm/trunk/test/Transforms/PhaseOrdering/basic.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp?rev=155362&r1=155361&r2=155362&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp Mon Apr 23 12:39:52 2012
@@ -529,6 +529,19 @@
     ShiftOp = 0;
   
   if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+
+    // This is a constant shift of a constant shift. Be careful about hiding
+    // shl instructions behind bit masks. They are used to represent multiplies
+    // by a constant, and it is important that simple arithmetic expressions
+    // are still recognizable by scalar evolution.
+    //
+    // The transforms applied to shl are very similar to the transforms applied
+    // to mul by constant. We can be more aggressive about optimizing right
+    // shifts.
+    //
+    // Combinations of right and left shifts will still be optimized in
+    // DAGCombine where scalar evolution no longer applies.
+
     ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
     uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
     uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
@@ -554,13 +567,6 @@
     }
     
     if (ShiftAmt1 == ShiftAmt2) {
-      // If we have ((X >>? C) << C), turn this into X & (-1 << C).
-      if (I.getOpcode() == Instruction::Shl &&
-          ShiftOp->getOpcode() != Instruction::Shl) {
-        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
-        return BinaryOperator::CreateAnd(X,
-                                         ConstantInt::get(I.getContext(),Mask));
-      }
       // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
       if (I.getOpcode() == Instruction::LShr &&
           ShiftOp->getOpcode() == Instruction::Shl) {
@@ -570,28 +576,23 @@
       }
     } else if (ShiftAmt1 < ShiftAmt2) {
       uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
-      
-      // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
+
+      // (X >>?,exact C1) << C2 --> X << (C2-C1)
+      // The inexact version is deferred to DAGCombine so we don't hide shl
+      // behind a bit mask.
       if (I.getOpcode() == Instruction::Shl &&
-          ShiftOp->getOpcode() != Instruction::Shl) {
+          ShiftOp->getOpcode() != Instruction::Shl &&
+          ShiftOp->isExact()) {
         assert(ShiftOp->getOpcode() == Instruction::LShr ||
                ShiftOp->getOpcode() == Instruction::AShr);
         ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
-        if (ShiftOp->isExact()) {
-          // (X >>?,exact C1) << C2 --> X << (C2-C1)
-          BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
-                                                          X, ShiftDiffCst);
-          NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
-          NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
-          return NewShl;
-        }
-        Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
-        
-        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift,
-                                         ConstantInt::get(I.getContext(),Mask));
+        BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+                                                        X, ShiftDiffCst);
+        NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
+        return NewShl;
       }
-      
+
       // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
       if (I.getOpcode() == Instruction::LShr &&
           ShiftOp->getOpcode() == Instruction::Shl) {
@@ -627,24 +628,19 @@
       assert(ShiftAmt2 < ShiftAmt1);
       uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
 
-      // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
+      // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+      // The inexact version is deferred to DAGCombine so we don't hide shl
+      // behind a bit mask.
       if (I.getOpcode() == Instruction::Shl &&
-          ShiftOp->getOpcode() != Instruction::Shl) {
+          ShiftOp->getOpcode() != Instruction::Shl &&
+          ShiftOp->isExact()) {
         ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
-        if (ShiftOp->isExact()) {
-          // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
-          BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
-                                                          X, ShiftDiffCst);
-          NewShr->setIsExact(true);
-          return NewShr;
-        }
-        Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(),
-                                            X, ShiftDiffCst);
-        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift,
-                                         ConstantInt::get(I.getContext(),Mask));
+        BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
+                                                        X, ShiftDiffCst);
+        NewShr->setIsExact(true);
+        return NewShr;
       }
-      
+
       // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
       if (I.getOpcode() == Instruction::LShr &&
           ShiftOp->getOpcode() == Instruction::Shl) {

Modified: llvm/trunk/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll?rev=155362&r1=155361&r2=155362&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll Mon Apr 23 12:39:52 2012
@@ -5,8 +5,8 @@
 define i32 @main(i32 %argc) nounwind ssp {
 entry:
   %tmp3151 = trunc i32 %argc to i8
-; CHECK: %tmp3162 = shl i8 %tmp3151, 5
-; CHECK: and i8 %tmp3162, 64
+; CHECK: %tmp3163 = shl i8 %tmp3162, 6
+; CHECK: and i8 %tmp3163, 64
 ; CHECK-NOT: shl
 ; CHECK-NOT: shr
   %tmp3161 = or i8 %tmp3151, -17
@@ -38,8 +38,8 @@
   %tmp10 = lshr i8 %tmp8, 7
   %tmp11 = shl i8 %tmp10, 5
 
-; CHECK: %0 = lshr i8 %tmp8, 2
-; CHECK: %tmp11 = and i8 %0, 32
+; CHECK: %tmp10 = lshr i8 %tmp8, 7
+; CHECK: %tmp11 = shl nuw nsw i8 %tmp10, 5
 
   %tmp12 = xor i8 %tmp11, %tmp9
   ret i8 %tmp12

Modified: llvm/trunk/test/Transforms/InstCombine/apint-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-shift.ll?rev=155362&r1=155361&r2=155362&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-shift.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/apint-shift.ll Mon Apr 23 12:39:52 2012
@@ -47,13 +47,21 @@
 }
 
 ; CHECK: @test6
-; CHECK-NOT: sh
+; CHECK: mul i55 %A, 6
 define i55 @test6(i55 %A) {
 	%B = shl i55 %A, 1		; <i55> [#uses=1]
 	%C = mul i55 %B, 3		; <i55> [#uses=1]
 	ret i55 %C
 }
 
+; CHECK: @test6a
+; CHECK: mul i55 %A, 6
+define i55 @test6a(i55 %A) {
+	%B = mul i55 %A, 3		; <i55> [#uses=1]
+	%C = shl i55 %B, 1		; <i55> [#uses=1]
+	ret i55 %C
+}
+
 ; CHECK: @test7
 ; CHECK-NOT: sh
 define i29 @test7(i8 %X) {
@@ -87,7 +95,8 @@
 }
 
 ; CHECK: @test11
-; CHECK-NOT: sh
+; Don't hide the shl from scalar evolution. DAGCombine will get it.
+; CHECK: shl
 define i23 @test11(i23 %A) {
 	%a = mul i23 %A, 3		; <i23> [#uses=1]
 	%B = lshr i23 %a, 11		; <i23> [#uses=1]
@@ -104,7 +113,8 @@
 }
 
 ; CHECK: @test13
-; CHECK-NOT: sh
+; Don't hide the shl from scalar evolution. DAGCombine will get it.
+; CHECK: shl
 define i18 @test13(i18 %A) {
 	%a = mul i18 %A, 3		; <i18> [#uses=1]
 	%B = ashr i18 %a, 8		; <i18> [#uses=1]

Modified: llvm/trunk/test/Transforms/InstCombine/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/cast.ll?rev=155362&r1=155361&r2=155362&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/cast.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/cast.ll Mon Apr 23 12:39:52 2012
@@ -457,10 +457,12 @@
   %E = sext i32 %D to i64
   ret i64 %E
 ; CHECK: @test50
-; CHECK-NEXT: shl i64 %A, 30
+; lshr+shl will be handled by DAGCombine.
+; CHECK-NEXT: lshr i64 %A, 2
+; CHECK-NEXT: shl i64 %a, 32
 ; CHECK-NEXT: add i64 {{.*}}, -4294967296
-; CHECK-NEXT: %sext = ashr i64 {{.*}}, 32
-; CHECK-NEXT: ret i64 %sext
+; CHECK-NEXT: %E = ashr exact i64 {{.*}}, 32
+; CHECK-NEXT: ret i64 %E
 }
 
 define i64 @test51(i64 %A, i1 %cond) {

Modified: llvm/trunk/test/Transforms/InstCombine/shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/shift.ll?rev=155362&r1=155361&r2=155362&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/shift.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/shift.ll Mon Apr 23 12:39:52 2012
@@ -65,8 +65,17 @@
 ; CHECK: @test6
 ; CHECK-NEXT: mul i32 %A, 6
 ; CHECK-NEXT: ret i32
-        %B = shl i32 %A, 1      ;; convert to an mul instruction 
-        %C = mul i32 %B, 3             
+        %B = shl i32 %A, 1      ;; convert to an mul instruction
+        %C = mul i32 %B, 3
+        ret i32 %C
+}
+
+define i32 @test6a(i32 %A) {
+; CHECK: @test6a
+; CHECK-NEXT: mul i32 %A, 6
+; CHECK-NEXT: ret i32
+        %B = mul i32 %A, 3
+        %C = shl i32 %B, 1      ;; convert to an mul instruction
         ret i32 %C
 }
 
@@ -97,7 +106,9 @@
         ret i8 %C
 }
 
+;; This transformation is deferred to DAGCombine:
 ;; (A >> 7) << 7 === A & 128
+;; The shl may be valuable to scalar evolution.
 define i8 @test10(i8 %A) {
 ; CHECK: @test10
 ; CHECK-NEXT: and i8 %A, -128
@@ -107,11 +118,21 @@
         ret i8 %C
 }
 
+;; Allow the simplification when the lshr shift is exact.
+define i8 @test10a(i8 %A) {
+; CHECK: @test10a
+; CHECK-NEXT: ret i8 %A
+        %B = lshr exact i8 %A, 7
+        %C = shl i8 %B, 7
+        ret i8 %C
+}
+
+;; This transformation is deferred to DAGCombine:
 ;; (A >> 3) << 4 === (A & 0x1F) << 1
+;; The shl may be valuable to scalar evolution.
 define i8 @test11(i8 %A) {
 ; CHECK: @test11
-; CHECK-NEXT: mul i8 %A, 6
-; CHECK-NEXT: and i8
+; CHECK: shl i8
 ; CHECK-NEXT: ret i8
         %a = mul i8 %A, 3               ; <i8> [#uses=1]
         %B = lshr i8 %a, 3              ; <i8> [#uses=1]
@@ -119,6 +140,18 @@
         ret i8 %C
 }
 
+;; Allow the simplification in InstCombine when the lshr shift is exact.
+define i8 @test11a(i8 %A) {
+; CHECK: @test11a
+; CHECK-NEXT: mul i8 %A, 6
+; CHECK-NEXT: ret i8
+        %a = mul i8 %A, 3
+        %B = lshr exact i8 %a, 3
+        %C = shl i8 %B, 4
+        ret i8 %C
+}
+
+;; This is deferred to DAGCombine unless %B is single-use.
 ;; (A >> 8) << 8 === A & -256
 define i32 @test12(i32 %A) {
 ; CHECK: @test12
@@ -129,11 +162,12 @@
         ret i32 %C
 }
 
+;; This transformation is deferred to DAGCombine:
 ;; (A >> 3) << 4 === (A & -8) * 2
+;; The shl may be valuable to scalar evolution.
 define i8 @test13(i8 %A) {
 ; CHECK: @test13
-; CHECK-NEXT: mul i8 %A, 6
-; CHECK-NEXT: and i8
+; CHECK: shl i8
 ; CHECK-NEXT: ret i8
         %a = mul i8 %A, 3               ; <i8> [#uses=1]
         %B = ashr i8 %a, 3              ; <i8> [#uses=1]
@@ -141,6 +175,16 @@
         ret i8 %C
 }
 
+define i8 @test13a(i8 %A) {
+; CHECK: @test13a
+; CHECK-NEXT: mul i8 %A, 6
+; CHECK-NEXT: ret i8
+        %a = mul i8 %A, 3
+        %B = ashr exact i8 %a, 3
+        %C = shl i8 %B, 4
+        ret i8 %C
+}
+
 ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4)
 define i32 @test14(i32 %A) {
 ; CHECK: @test14
@@ -477,10 +521,11 @@
   %tmp49 = lshr i8 %tmp48, 5
   %tmp50 = mul i8 %tmp49, 64
   %tmp51 = xor i8 %tmp50, %tmp5
-; CHECK: and i8 %0, 16
   %tmp52 = and i8 %tmp51, -128
   %tmp53 = lshr i8 %tmp52, 7
+; CHECK: lshr i8 %tmp51, 7
   %tmp54 = mul i8 %tmp53, 16
+; CHECK: shl nuw nsw i8 %tmp53, 4
   %tmp55 = xor i8 %tmp54, %tmp51
 ; CHECK: ret i8 %tmp551
   ret i8 %tmp55

Modified: llvm/trunk/test/Transforms/PhaseOrdering/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PhaseOrdering/basic.ll?rev=155362&r1=155361&r2=155362&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/PhaseOrdering/basic.ll (original)
+++ llvm/trunk/test/Transforms/PhaseOrdering/basic.ll Mon Apr 23 12:39:52 2012
@@ -22,3 +22,30 @@
 ; CHECK: @test1
 ; CHECK-NEXT: ret void
 }
+
+; This function exposes a phase ordering problem when InstCombine is
+; turning %add into a bitmask, making it difficult to spot a 0 return value.
+;
+; It it also important that %add is expressed as a multiple of %div so scalar
+; evolution can recognize it.
+define i32 @test2(i32 %a, i32* %p) nounwind uwtable ssp {
+entry:
+  %div = udiv i32 %a, 4
+  %arrayidx = getelementptr inbounds i32* %p, i64 0
+  store i32 %div, i32* %arrayidx, align 4
+  %add = add i32 %div, %div
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 1
+  store i32 %add, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32* %p, i64 1
+  %0 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %p, i64 0
+  %1 = load i32* %arrayidx3, align 4
+  %mul = mul i32 2, %1
+  %sub = sub i32 %0, %mul
+  ret i32 %sub
+
+; CHECK: @test2
+; CHECK: %div = lshr i32 %a, 2
+; CHECK: %add = shl nuw nsw i32 %div, 1
+; CHECK: ret i32 0
+}

Added: llvm/trunk/test/Transforms/PhaseOrdering/scev.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PhaseOrdering/scev.ll?rev=155362&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/PhaseOrdering/scev.ll (added)
+++ llvm/trunk/test/Transforms/PhaseOrdering/scev.ll Mon Apr 23 12:39:52 2012
@@ -0,0 +1,64 @@
+; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s
+;
+; This file contains phase ordering tests for scalar evolution.
+; Test that the standard passes don't obfuscate the IR so scalar evolution can't
+; recognize expressions.
+
+; CHECK: test1
+; The loop body contains two increments by %div.
+; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d.
+; CHECK: -->  {%p,+,(2 * (%d /u 4) * sizeof(i32))}
+define void @test1(i64 %d, i32* %p) nounwind uwtable ssp {
+entry:
+  %div = udiv i64 %d, 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ne i32 %i.0, 64
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %p.addr.0, align 4
+  %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
+  store i32 1, i32* %add.ptr, align 4
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; CHECK: test1a
+; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2)
+; CHECK: -->  {%p,+,(2 * (%d /u 2) * sizeof(i32))}
+define void @test1a(i64 %d, i32* %p) nounwind uwtable ssp {
+entry:
+  %div = udiv i64 %d, 2
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ne i32 %i.0, 64
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %p.addr.0, align 4
+  %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
+  store i32 1, i32* %add.ptr, align 4
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}