[PATCH] Fix for PR17975 - wrong code for bit shifting integer promotion at -O1 and above on x86_64-linux-gnu

Kai Nacke kai.nacke at redstar.de
Wed Dec 4 02:27:33 PST 2013


Ping^2.

This is a regression from LLVM 3.3 and should IMHO be fixed, either by
reverting my commit or by applying the attached fix.
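
For illustration, this is roughly the failure mode (a sketch based on the
noroldword test in the attached patch, not the exact source from the PR):

  %tmp1 = zext i32 %x_arg to i64
  %tmp3 = shl i64 %tmp1, %nBits_arg
  %tmp8 = sub i64 32, %nBits_arg
  %tmp10 = lshr i64 %tmp1, %tmp8
  %tmp11 = or i64 %tmp3, %tmp10
  ret i64 %tmp11

The full i64 value of %tmp11 is used here, so folding the or into a 32-bit
rotl of %x_arg drops the high bits produced by the shl. The fold is only
safe when the sole use of the or masks the result back down to the narrow
type, which is the check the attached patch adds.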

Regards,
Kai

On 02.12.2013 08:13, Kai Nacke wrote:
> Ping.
>
> On 25.11.2013 08:12, Kai Nacke wrote:
>> Hi all!
>>
>> In commit r191059 I introduced new patterns to match rotates, but I
>> forgot to check for the required TRUNCATE after the rotate. The missing
>> check produces wrong code, as documented in PR17975.
>>
>> The attached patch fixes the problem by adding the missing check. It
>> also extends the test to include patterns which must not produce a
>> rotate. Please review.
>>
>> @Owen Anderson, Bill Wendling
>> If approved, this patch needs to be committed to the 3.4 branch. If
>> this seems too risky, then my commit r191059 must be reverted on the
>> 3.4 branch in order to fix the wrong code generation.
>>
>> Regards,
>> Kai
>>
>>

-------------- next part --------------
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 805cc9e..be14c95 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -280,7 +280,7 @@ namespace {
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
-    SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
+    SDNode *MatchRotate(SDNode *N, SDValue LHS, SDValue RHS, SDLoc DL);
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     SDValue TransformFPLoadStorePair(SDNode *N);
@@ -3272,7 +3272,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   }
 
   // See if this is some rotate idiom.
-  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
+  if (SDNode *Rot = MatchRotate(N, N0, N1, SDLoc(N)))
     return SDValue(Rot, 0);
 
   // Simplify the operands using demanded-bits information.
@@ -3305,7 +3305,8 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
 // idioms for rotate, and if the target supports rotation instructions, generate
 // a rot[lr].
-SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
+SDNode *DAGCombiner::MatchRotate(SDNode *N, SDValue LHS, SDValue RHS,
+                                 SDLoc DL) {
   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
   EVT VT = LHS.getValueType();
   if (!TLI.isTypeLegal(VT)) return 0;
@@ -3406,19 +3407,25 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
       if (SUBC->getAPIntValue() == OpSizeInBits) {
         return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                            HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
-      } else if (LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
-                 LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
-        // fold (or (shl (*ext x), (*ext y)),
-        //          (srl (*ext x), (*ext (sub 32, y)))) ->
-        //   (*ext (rotl x, y))
-        // fold (or (shl (*ext x), (*ext y)),
-        //          (srl (*ext x), (*ext (sub 32, y)))) ->
-        //   (*ext (rotr x, (sub 32, y)))
+      } else if ((LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
+                  LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) &&
+                 N->hasOneUse()) {
+        // fold (*trunc (or (shl (*ext x), (*ext y)),
+        //                  (srl (*ext x), (*ext (sub 32, y))))) ->
+        //      (*trunc (*ext (rotl x, y)))
+        // fold (*trunc (or (shl (*ext x), (*ext y)),
+        //                  (srl (*ext x), (*ext (sub 32, y))))) ->
+        //       (*trunc (*ext (rotr x, (sub 32, y))))
         SDValue LArgExtOp0 = LHSShiftArg.getOperand(0);
         EVT LArgVT = LArgExtOp0.getValueType();
+        SDNode *Use = *N->use_begin();
         bool HasROTRWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTR, LArgVT);
         bool HasROTLWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTL, LArgVT);
-        if (HasROTRWithLArg || HasROTLWithLArg) {
+        ConstantSDNode *CN;
+        if (Use->getOpcode() == ISD::AND &&
+            (CN = dyn_cast<ConstantSDNode>(Use->getOperand(1))) &&
+            CN->getZExtValue() + 1 == (1ULL << LArgVT.getSizeInBits()) &&
+            (HasROTRWithLArg || HasROTLWithLArg)) {
           if (LArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
             SDValue V =
                 DAG.getNode(HasROTLWithLArg ? ISD::ROTL : ISD::ROTR, DL, LArgVT,
@@ -3439,19 +3446,25 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
       if (SUBC->getAPIntValue() == OpSizeInBits) {
         return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
                            HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
-      } else if (RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
-                 RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
-        // fold (or (shl (*ext x), (*ext (sub 32, y))),
-        //          (srl (*ext x), (*ext y))) ->
-        //   (*ext (rotl x, y))
-        // fold (or (shl (*ext x), (*ext (sub 32, y))),
-        //          (srl (*ext x), (*ext y))) ->
-        //   (*ext (rotr x, (sub 32, y)))
+      } else if ((RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
+                  RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) &&
+                 N->hasOneUse()) {
+        // fold (*trunc (or (shl (*ext x), (*ext (sub 32, y))),
+        //                  (srl (*ext x), (*ext y)))) ->
+        //       (*trunc (*ext (rotl x, y)))
+        // fold (*trunc (or (shl (*ext x), (*ext (sub 32, y))),
+        //                  (srl (*ext x), (*ext y)))) ->
+        //       (*trunc (*ext (rotr x, (sub 32, y))))
         SDValue RArgExtOp0 = RHSShiftArg.getOperand(0);
         EVT RArgVT = RArgExtOp0.getValueType();
+        SDNode *Use = *N->use_begin();
         bool HasROTRWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTR, RArgVT);
         bool HasROTLWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTL, RArgVT);
-        if (HasROTRWithRArg || HasROTLWithRArg) {
+        ConstantSDNode *CN;
+        if (Use->getOpcode() == ISD::AND &&
+            (CN = dyn_cast<ConstantSDNode>(Use->getOperand(1))) &&
+            CN->getZExtValue() + 1 == (1ULL << RArgVT.getSizeInBits()) &&
+            (HasROTRWithRArg || HasROTLWithRArg)) {
           if (RArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
             SDValue V =
                 DAG.getNode(HasROTRWithRArg ? ISD::ROTR : ISD::ROTL, DL, RArgVT,
diff --git a/test/CodeGen/X86/rotate3.ll b/test/CodeGen/X86/rotate3.ll
index b92f7c2..a0f7cbd 100644
--- a/test/CodeGen/X86/rotate3.ll
+++ b/test/CodeGen/X86/rotate3.ll
@@ -1,6 +1,9 @@
 ; Check that (or (shl x, y), (srl x, (sub 32, y))) is folded into (rotl x, y)
 ; and (or (shl x, (sub 32, y)), (srl x, r)) into (rotr x, y) even if the
 ; argument is zero extended. Fix for PR16726.
+;
+; Check that the folding is not performed if the result is not truncated.
+; Fix for PR17975.
 
 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
 
@@ -53,18 +56,69 @@ entry:
 }
 ; CHECK:    rorw %cl, %{{[a-z0-9]+}}
 
-define i64 @roldword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
+
+define i32 @norolbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i8 %x_arg to i32
+  %tmp3 = shl i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 8, %nBits_arg
+  %tmp10 = lshr i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  ret i32 %tmp11
+}
+; CHECK:    shll %cl, %{{[a-z0-9]+}}
+; CHECK:    shrl %cl, %{{[a-z0-9]+}}
+
+define i32 @nororbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i8 %x_arg to i32
+  %tmp3 = lshr i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 8, %nBits_arg
+  %tmp10 = shl i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  ret i32 %tmp11
+}
+; CHECK:    shrl %cl, %{{[a-z0-9]+}}
+; CHECK:    shll %cl, %{{[a-z0-9]+}}
+
+define i32 @norolword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i16 %x_arg to i32
+  %tmp3 = shl i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 16, %nBits_arg
+  %tmp10 = lshr i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  ret i32 %tmp11
+}
+; CHECK:    shll %cl, %{{[a-z0-9]+}}
+; CHECK:    shrl %cl, %{{[a-z0-9]+}}
+
+define i32 @nororword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i16 %x_arg to i32
+  %tmp3 = lshr i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 16, %nBits_arg
+  %tmp10 = shl i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  ret i32 %tmp11
+}
+; CHECK:    shrl %cl, %{{[a-z0-9]+}}
+; CHECK:    shll %cl, %{{[a-z0-9]+}}
+
+define i64 @noroldword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
 entry:
   %tmp1 = zext i32 %x_arg to i64
   %tmp3 = shl i64 %tmp1, %nBits_arg
   %tmp8 = sub i64 32, %nBits_arg
   %tmp10 = lshr i64 %tmp1, %tmp8
   %tmp11 = or i64 %tmp3, %tmp10
+  %tmp12 = trunc i64 %tmp11 to i32
   ret i64 %tmp11
 }
-; CHECK:    roll %cl, %{{[a-z0-9]+}}
+; CHECK:    shlq %cl, %{{[a-z0-9]+}}
+; CHECK:    shrq %cl, %{{[a-z0-9]+}}
 
-define zeroext i64 @rordword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
+define i64 @norordword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
 entry:
   %tmp1 = zext i32 %x_arg to i64
   %tmp3 = lshr i64 %tmp1, %nBits_arg
@@ -73,4 +127,5 @@ entry:
   %tmp11 = or i64 %tmp3, %tmp10
   ret i64 %tmp11
 }
-; CHECK:    rorl %cl, %{{[a-z0-9]+}}
+; CHECK:    shrq %cl, %{{[a-z0-9]+}}
+; CHECK:    shlq %cl, %{{[a-z0-9]+}}

