[PATCH] Fix for PR17975 - wrong code for bit shifting integer promotion at -O1 and above on x86_64-linux-gnu

Sun Nov 24 23:12:17 PST 2013

Hi all!

With my commit r191059 I introduced new patterns to match rotates. But I 
forgot to check for the required TRUNCATE after the rotate. The missing 
check produces wrong code as documented in PR17975.

The attached patch fixes the problem by adding the missing check. It 
also extends the test to include patterns which must not produce a 
rotate. Please review.

@Owen Anderson, Bill Wendling
If approved then this patch needs to be committed to the 3.4 branch. If 
this seems to be to risky then my commit r191059 must be reverted on the 
3.4 branch in order to fix the wrong code generation.

Regards,
Kai
-------------- next part --------------

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 20b0981..6337103 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -280,7 +280,7 @@ namespace {
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
-    SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
+    SDNode *MatchRotate(SDNode *N, SDValue LHS, SDValue RHS, SDLoc DL);
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     SDValue TransformFPLoadStorePair(SDNode *N);
@@ -3283,7 +3283,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   }
 
   // See if this is some rotate idiom.
-  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
+  if (SDNode *Rot = MatchRotate(N, N0, N1, SDLoc(N)))
     return SDValue(Rot, 0);
 
   // Simplify the operands using demanded-bits information.
@@ -3316,7 +3316,8 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
 // idioms for rotate, and if the target supports rotation instructions, generate
 // a rot[lr].
-SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
+SDNode *DAGCombiner::MatchRotate(SDNode *N, SDValue LHS, SDValue RHS,
+                                 SDLoc DL) {
   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
   EVT VT = LHS.getValueType();
   if (!TLI.isTypeLegal(VT)) return 0;
@@ -3417,19 +3418,25 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
       if (SUBC->getAPIntValue() == OpSizeInBits) {
         return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                            HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
-      } else if (LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
-                 LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
-        // fold (or (shl (*ext x), (*ext y)),
-        //          (srl (*ext x), (*ext (sub 32, y)))) ->
-        //   (*ext (rotl x, y))
-        // fold (or (shl (*ext x), (*ext y)),
-        //          (srl (*ext x), (*ext (sub 32, y)))) ->
-        //   (*ext (rotr x, (sub 32, y)))
+      } else if ((LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
+                  LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) &&
+                 N->hasOneUse()) {
+        // fold (*trunc (or (shl (*ext x), (*ext y)),
+        //                  (srl (*ext x), (*ext (sub 32, y))))) ->
+        //      (*trunc (*ext (rotl x, y)))
+        // fold (*trunc (or (shl (*ext x), (*ext y)),
+        //                  (srl (*ext x), (*ext (sub 32, y))))) ->
+        //       (*trunc (*ext (rotr x, (sub 32, y))))
         SDValue LArgExtOp0 = LHSShiftArg.getOperand(0);
         EVT LArgVT = LArgExtOp0.getValueType();
+        SDNode *Use = *N->use_begin();
         bool HasROTRWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTR, LArgVT);
         bool HasROTLWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTL, LArgVT);
-        if (HasROTRWithLArg || HasROTLWithLArg) {
+        ConstantSDNode *CN;
+        if (Use->getOpcode() == ISD::AND &&
+            (CN = dyn_cast<ConstantSDNode>(Use->getOperand(1))) &&
+            CN->getZExtValue()+1 == (1ULL << LArgVT.getSizeInBits())
+            && (HasROTRWithLArg || HasROTLWithLArg)) {
           if (LArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
             SDValue V =
                 DAG.getNode(HasROTLWithLArg ? ISD::ROTL : ISD::ROTR, DL, LArgVT,
@@ -3450,19 +3457,25 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
       if (SUBC->getAPIntValue() == OpSizeInBits) {
         return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
                            HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
-      } else if (RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
-                 RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
-        // fold (or (shl (*ext x), (*ext (sub 32, y))),
-        //          (srl (*ext x), (*ext y))) ->
-        //   (*ext (rotl x, y))
-        // fold (or (shl (*ext x), (*ext (sub 32, y))),
-        //          (srl (*ext x), (*ext y))) ->
-        //   (*ext (rotr x, (sub 32, y)))
+      } else if ((RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
+                  RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) &&
+                 N->hasOneUse()) {
+        // fold (*trunc (or (shl (*ext x), (*ext (sub 32, y))),
+        //                  (srl (*ext x), (*ext y)))) ->
+        //       (*trunc (*ext (rotl x, y)))
+        // fold (*trunc (or (shl (*ext x), (*ext (sub 32, y))),
+        //                  (srl (*ext x), (*ext y)))) ->
+        //       (*trunc (*ext (rotr x, (sub 32, y))))
         SDValue RArgExtOp0 = RHSShiftArg.getOperand(0);
         EVT RArgVT = RArgExtOp0.getValueType();
+        SDNode *Use = *N->use_begin();
         bool HasROTRWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTR, RArgVT);
         bool HasROTLWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTL, RArgVT);
-        if (HasROTRWithRArg || HasROTLWithRArg) {
+        ConstantSDNode *CN;
+        if (Use->getOpcode() == ISD::AND &&
+            (CN = dyn_cast<ConstantSDNode>(Use->getOperand(1))) &&
+            CN->getZExtValue()+1 == (1ULL << RArgVT.getSizeInBits())
+            && (HasROTRWithRArg || HasROTLWithRArg)) {
           if (RArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
             SDValue V =
                 DAG.getNode(HasROTRWithRArg ? ISD::ROTR : ISD::ROTL, DL, RArgVT,
diff --git a/test/CodeGen/X86/rotate3.ll b/test/CodeGen/X86/rotate3.ll
index b92f7c2..a0f7cbd 100644
--- a/test/CodeGen/X86/rotate3.ll
+++ b/test/CodeGen/X86/rotate3.ll
@@ -1,6 +1,9 @@
 ; Check that (or (shl x, y), (srl x, (sub 32, y))) is folded into (rotl x, y)
 ; and (or (shl x, (sub 32, y)), (srl x, r)) into (rotr x, y) even if the
 ; argument is zero extended. Fix for PR16726.
+;
+; Check that the folding is not performed if the result is not truncated.
+; Fix for PR17975.
 
 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
 
@@ -53,18 +56,69 @@ entry:
 }
 ; CHECK:    rorw %cl, %{{[a-z0-9]+}}
 
-define i64 @roldword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
+
+define i32 @norolbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i8 %x_arg to i32
+  %tmp3 = shl i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 8, %nBits_arg
+  %tmp10 = lshr i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  ret i32 %tmp11
+}
+; CHECK:    shll %cl, %{{[a-z0-9]+}}
+; CHECK:    shrl %cl, %{{[a-z0-9]+}}
+
+define i32 @nororbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i8 %x_arg to i32
+  %tmp3 = lshr i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 8, %nBits_arg
+  %tmp10 = shl i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  ret i32 %tmp11
+}
+; CHECK:    shrl %cl, %{{[a-z0-9]+}}
+; CHECK:    shll %cl, %{{[a-z0-9]+}}
+
+define i32 @norolword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i16 %x_arg to i32
+  %tmp3 = shl i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 16, %nBits_arg
+  %tmp10 = lshr i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  ret i32 %tmp11
+}
+; CHECK:    shll %cl, %{{[a-z0-9]+}}
+; CHECK:    shrl %cl, %{{[a-z0-9]+}}
+
+define i32 @nororword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i16 %x_arg to i32
+  %tmp3 = lshr i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 16, %nBits_arg
+  %tmp10 = shl i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  ret i32 %tmp11
+}
+; CHECK:    shrl %cl, %{{[a-z0-9]+}}
+; CHECK:    shll %cl, %{{[a-z0-9]+}}
+
+define i64 @noroldword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
 entry:
   %tmp1 = zext i32 %x_arg to i64
   %tmp3 = shl i64 %tmp1, %nBits_arg
   %tmp8 = sub i64 32, %nBits_arg
   %tmp10 = lshr i64 %tmp1, %tmp8
   %tmp11 = or i64 %tmp3, %tmp10
+  %tmp12 = trunc i64 %tmp11 to i32
   ret i64 %tmp11
 }
-; CHECK:    roll %cl, %{{[a-z0-9]+}}
+; CHECK:    shlq %cl, %{{[a-z0-9]+}}
+; CHECK:    shrq %cl, %{{[a-z0-9]+}}
 
-define zeroext i64 @rordword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
+define i64 @norordword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
 entry:
   %tmp1 = zext i32 %x_arg to i64
   %tmp3 = lshr i64 %tmp1, %nBits_arg
@@ -73,4 +127,5 @@ entry:
   %tmp11 = or i64 %tmp3, %tmp10
   ret i64 %tmp11
 }
-; CHECK:    rorl %cl, %{{[a-z0-9]+}}
+; CHECK:    shrq %cl, %{{[a-z0-9]+}}
+; CHECK:    shlq %cl, %{{[a-z0-9]+}}