[llvm] faa35fc - [DAG] Fix issue with rot(rot(x,c1),c2) -> rot(x,c1+c2) fold with unnormalized rotation amounts

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue May 3 09:16:36 PDT 2022


Author: Simon Pilgrim
Date: 2022-05-03T17:16:26+01:00
New Revision: faa35fc873700b6d3e61e652c887b82d1cea7c0c

URL: https://github.com/llvm/llvm-project/commit/faa35fc873700b6d3e61e652c887b82d1cea7c0c
DIFF: https://github.com/llvm/llvm-project/commit/faa35fc873700b6d3e61e652c887b82d1cea7c0c.diff

LOG: [DAG] Fix issue with rot(rot(x,c1),c2) -> rot(x,c1+c2) fold with unnormalized rotation amounts

Don't assume the rotation amounts have been correctly normalized - do it as part of the constant folding.

Also, the normalization should be performed with UREM not SREM.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/X86/combine-rotates.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e483c3a1fdf86..679466be954ed 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8746,7 +8746,9 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
   }
 
   unsigned NextOp = N0.getOpcode();
-  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
+
+  // fold (rot* (rot* x, c2), c1)
+  //   -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize)
   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
@@ -8754,14 +8756,19 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
       EVT ShiftVT = C1->getValueType(0);
       bool SameSide = (N->getOpcode() == NextOp);
       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
-      if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
-              CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
-        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
-        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
-            ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
-        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
-                           CombinedShiftNorm);
-      }
+      SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
+      SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT, 
+                                                 {N1, BitsizeC});
+      SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+                                                 {N0.getOperand(1), BitsizeC});
+      if (Norm1 && Norm2)
+        if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
+                CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
+          SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+              ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
+          return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
+                             CombinedShiftNorm);
+        }
     }
   }
   return SDValue();

diff  --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll
index ba775fca649a0..dee22d10d8fcb 100644
--- a/llvm/test/CodeGen/X86/combine-rotates.ll
+++ b/llvm/test/CodeGen/X86/combine-rotates.ll
@@ -435,12 +435,16 @@ define i32 @fuzz9935() {
   ret i32 %4
 }
 
-; FIXME: Failure to modulo the inner rotation before adding the results
+; Ensure we normalize the inner rotation before adding the results.
 define i5 @rotl_merge_i5(i5 %x) {
 ; CHECK-LABEL: rotl_merge_i5:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal (,%rdi,4), %ecx
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    andb $24, %al
+; CHECK-NEXT:    shrb $3, %al
+; CHECK-NEXT:    orb %cl, %al
 ; CHECK-NEXT:    retq
   %r1 = call i5 @llvm.fshl.i5(i5 %x, i5 %x, i5 -1)
   %r2 = call i5 @llvm.fshl.i5(i5 %r1, i5 %r1, i5 1)


        


More information about the llvm-commits mailing list