[PATCH][DAG] PR16726: extend rol/ror matching
Kai Nacke
kai.nacke at redstar.de
Sun Sep 15 11:00:14 PDT 2013
Ping.
On 10.09.2013 01:26, Kai Nacke wrote:
> Hi!
>
> C-like languages promote types like unsigned short to unsigned int
> before performing an arithmetic operation. The rotate matcher in the
> DAGCombiner does not consider this situation.
> The attached patch extends the DAGCombiner so that the pattern
>
> (or (shl ([az]ext x), (*ext y)), (srl ([az]ext x), (*ext (sub 32, y))))
>
> is folded into
>
> ([az]ext (rotl x, y))
>
> The matching is restricted to aext and zext because in these cases the
> upper bits are either undefined or known. A test case is included.
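>
> For illustration, the fold targets the usual C rotate idiom once the
> operand has been promoted. A minimal sketch (the function and its names
> are made up here, not taken from PR16726):
>
>   unsigned char rol8(unsigned char x, unsigned int n) {
>     /* x is promoted to a 32-bit integer, so the IR zero-extends the i8
>        value to i32 before the shifts; this produces the extended-operand
>        form of (or (shl x, y), (srl x, (sub 8, y))) that the patch now
>        matches. */
>     return (unsigned char)((x << n) | (x >> (8 - n)));
>   }
>
> Compiled with optimization, the shift/or combination should now be
> recognized and lowered to a single rotate (e.g. rolb on x86).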
>
> This fixes PR16726.
>
> Please review.
>
> Regards
> Kai
>
>
-------------- next part --------------
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 67f3f06..ec0ba44 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3337,6 +3337,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
unsigned OpSizeInBits = VT.getSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftArg = RHSShift.getOperand(0);
SDValue RHSShiftAmt = RHSShift.getOperand(1);
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
@@ -3420,6 +3421,23 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg,
HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ else if (LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
+ // fold (or (shl (*ext x), (*ext y)),
+ // (srl (*ext x), (*ext (sub 32, y)))) ->
+ // (*ext (rotl x, y))
+ // fold (or (shl (*ext x), (*ext y)),
+ // (srl (*ext x), (*ext (sub 32, y)))) ->
+ // (*ext (rotr x, (sub 32, y)))
+ SDValue LArgExtOp0 = LHSShiftArg.getOperand(0);
+ EVT LArgVT = LArgExtOp0.getValueType();
+ if (LArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
+ SDValue V = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, LArgVT,
+ LArgExtOp0,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt);
+ return DAG.getNode(LHSShiftArg.getOpcode(), DL, VT, V).getNode();
+ }
+ }
} else if (LExtOp0.getOpcode() == ISD::SUB &&
RExtOp0 == LExtOp0.getOperand(1)) {
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
@@ -3432,6 +3450,23 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
LHSShiftArg,
HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ else if (RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
+ // fold (or (shl (*ext x), (*ext (sub 32, y))),
+ // (srl (*ext x), (*ext y))) ->
+ // (*ext (rotl x, y))
+ // fold (or (shl (*ext x), (*ext (sub 32, y))),
+ // (srl (*ext x), (*ext y))) ->
+ // (*ext (rotr x, (sub 32, y)))
+ SDValue RArgExtOp0 = RHSShiftArg.getOperand(0);
+ EVT RArgVT = RArgExtOp0.getValueType();
+ if (RArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
+ SDValue V = DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, RArgVT,
+ RArgExtOp0,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt);
+ return DAG.getNode(RHSShiftArg.getOpcode(), DL, VT, V).getNode();
+ }
+ }
}
}
diff --git a/test/CodeGen/X86/rotate3.ll b/test/CodeGen/X86/rotate3.ll
new file mode 100644
index 0000000..b92f7c2
--- /dev/null
+++ b/test/CodeGen/X86/rotate3.ll
@@ -0,0 +1,76 @@
+; Check that (or (shl x, y), (srl x, (sub 32, y))) is folded into (rotl x, y)
+; and (or (shl x, (sub 32, y)), (srl x, y)) into (rotr x, y) even if the
+; argument is zero-extended. Fix for PR16726.
+
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+define zeroext i8 @rolbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
+entry:
+ %tmp1 = zext i8 %x_arg to i32
+ %tmp3 = shl i32 %tmp1, %nBits_arg
+ %tmp8 = sub i32 8, %nBits_arg
+ %tmp10 = lshr i32 %tmp1, %tmp8
+ %tmp11 = or i32 %tmp3, %tmp10
+ %tmp12 = trunc i32 %tmp11 to i8
+ ret i8 %tmp12
+}
+; CHECK: rolb %cl, %{{[a-z0-9]+}}
+
+
+define zeroext i8 @rorbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
+entry:
+ %tmp1 = zext i8 %x_arg to i32
+ %tmp3 = lshr i32 %tmp1, %nBits_arg
+ %tmp8 = sub i32 8, %nBits_arg
+ %tmp10 = shl i32 %tmp1, %tmp8
+ %tmp11 = or i32 %tmp3, %tmp10
+ %tmp12 = trunc i32 %tmp11 to i8
+ ret i8 %tmp12
+}
+; CHECK: rorb %cl, %{{[a-z0-9]+}}
+
+define zeroext i16 @rolword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
+entry:
+ %tmp1 = zext i16 %x_arg to i32
+ %tmp3 = shl i32 %tmp1, %nBits_arg
+ %tmp8 = sub i32 16, %nBits_arg
+ %tmp10 = lshr i32 %tmp1, %tmp8
+ %tmp11 = or i32 %tmp3, %tmp10
+ %tmp12 = trunc i32 %tmp11 to i16
+ ret i16 %tmp12
+}
+; CHECK: rolw %cl, %{{[a-z0-9]+}}
+
+define zeroext i16 @rorword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
+entry:
+ %tmp1 = zext i16 %x_arg to i32
+ %tmp3 = lshr i32 %tmp1, %nBits_arg
+ %tmp8 = sub i32 16, %nBits_arg
+ %tmp10 = shl i32 %tmp1, %tmp8
+ %tmp11 = or i32 %tmp3, %tmp10
+ %tmp12 = trunc i32 %tmp11 to i16
+ ret i16 %tmp12
+}
+; CHECK: rorw %cl, %{{[a-z0-9]+}}
+
+define i64 @roldword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
+entry:
+ %tmp1 = zext i32 %x_arg to i64
+ %tmp3 = shl i64 %tmp1, %nBits_arg
+ %tmp8 = sub i64 32, %nBits_arg
+ %tmp10 = lshr i64 %tmp1, %tmp8
+ %tmp11 = or i64 %tmp3, %tmp10
+ ret i64 %tmp11
+}
+; CHECK: roll %cl, %{{[a-z0-9]+}}
+
+define i64 @rordword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
+entry:
+ %tmp1 = zext i32 %x_arg to i64
+ %tmp3 = lshr i64 %tmp1, %nBits_arg
+ %tmp8 = sub i64 32, %nBits_arg
+ %tmp10 = shl i64 %tmp1, %tmp8
+ %tmp11 = or i64 %tmp3, %tmp10
+ ret i64 %tmp11
+}
+; CHECK: rorl %cl, %{{[a-z0-9]+}}