[llvm] [InstCombine] Resolve TODO: Remove one-use check if other logic operand (Y) is constant (PR #77973)

via llvm-commits <llvm-commits@lists.llvm.org>
Sat Jan 13 15:28:45 PST 2024


https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/77973

From e76f81ebd0a3fef3e11b6001457235042ffb3091 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams@users.noreply.github.com>
Date: Fri, 12 Jan 2024 15:15:27 -0500
Subject: [PATCH] [InstCombine] Remove the one-use check if the other logic
 operand (Y) is constant

By first checking the other logic operand with match(W, m_ImmConstant()), we can drop the one-use requirement on the inner shift in that case: the shift of an immediate constant created by the fold constant-folds away, so the transform does not add instructions even when the inner shift has other users.
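
As a sketch of the intended effect (value names are illustrative, and
assuming no other fold interferes first), the transform can now fire even
when the inner shift has extra users, because the new shift of the
constant operand folds away:

  ; before: %sh0 may have other users
  %sh0 = shl i8 %x, 3
  %r = or i8 %sh0, 12        ; Y = 12 is an immediate constant
  %sh1 = shl i8 %r, 2
  ; after
  %tmp = shl i8 %x, 5        ; shift amounts combined: 3 + 2 < 8
  %sh1 = or i8 %tmp, 48      ; shl i8 12, 2 constant-folds to 48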
---
 .../InstCombine/InstCombineShifts.cpp         |  17 +-
 .../Transforms/InstCombine/shift-logic.ll     | 397 ++++++++++++++++++
 2 files changed, 408 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index b7958978c450c9..e7dea910937a77 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -368,12 +368,17 @@ static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
 
-  // Find a matching one-use shift by constant. The fold is not valid if the sum
-  // of the shift values equals or exceeds bitwidth.
-  // TODO: Remove the one-use check if the other logic operand (Y) is constant.
+  // Find a matching shift by constant (one-use unless the other logic operand
+  // (Y) is an immediate constant). The fold requires C0 + C1 < bitwidth.
   Value *X, *Y;
-  auto matchFirstShift = [&](Value *V) {
+  auto matchFirstShift = [&](Value *V, Value *W) {
     APInt Threshold(Ty->getScalarSizeInBits(), Ty->getScalarSizeInBits());
-    return match(V,
-                 m_OneUse(m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0)))) &&
+    bool MatchFirst;
+    if (match(W, m_ImmConstant()))
+      MatchFirst = match(V, m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0)));
+    else
+      MatchFirst =
+          match(V, m_OneUse(m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0))));
+
+    return MatchFirst &&
            match(ConstantExpr::getAdd(C0, C1),
                  m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
   };
@@ -382,9 +387,9 @@ static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
-  // is not so we cannot reoder if we match operand(1) and need to keep the
+  // is not so we cannot reorder if we match operand(1) and need to keep the
   // operands in their original positions.
   bool FirstShiftIsOp1 = false;
-  if (matchFirstShift(BinInst->getOperand(0)))
+  if (matchFirstShift(BinInst->getOperand(0), BinInst->getOperand(1)))
     Y = BinInst->getOperand(1);
-  else if (matchFirstShift(BinInst->getOperand(1))) {
+  else if (matchFirstShift(BinInst->getOperand(1), BinInst->getOperand(0))) {
     Y = BinInst->getOperand(0);
     FirstShiftIsOp1 = BinInst->getOpcode() == Instruction::Sub;
   } else
diff --git a/llvm/test/Transforms/InstCombine/shift-logic.ll b/llvm/test/Transforms/InstCombine/shift-logic.ll
index 544694d398431e..0286023026b0dd 100644
--- a/llvm/test/Transforms/InstCombine/shift-logic.ll
+++ b/llvm/test/Transforms/InstCombine/shift-logic.ll
@@ -16,6 +16,19 @@ define i8 @shl_and(i8 %x, i8 %y) {
   ret i8 %sh1
 }
 
+define i8 @shl_and_non_imm(i8 %x, i8 %y) {
+; CHECK-LABEL: @shl_and_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = shl i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl i8 [[R]], 2
+; CHECK-NEXT:    ret i8 [[SH1]]
+;
+  %sh0 = shl i8 %x, %y
+  %r = and i8 %sh0, %y
+  %sh1 = shl i8 %r, 2
+  ret i8 %sh1
+}
+
 define <2 x i8> @shl_and_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_and_nonuniform(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 4>
@@ -44,6 +57,21 @@ define i16 @shl_or(i16 %x, i16 %py) {
   ret i16 %sh1
 }
 
+define i16 @shl_or_non_imm(i16 %x, i16 %py) {
+; CHECK-LABEL: @shl_or_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem i16 [[PY:%.*]], 42
+; CHECK-NEXT:    [[SH0:%.*]] = shl i16 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = or i16 [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl i16 [[R]], 7
+; CHECK-NEXT:    ret i16 [[SH1]]
+;
+  %y = srem i16 %py, 42 ; thwart complexity-based canonicalization
+  %sh0 = shl i16 %x, %y
+  %r = or i16 %y, %sh0
+  %sh1 = shl i16 %r, 7
+  ret i16 %sh1
+}
+
 define <2 x i16> @shl_or_undef(<2 x i16> %x, <2 x i16> %py) {
 ; CHECK-LABEL: @shl_or_undef(
 ; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i16> [[PY:%.*]], <i16 42, i16 42>
@@ -59,6 +87,21 @@ define <2 x i16> @shl_or_undef(<2 x i16> %x, <2 x i16> %py) {
   ret <2 x i16> %sh1
 }
 
+define <2 x i16> @shl_or_undef_non_imm(<2 x i16> %x, <2 x i16> %py) {
+; CHECK-LABEL: @shl_or_undef_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i16> [[PY:%.*]], <i16 42, i16 42>
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i16> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = or <2 x i16> [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i16> [[R]], <i16 7, i16 undef>
+; CHECK-NEXT:    ret <2 x i16> [[SH1]]
+;
+  %y = srem <2 x i16> %py, <i16 42, i16 42> ; thwart complexity-based canonicalization
+  %sh0 = shl <2 x i16> %x, %y
+  %r = or <2 x i16> %y, %sh0
+  %sh1 = shl <2 x i16> %r, <i16 7, i16 undef>
+  ret <2 x i16> %sh1
+}
+
 define i32 @shl_xor(i32 %x, i32 %y) {
 ; CHECK-LABEL: @shl_xor(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[X:%.*]], 12
@@ -72,6 +115,19 @@ define i32 @shl_xor(i32 %x, i32 %y) {
   ret i32 %sh1
 }
 
+define i32 @shl_xor_non_imm(i32 %x, i32 %y) {
+; CHECK-LABEL: @shl_xor_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = xor i32 [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl i32 [[R]], 7
+; CHECK-NEXT:    ret i32 [[SH1]]
+;
+  %sh0 = shl i32 %x, %y
+  %r = xor i32 %sh0, %y
+  %sh1 = shl i32 %r, 7
+  ret i32 %sh1
+}
+
 define <2 x i32> @shl_xor_nonuniform(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @shl_xor_nonuniform(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 12, i32 14>
@@ -85,6 +141,19 @@ define <2 x i32> @shl_xor_nonuniform(<2 x i32> %x, <2 x i32> %y) {
   ret <2 x i32> %sh1
 }
 
+define <2 x i32> @shl_xor_nonuniform_non_imm(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @shl_xor_nonuniform_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i32> [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i32> [[R]], <i32 7, i32 8>
+; CHECK-NEXT:    ret <2 x i32> [[SH1]]
+;
+  %sh0 = shl <2 x i32> %x, %y
+  %r = xor <2 x i32> %sh0, %y
+  %sh1 = shl <2 x i32> %r, <i32 7, i32 8>
+  ret <2 x i32> %sh1
+}
+
 define i64 @lshr_and(i64 %x, i64 %py) {
 ; CHECK-LABEL: @lshr_and(
 ; CHECK-NEXT:    [[Y:%.*]] = srem i64 [[PY:%.*]], 42
@@ -100,6 +169,21 @@ define i64 @lshr_and(i64 %x, i64 %py) {
   ret i64 %sh1
 }
 
+define i64 @lshr_and_non_imm(i64 %x, i64 %py) {
+; CHECK-LABEL: @lshr_and_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem i64 [[PY:%.*]], 42
+; CHECK-NEXT:    [[SH0:%.*]] = lshr i64 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = and i64 [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = lshr i64 [[R]], 7
+; CHECK-NEXT:    ret i64 [[SH1]]
+;
+  %y = srem i64 %py, 42 ; thwart complexity-based canonicalization
+  %sh0 = lshr i64 %x, %y
+  %r = and i64 %y, %sh0
+  %sh1 = lshr i64 %r, 7
+  ret i64 %sh1
+}
+
 define <2 x i64> @lshr_and_undef(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @lshr_and_undef(
 ; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
@@ -115,6 +199,21 @@ define <2 x i64> @lshr_and_undef(<2 x i64> %x, <2 x i64> %py) {
   ret <2 x i64> %sh1
 }
 
+define <2 x i64> @lshr_and_undef_non_imm(<2 x i64> %x, <2 x i64> %py) {
+; CHECK-LABEL: @lshr_and_undef_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
+; CHECK-NEXT:    [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i64> [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i64> [[R]], <i64 7, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> [[SH1]]
+;
+  %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization
+  %sh0 = lshr <2 x i64> %x, %y
+  %r = and <2 x i64> %y, %sh0
+  %sh1 = lshr <2 x i64> %r, <i64 7, i64 undef>
+  ret <2 x i64> %sh1
+}
+
 define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @lshr_or(
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[X:%.*]], <i32 12, i32 12, i32 12, i32 12>
@@ -128,6 +227,19 @@ define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) {
   ret <4 x i32> %sh1
 }
 
+define <4 x i32> @lshr_or_non_imm(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @lshr_or_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = lshr <4 x i32> [[R]], <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[SH1]]
+;
+  %sh0 = lshr <4 x i32> %x, %y
+  %r = or <4 x i32> %sh0, %y
+  %sh1 = lshr <4 x i32> %r, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %sh1
+}
+
 define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %py) {
 ; CHECK-LABEL: @lshr_xor(
 ; CHECK-NEXT:    [[Y:%.*]] = srem <8 x i16> [[PY:%.*]], <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
@@ -143,6 +255,21 @@ define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %py) {
   ret <8 x i16> %sh1
 }
 
+define <8 x i16> @lshr_xor_non_imm(<8 x i16> %x, <8 x i16> %py) {
+; CHECK-LABEL: @lshr_xor_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem <8 x i16> [[PY:%.*]], <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[X:%.*]], <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[Y]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:    [[SH1:%.*]] = xor <8 x i16> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <8 x i16> [[SH1]]
+;
+  %y = srem <8 x i16> %py, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 -42> ; thwart complexity-based canonicalization
+  %sh0 = lshr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  %r = xor <8 x i16> %y, %sh0
+  %sh1 = lshr <8 x i16> %r, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %sh1
+}
+
 define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %py, <16 x i8> %pz) {
 ; CHECK-LABEL: @ashr_and(
 ; CHECK-NEXT:    [[Y:%.*]] = srem <16 x i8> [[PY:%.*]], [[PZ:%.*]]
@@ -158,6 +285,21 @@ define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %py, <16 x i8> %pz) {
   ret <16 x i8> %sh1
 }
 
+define <16 x i8> @ashr_and_non_imm(<16 x i8> %x, <16 x i8> %py, <16 x i8> %pz) {
+; CHECK-LABEL: @ashr_and_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem <16 x i8> [[PY:%.*]], [[PZ:%.*]]
+; CHECK-NEXT:    [[SH0:%.*]] = ashr <16 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = and <16 x i8> [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = ashr <16 x i8> [[R]], <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    ret <16 x i8> [[SH1]]
+;
+  %y = srem <16 x i8> %py, %pz ; thwart complexity-based canonicalization
+  %sh0 = ashr <16 x i8> %x, %y
+  %r = and <16 x i8> %y, %sh0
+  %sh1 = ashr <16 x i8> %r, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <16 x i8> %sh1
+}
+
 define <2 x i64> @ashr_or(<2 x i64> %x, <2 x i64> %y) {
 ; CHECK-LABEL: @ashr_or(
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[X:%.*]], <i64 12, i64 12>
@@ -171,6 +313,19 @@ define <2 x i64> @ashr_or(<2 x i64> %x, <2 x i64> %y) {
   ret <2 x i64> %sh1
 }
 
+define <2 x i64> @ashr_or_non_imm(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @ashr_or_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = ashr <2 x i64> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or <2 x i64> [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = ashr <2 x i64> [[R]], <i64 7, i64 7>
+; CHECK-NEXT:    ret <2 x i64> [[SH1]]
+;
+  %sh0 = ashr <2 x i64> %x, %y
+  %r = or <2 x i64> %sh0, %y
+  %sh1 = ashr <2 x i64> %r, <i64 7, i64 7>
+  ret <2 x i64> %sh1
+}
+
 define i32 @ashr_xor(i32 %x, i32 %py) {
 ; CHECK-LABEL: @ashr_xor(
 ; CHECK-NEXT:    [[Y:%.*]] = srem i32 [[PY:%.*]], 42
@@ -186,6 +341,21 @@ define i32 @ashr_xor(i32 %x, i32 %py) {
   ret i32 %sh1
 }
 
+define i32 @ashr_xor_non_imm(i32 %x, i32 %py) {
+; CHECK-LABEL: @ashr_xor_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem i32 [[PY:%.*]], 42
+; CHECK-NEXT:    [[SH0:%.*]] = ashr i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = xor i32 [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = ashr i32 [[R]], 7
+; CHECK-NEXT:    ret i32 [[SH1]]
+;
+  %y = srem i32 %py, 42 ; thwart complexity-based canonicalization
+  %sh0 = ashr i32 %x, %y
+  %r = xor i32 %y, %sh0
+  %sh1 = ashr i32 %r, 7
+  ret i32 %sh1
+}
+
 define i32 @shr_mismatch_xor(i32 %x, i32 %y) {
 ; CHECK-LABEL: @shr_mismatch_xor(
 ; CHECK-NEXT:    [[SH0:%.*]] = ashr i32 [[X:%.*]], 5
@@ -199,6 +369,19 @@ define i32 @shr_mismatch_xor(i32 %x, i32 %y) {
   ret i32 %sh1
 }
 
+define i32 @shr_mismatch_xor_non_imm(i32 %x, i32 %y) {
+; CHECK-LABEL: @shr_mismatch_xor_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = ashr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = xor i32 [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = lshr i32 [[R]], 7
+; CHECK-NEXT:    ret i32 [[SH1]]
+;
+  %sh0 = ashr i32 %x, %y
+  %r = xor i32 %y, %sh0
+  %sh1 = lshr i32 %r, 7
+  ret i32 %sh1
+}
+
 define i32 @ashr_overshift_xor(i32 %x, i32 %y) {
 ; CHECK-LABEL: @ashr_overshift_xor(
 ; CHECK-NEXT:    [[SH0:%.*]] = ashr i32 [[X:%.*]], 15
@@ -212,6 +395,19 @@ define i32 @ashr_overshift_xor(i32 %x, i32 %y) {
   ret i32 %sh1
 }
 
+define i32 @ashr_overshift_xor_non_imm(i32 %x, i32 %y) {
+; CHECK-LABEL: @ashr_overshift_xor_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = ashr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = xor i32 [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = ashr i32 [[R]], 17
+; CHECK-NEXT:    ret i32 [[SH1]]
+;
+  %sh0 = ashr i32 %x, %y
+  %r = xor i32 %y, %sh0
+  %sh1 = ashr i32 %r, 17
+  ret i32 %sh1
+}
+
 define <2 x i32> @ashr_undef_undef_xor(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @ashr_undef_undef_xor(
 ; CHECK-NEXT:    [[SH0:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 15, i32 undef>
@@ -225,6 +421,19 @@ define <2 x i32> @ashr_undef_undef_xor(<2 x i32> %x, <2 x i32> %y) {
   ret <2 x i32> %sh1
 }
 
+define <2 x i32> @ashr_undef_undef_xor_non_imm(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @ashr_undef_undef_xor_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = ashr <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i32> [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = ashr <2 x i32> [[R]], <i32 undef, i32 17>
+; CHECK-NEXT:    ret <2 x i32> [[SH1]]
+;
+  %sh0 = ashr <2 x i32> %x, %y
+  %r = xor <2 x i32> %y, %sh0
+  %sh1 = ashr <2 x i32> %r, <i32 undef, i32 17>
+  ret <2 x i32> %sh1
+}
+
 define i32 @lshr_or_extra_use(i32 %x, i32 %y, ptr %p) {
 ; CHECK-LABEL: @lshr_or_extra_use(
 ; CHECK-NEXT:    [[SH0:%.*]] = lshr i32 [[X:%.*]], 5
@@ -240,6 +449,21 @@ define i32 @lshr_or_extra_use(i32 %x, i32 %y, ptr %p) {
   ret i32 %sh1
 }
 
+define i32 @lshr_or_extra_use_non_imm(i32 %x, i32 %y, ptr %p) {
+; CHECK-LABEL: @lshr_or_extra_use_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or i32 [[SH0]], [[Y]]
+; CHECK-NEXT:    store i32 [[R]], ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[SH1:%.*]] = lshr i32 [[R]], 7
+; CHECK-NEXT:    ret i32 [[SH1]]
+;
+  %sh0 = lshr i32 %x, %y
+  %r = or i32 %y, %sh0
+  store i32 %r, ptr %p
+  %sh1 = lshr i32 %r, 7
+  ret i32 %sh1
+}
+
 ; Avoid crashing on constant expressions.
 
 @g = external global i32
@@ -346,6 +570,19 @@ define i8 @shl_add(i8 %x, i8 %y) {
   ret i8 %sh1
 }
 
+define i8 @shl_add_non_imm(i8 %x, i8 %y) {
+; CHECK-LABEL: @shl_add_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = shl i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl i8 [[R]], 2
+; CHECK-NEXT:    ret i8 [[SH1]]
+;
+  %sh0 = shl i8 %x, %y
+  %r = add i8 %y, %sh0
+  %sh1 = shl i8 %r, 2
+  ret i8 %sh1
+}
+
 define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_add_nonuniform(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 4>
@@ -359,6 +596,18 @@ define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %sh1
 }
 
+define <2 x i8> @shl_add_nonuniform_non_imm(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @shl_add_nonuniform_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i8> [[R]], <i8 2, i8 0>
+; CHECK-NEXT:    ret <2 x i8> [[SH1]]
+;
+  %sh0 = shl <2 x i8> %x, %y
+  %r = add <2 x i8> %y, %sh0
+  %sh1 = shl <2 x i8> %r, <i8 2, i8 0>
+  ret <2 x i8> %sh1
+}
 
 define <2 x i64> @shl_add_undef(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @shl_add_undef(
@@ -375,6 +624,20 @@ define <2 x i64> @shl_add_undef(<2 x i64> %x, <2 x i64> %py) {
   ret <2 x i64> %sh1
 }
 
+define <2 x i64> @shl_add_undef_non_imm(<2 x i64> %x, <2 x i64> %py) {
+; CHECK-LABEL: @shl_add_undef_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i64> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i64> [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i64> [[R]], <i64 7, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> [[SH1]]
+;
+  %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization
+  %sh0 = shl <2 x i64> %x, %y
+  %r = add <2 x i64> %y, %sh0
+  %sh1 = shl <2 x i64> %r, <i64 7, i64 undef>
+  ret <2 x i64> %sh1
+}
 
 define i8 @lshr_add(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_add(
@@ -389,6 +652,19 @@ define i8 @lshr_add(i8 %x, i8 %y) {
   ret i8 %sh1
 }
 
+define i8 @lshr_add_non_imm(i8 %x, i8 %y) {
+; CHECK-LABEL: @lshr_add_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = lshr i8 [[R]], 2
+; CHECK-NEXT:    ret i8 [[SH1]]
+;
+  %sh0 = lshr i8 %x, %y
+  %r = add i8 %y, %sh0
+  %sh1 = lshr i8 %r, 2
+  ret i8 %sh1
+}
+
 define <2 x i8> @lshr_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_add_nonuniform(
 ; CHECK-NEXT:    [[SH0:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 3, i8 4>
@@ -402,6 +678,19 @@ define <2 x i8> @lshr_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %sh1
 }
 
+define <2 x i8> @lshr_add_nonuniform_non_imm(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_add_nonuniform_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[SH0]], [[Y]]
+; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i8> [[R]], <i8 2, i8 0>
+; CHECK-NEXT:    ret <2 x i8> [[SH1]]
+;
+  %sh0 = lshr <2 x i8> %x, %y
+  %r = add <2 x i8> %y, %sh0
+  %sh1 = lshr <2 x i8> %r, <i8 2, i8 0>
+  ret <2 x i8> %sh1
+}
+
 define <2 x i64> @lshr_add_undef(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @lshr_add_undef(
 ; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
@@ -417,6 +706,21 @@ define <2 x i64> @lshr_add_undef(<2 x i64> %x, <2 x i64> %py) {
   ret <2 x i64> %sh1
 }
 
+define <2 x i64> @lshr_add_undef_non_imm(<2 x i64> %x, <2 x i64> %py) {
+; CHECK-LABEL: @lshr_add_undef_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
+; CHECK-NEXT:    [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i64> [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i64> [[R]], <i64 7, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> [[SH1]]
+;
+  %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization
+  %sh0 = lshr <2 x i64> %x, %y
+  %r = add <2 x i64> %y, %sh0
+  %sh1 = lshr <2 x i64> %r, <i64 7, i64 undef>
+  ret <2 x i64> %sh1
+}
+
 define i8 @shl_sub(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_sub(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 5
@@ -430,6 +734,19 @@ define i8 @shl_sub(i8 %x, i8 %y) {
   ret i8 %sh1
 }
 
+define i8 @shl_sub_non_imm(i8 %x, i8 %y) {
+; CHECK-LABEL: @shl_sub_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = shl i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl i8 [[R]], 2
+; CHECK-NEXT:    ret i8 [[SH1]]
+;
+  %sh0 = shl i8 %x, %y
+  %r = sub i8 %y, %sh0
+  %sh1 = shl i8 %r, 2
+  ret i8 %sh1
+}
+
 ; Make sure we don't commute operands for sub
 define i8 @shl_sub_no_commute(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_sub_no_commute(
@@ -444,6 +761,19 @@ define i8 @shl_sub_no_commute(i8 %x, i8 %y) {
   ret i8 %sh1
 }
 
+define i8 @shl_sub_no_commute_non_imm(i8 %x, i8 %y, i8 %z, i8 %w) {
+; CHECK-LABEL: @shl_sub_no_commute_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = shl i8 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[X:%.*]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl i8 [[R]], [[W:%.*]]
+; CHECK-NEXT:    ret i8 [[SH1]]
+;
+  %sh0 = shl i8 %y, %z
+  %r = sub i8 %x, %sh0
+  %sh1 = shl i8 %r, %w
+  ret i8 %sh1
+}
+
 define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_sub_nonuniform(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 4>
@@ -457,6 +787,18 @@ define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %sh1
 }
 
+define <2 x i8> @shl_sub_nonuniform_non_imm(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @shl_sub_nonuniform_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i8> [[R]], <i8 2, i8 0>
+; CHECK-NEXT:    ret <2 x i8> [[SH1]]
+;
+  %sh0 = shl <2 x i8> %x, %y
+  %r = sub <2 x i8> %y, %sh0
+  %sh1 = shl <2 x i8> %r, <i8 2, i8 0>
+  ret <2 x i8> %sh1
+}
 
 define <2 x i64> @shl_sub_undef(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @shl_sub_undef(
@@ -473,6 +815,20 @@ define <2 x i64> @shl_sub_undef(<2 x i64> %x, <2 x i64> %py) {
   ret <2 x i64> %sh1
 }
 
+define <2 x i64> @shl_sub_undef_non_imm(<2 x i64> %x, <2 x i64> %py) {
+; CHECK-LABEL: @shl_sub_undef_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i64> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i64> [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i64> [[R]], <i64 7, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> [[SH1]]
+;
+  %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization
+  %sh0 = shl <2 x i64> %x, %y
+  %r = sub <2 x i64> %y, %sh0
+  %sh1 = shl <2 x i64> %r, <i64 7, i64 undef>
+  ret <2 x i64> %sh1
+}
 
 define i8 @lshr_sub(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_sub(
@@ -487,6 +843,19 @@ define i8 @lshr_sub(i8 %x, i8 %y) {
   ret i8 %sh1
 }
 
+define i8 @lshr_sub_non_imm(i8 %x, i8 %y) {
+; CHECK-LABEL: @lshr_sub_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = lshr i8 [[R]], 2
+; CHECK-NEXT:    ret i8 [[SH1]]
+;
+  %sh0 = lshr i8 %x, %y
+  %r = sub i8 %y, %sh0
+  %sh1 = lshr i8 %r, 2
+  ret i8 %sh1
+}
+
 define <2 x i8> @lshr_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_sub_nonuniform(
 ; CHECK-NEXT:    [[SH0:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 3, i8 4>
@@ -500,6 +869,19 @@ define <2 x i8> @lshr_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %sh1
 }
 
+define <2 x i8> @lshr_sub_nonuniform_non_imm(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_sub_nonuniform_non_imm(
+; CHECK-NEXT:    [[SH0:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i8> [[R]], <i8 2, i8 0>
+; CHECK-NEXT:    ret <2 x i8> [[SH1]]
+;
+  %sh0 = lshr <2 x i8> %x, %y
+  %r = sub <2 x i8> %y, %sh0
+  %sh1 = lshr <2 x i8> %r, <i8 2, i8 0>
+  ret <2 x i8> %sh1
+}
+
 define <2 x i64> @lshr_sub_undef(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @lshr_sub_undef(
 ; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
@@ -514,3 +896,18 @@ define <2 x i64> @lshr_sub_undef(<2 x i64> %x, <2 x i64> %py) {
   %sh1 = lshr <2 x i64> %r, <i64 7, i64 undef>
   ret <2 x i64> %sh1
 }
+
+define <2 x i64> @lshr_sub_undef_non_imm(<2 x i64> %x, <2 x i64> %py) {
+; CHECK-LABEL: @lshr_sub_undef_non_imm(
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
+; CHECK-NEXT:    [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i64> [[Y]], [[SH0]]
+; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i64> [[R]], <i64 7, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> [[SH1]]
+;
+  %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization
+  %sh0 = lshr <2 x i64> %x, %y
+  %r = sub <2 x i64> %y, %sh0
+  %sh1 = lshr <2 x i64> %r, <i64 7, i64 undef>
+  ret <2 x i64> %sh1
+}


