[llvm] 0364721 - Revert rG3d14a1e982ad27 - "[InstCombine] recognizeBSwapOrBitReverseIdiom - support for 'partial' bswap patterns (PR47191)"
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 2 10:17:31 PDT 2020
Author: Simon Pilgrim
Date: 2020-10-02T18:17:14+01:00
New Revision: 0364721e3ef2edca318e27b9453adf977911dbb1
URL: https://github.com/llvm/llvm-project/commit/0364721e3ef2edca318e27b9453adf977911dbb1
DIFF: https://github.com/llvm/llvm-project/commit/0364721e3ef2edca318e27b9453adf977911dbb1.diff
LOG: Revert rG3d14a1e982ad27 - "[InstCombine] recognizeBSwapOrBitReverseIdiom - support for 'partial' bswap patterns (PR47191)"
This reverts commit 3d14a1e982ad27111346471564d575ad5efc6419.
This is breaking some 2-stage clang buildbots.
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
llvm/lib/Transforms/Utils/Local.cpp
llvm/test/Transforms/InstCombine/bswap.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index edb2dc8881c7..cbc3f5a2532f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2046,18 +2046,29 @@ Instruction *InstCombinerImpl::matchBSwap(BinaryOperator &Or) {
Op1 = Ext->getOperand(0);
// (A | B) | C and A | (B | C) -> bswap if possible.
- bool OrWithOrs = match(Op0, m_Or(m_Value(), m_Value())) ||
- match(Op1, m_Or(m_Value(), m_Value()));
-
- // (A >> B) | C and (A << B) | C -> bswap if possible.
- bool OrWithShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) ||
- match(Op1, m_LogicalShift(m_Value(), m_Value()));
-
- // (A & B) | C and A | (B & C) -> bswap if possible.
- bool OrWithAnds = match(Op0, m_And(m_Value(), m_Value())) ||
- match(Op1, m_And(m_Value(), m_Value()));
-
- if (!OrWithOrs && !OrWithShifts && !OrWithAnds)
+ bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value()));
+
+ // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
+ bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value()));
+
+ // (A & B) | (C & D) -> bswap if possible.
+ bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) &&
+ match(Op1, m_And(m_Value(), m_Value()));
+
+ // (A << B) | (C & D) -> bswap if possible.
+ // The bigger pattern here is ((A & C1) << C2) | ((B >> C2) & C1), which is a
+ // part of the bswap idiom for specific values of C1, C2 (e.g. C1 = 16711935,
+ // C2 = 8 for i32).
+ // This pattern can occur when the operands of the 'or' are not canonicalized
+ // for some reason (not having only one use, for example).
+ bool OrOfAndAndSh = (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_And(m_Value(), m_Value()))) ||
+ (match(Op0, m_And(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value())));
+
+ if (!OrOfOrs && !OrOfShifts && !OrOfAnds && !OrOfAndAndSh)
return nullptr;
SmallVector<Instruction*, 4> Insts;
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 0c27d803946e..0fd0dfa24ce9 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -2940,24 +2940,6 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return Result;
}
- // BSWAP - most likely due to us previous matching a partial bswap.
- if (match(V, m_BSwap(m_Value(X)))) {
- const auto &Res =
- collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
- if (!Res)
- return Result;
-
- unsigned ByteWidth = BitWidth / 8;
- Result = BitPart(Res->Provider, BitWidth);
- for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) {
- unsigned ByteBitOfs = ByteIdx * 8;
- for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx)
- Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] =
- Res->Provenance[ByteBitOfs + BitIdx];
- }
- return Result;
- }
-
// Funnel 'double' shifts take 3 operands, 2 inputs and the shift
// amount (modulo).
// fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
@@ -3050,15 +3032,10 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
// only byteswap values with an even number of bytes.
unsigned DemandedBW = DemandedTy->getBitWidth();
- APInt DemandedMask = APInt::getAllOnesValue(DemandedBW);
bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0;
bool OKForBitReverse = MatchBitReversals;
for (unsigned BitIdx = 0;
(BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) {
- if (BitProvenance[BitIdx] == BitPart::Unset) {
- DemandedMask.clearBit(BitIdx);
- continue;
- }
OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx,
DemandedBW);
OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx],
@@ -3073,6 +3050,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
else
return false;
+ Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy);
Value *Provider = Res->Provider;
// We may need to truncate the provider.
@@ -3083,19 +3061,12 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
Provider = Trunc;
}
- Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy);
- Instruction *Result = CallInst::Create(F, Provider, "rev", I);
- InsertedInsts.push_back(Result);
-
- if (!DemandedMask.isAllOnesValue()) {
- auto *Mask = ConstantInt::get(DemandedTy, DemandedMask);
- Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I);
- InsertedInsts.push_back(Result);
- }
+ auto *CI = CallInst::Create(F, Provider, "rev", I);
+ InsertedInsts.push_back(CI);
// We may need to zeroextend back to the result type.
- if (ITy != Result->getType()) {
- auto *ExtInst = CastInst::Create(Instruction::ZExt, Result, ITy, "zext", I);
+ if (ITy != CI->getType()) {
+ auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I);
InsertedInsts.push_back(ExtInst);
}
diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll
index d6f079250488..aac34178efd4 100644
--- a/llvm/test/Transforms/InstCombine/bswap.ll
+++ b/llvm/test/Transforms/InstCombine/bswap.ll
@@ -534,8 +534,14 @@ define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) {
define i32 @partial_bswap(i32 %x) {
; CHECK-LABEL: @partial_bswap(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]])
-; CHECK-NEXT: ret i32 [[TMP1]]
+; CHECK-NEXT: [[X3:%.*]] = shl i32 [[X:%.*]], 24
+; CHECK-NEXT: [[A2:%.*]] = shl i32 [[X]], 8
+; CHECK-NEXT: [[X2:%.*]] = and i32 [[A2]], 16711680
+; CHECK-NEXT: [[X32:%.*]] = or i32 [[X3]], [[X2]]
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X]], -65536
+; CHECK-NEXT: [[T2:%.*]] = call i32 @llvm.bswap.i32(i32 [[T1]])
+; CHECK-NEXT: [[R:%.*]] = or i32 [[X32]], [[T2]]
+; CHECK-NEXT: ret i32 [[R]]
;
%x3 = shl i32 %x, 24
%a2 = shl i32 %x, 8
@@ -572,9 +578,10 @@ declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
define i64 @bswap_and_mask_0(i64 %0) {
; CHECK-LABEL: @bswap_and_mask_0(
-; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037927681
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
-; CHECK-NEXT: ret i64 [[TMP3]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i64 [[TMP4]]
;
%2 = lshr i64 %0, 56
%3 = shl i64 %0, 56
@@ -599,9 +606,13 @@ define i64 @bswap_and_mask_1(i64 %0) {
define i64 @bswap_and_mask_2(i64 %0) {
; CHECK-LABEL: @bswap_and_mask_2(
-; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037862401
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
-; CHECK-NEXT: ret i64 [[TMP3]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP0]], 40
+; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 71776119061217280
+; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]]
+; CHECK-NEXT: ret i64 [[TMP7]]
;
%2 = lshr i64 %0, 56
%3 = shl i64 %0, 56
@@ -724,8 +735,28 @@ define i32 @funnel_binary(i32 %abcd) {
define i64 @PR47191_problem1(i64 %0) {
; CHECK-LABEL: @PR47191_problem1(
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]])
-; CHECK-NEXT: ret i64 [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40
+; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24
+; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080
+; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56
+; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40
+; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280
+; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 24
+; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 280375465082880
+; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP9]], [[TMP2]]
+; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP4]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP6]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[TMP8]]
+; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[TMP11]]
+; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[TMP13]]
+; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480
+; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]]
+; CHECK-NEXT: ret i64 [[TMP22]]
;
%2 = lshr i64 %0, 56
%3 = lshr i64 %0, 40
@@ -753,8 +784,28 @@ define i64 @PR47191_problem1(i64 %0) {
define i64 @PR47191_problem2(i64 %0) {
; CHECK-LABEL: @PR47191_problem2(
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]])
-; CHECK-NEXT: ret i64 [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40
+; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24
+; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080
+; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56
+; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40
+; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280
+; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]]
+; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]]
+; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]]
+; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24
+; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880
+; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480
+; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]]
+; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]]
+; CHECK-NEXT: ret i64 [[TMP22]]
;
%2 = lshr i64 %0, 56
%3 = lshr i64 %0, 40
@@ -782,8 +833,28 @@ define i64 @PR47191_problem2(i64 %0) {
define i64 @PR47191_problem3(i64 %0) {
; CHECK-LABEL: @PR47191_problem3(
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]])
-; CHECK-NEXT: ret i64 [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40
+; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24
+; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080
+; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56
+; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40
+; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280
+; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]]
+; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]]
+; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]]
+; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24
+; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880
+; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480
+; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]]
+; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]]
+; CHECK-NEXT: ret i64 [[TMP22]]
;
%2 = lshr i64 %0, 56
%3 = lshr i64 %0, 40
@@ -811,8 +882,28 @@ define i64 @PR47191_problem3(i64 %0) {
define i64 @PR47191_problem4(i64 %0) {
; CHECK-LABEL: @PR47191_problem4(
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]])
-; CHECK-NEXT: ret i64 [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 40
+; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 65280
+; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP0]], 40
+; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 71776119061217280
+; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP0]], 24
+; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], 16711680
+; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP10]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP0]], 24
+; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 280375465082880
+; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP13]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 4278190080
+; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP18]]
+; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480
+; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]]
+; CHECK-NEXT: ret i64 [[TMP22]]
;
%2 = lshr i64 %0, 56
%3 = shl i64 %0, 56
More information about the llvm-commits
mailing list