[llvm] aacfe2b - [InstCombine] recognizeBSwapOrBitReverseIdiom - add vector support
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 3 08:36:01 PDT 2020
Author: Simon Pilgrim
Date: 2020-10-03T16:26:46+01:00
New Revision: aacfe2be53d441d256091b2b495875a69fc2f285
URL: https://github.com/llvm/llvm-project/commit/aacfe2be53d441d256091b2b495875a69fc2f285
DIFF: https://github.com/llvm/llvm-project/commit/aacfe2be53d441d256091b2b495875a69fc2f285.diff
LOG: [InstCombine] recognizeBSwapOrBitReverseIdiom - add vector support
Add basic vector handling to recognizeBSwapOrBitReverseIdiom/collectBitParts - this works at the element level, all vector element operations must match (splat constants etc.) and there is no cross-element support (insert/extract/shuffle etc.).
Added:
Modified:
llvm/lib/Transforms/Utils/Local.cpp
llvm/test/Transforms/InstCombine/bswap.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index d17ce2fb47c8..eea347aa8fe6 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -2803,7 +2803,7 @@ struct BitPart {
/// Analyze the specified subexpression and see if it is capable of providing
/// pieces of a bswap or bitreverse. The subexpression provides a potential
-/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
+/// piece of a bswap or bitreverse if it can be proved that each non-zero bit in
/// the output of the expression came from a corresponding bit in some other
/// value. This function is recursive, and the end result is a mapping of
/// bitnumber to bitnumber. It is the caller's responsibility to validate that
@@ -2815,6 +2815,10 @@ struct BitPart {
/// BitPart is returned with Provider set to %X and Provenance[24-31] set to
/// [0-7].
///
+/// For vector types, all analysis is performed at the per-element level. No
+/// cross-element analysis is supported (shuffle/insertion/reduction), and all
+/// constant masks must be splatted across all elements.
+///
/// To avoid revisiting values, the BitPart results are memoized into the
/// provided map. To avoid unnecessary copying of BitParts, BitParts are
/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
@@ -3019,14 +3023,14 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
return false;
if (!MatchBSwaps && !MatchBitReversals)
return false;
- IntegerType *ITy = dyn_cast<IntegerType>(I->getType());
- if (!ITy || ITy->getBitWidth() > 128)
- return false; // Can't do vectors or integers > 128 bits.
+ Type *ITy = I->getType();
+ if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128)
+ return false; // Can't do integer/elements > 128 bits.
- IntegerType *DemandedTy = ITy;
+ Type *DemandedTy = ITy;
if (I->hasOneUse())
if (auto *Trunc = dyn_cast<TruncInst>(I->user_back()))
- DemandedTy = cast<IntegerType>(Trunc->getType());
+ DemandedTy = Trunc->getType();
// Try to find all the pieces corresponding to the bswap.
std::map<Value *, Optional<BitPart>> BPS;
@@ -3044,12 +3048,14 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
BitProvenance = BitProvenance.drop_back();
if (BitProvenance.empty())
return false; // TODO - handle null value?
- DemandedTy = IntegerType::get(I->getContext(), BitProvenance.size());
+ DemandedTy = Type::getIntNTy(I->getContext(), BitProvenance.size());
+ if (auto *IVecTy = dyn_cast<VectorType>(ITy))
+ DemandedTy = VectorType::get(DemandedTy, IVecTy);
}
// Check BitProvenance hasn't found a source larger than the result type.
- unsigned DemandedBW = DemandedTy->getBitWidth();
- if (DemandedBW > ITy->getBitWidth())
+ unsigned DemandedBW = DemandedTy->getScalarSizeInBits();
+ if (DemandedBW > ITy->getScalarSizeInBits())
return false;
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll
index d6f079250488..effbc66499c0 100644
--- a/llvm/test/Transforms/InstCombine/bswap.ll
+++ b/llvm/test/Transforms/InstCombine/bswap.ll
@@ -22,15 +22,7 @@ define i32 @test1(i32 %i) {
define <2 x i32> @test1_vector(<2 x i32> %i) {
; CHECK-LABEL: @test1_vector(
-; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[I:%.*]], <i32 24, i32 24>
-; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[I]], <i32 8, i32 8>
-; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], <i32 65280, i32 65280>
-; CHECK-NEXT: [[T5:%.*]] = or <2 x i32> [[T1]], [[T4]]
-; CHECK-NEXT: [[T7:%.*]] = shl <2 x i32> [[I]], <i32 8, i32 8>
-; CHECK-NEXT: [[T8:%.*]] = and <2 x i32> [[T7]], <i32 16711680, i32 16711680>
-; CHECK-NEXT: [[T9:%.*]] = or <2 x i32> [[T5]], [[T8]]
-; CHECK-NEXT: [[T11:%.*]] = shl <2 x i32> [[I]], <i32 24, i32 24>
-; CHECK-NEXT: [[T12:%.*]] = or <2 x i32> [[T9]], [[T11]]
+; CHECK-NEXT: [[T12:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[I:%.*]])
; CHECK-NEXT: ret <2 x i32> [[T12]]
;
%t1 = lshr <2 x i32> %i, <i32 24, i32 24>
@@ -64,15 +56,7 @@ define i32 @test2(i32 %arg) {
define <2 x i32> @test2_vector(<2 x i32> %arg) {
; CHECK-LABEL: @test2_vector(
-; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[ARG:%.*]], <i32 24, i32 24>
-; CHECK-NEXT: [[T4:%.*]] = shl <2 x i32> [[ARG]], <i32 8, i32 8>
-; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], <i32 16711680, i32 16711680>
-; CHECK-NEXT: [[T6:%.*]] = or <2 x i32> [[T2]], [[T5]]
-; CHECK-NEXT: [[T8:%.*]] = lshr <2 x i32> [[ARG]], <i32 8, i32 8>
-; CHECK-NEXT: [[T9:%.*]] = and <2 x i32> [[T8]], <i32 65280, i32 65280>
-; CHECK-NEXT: [[T10:%.*]] = or <2 x i32> [[T6]], [[T9]]
-; CHECK-NEXT: [[T12:%.*]] = lshr <2 x i32> [[ARG]], <i32 24, i32 24>
-; CHECK-NEXT: [[T14:%.*]] = or <2 x i32> [[T10]], [[T12]]
+; CHECK-NEXT: [[T14:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[ARG:%.*]])
; CHECK-NEXT: ret <2 x i32> [[T14]]
;
%t2 = shl <2 x i32> %arg, <i32 24, i32 24>
@@ -225,15 +209,7 @@ define i32 @test6(i32 %x) nounwind readnone {
define <2 x i32> @test6_vector(<2 x i32> %x) nounwind readnone {
; CHECK-LABEL: @test6_vector(
-; CHECK-NEXT: [[T:%.*]] = shl <2 x i32> [[X:%.*]], <i32 16, i32 16>
-; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X]], <i32 65280, i32 65280>
-; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[X]], <i32 16, i32 16>
-; CHECK-NEXT: [[T2:%.*]] = and <2 x i32> [[T1]], <i32 255, i32 255>
-; CHECK-NEXT: [[T3:%.*]] = or <2 x i32> [[X_MASK]], [[T]]
-; CHECK-NEXT: [[T4:%.*]] = or <2 x i32> [[T3]], [[T2]]
-; CHECK-NEXT: [[T5:%.*]] = shl <2 x i32> [[T4]], <i32 8, i32 8>
-; CHECK-NEXT: [[T6:%.*]] = lshr <2 x i32> [[X]], <i32 24, i32 24>
-; CHECK-NEXT: [[T7:%.*]] = or <2 x i32> [[T5]], [[T6]]
+; CHECK-NEXT: [[T7:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
; CHECK-NEXT: ret <2 x i32> [[T7]]
;
%t = shl <2 x i32> %x, <i32 16, i32 16>
@@ -381,12 +357,9 @@ define i16 @test10(i32 %a) {
define <2 x i16> @test10_vector(<2 x i32> %a) {
; CHECK-LABEL: @test10_vector(
-; CHECK-NEXT: [[SHR1:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 8, i32 8>
-; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[SHR1]], <i32 255, i32 255>
-; CHECK-NEXT: [[AND2:%.*]] = shl <2 x i32> [[A]], <i32 8, i32 8>
-; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[AND1]], [[AND2]]
-; CHECK-NEXT: [[CONV:%.*]] = trunc <2 x i32> [[OR]] to <2 x i16>
-; CHECK-NEXT: ret <2 x i16> [[CONV]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i32> [[A:%.*]] to <2 x i16>
+; CHECK-NEXT: [[REV:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[TRUNC]])
+; CHECK-NEXT: ret <2 x i16> [[REV]]
;
%shr1 = lshr <2 x i32> %a, <i32 8, i32 8>
%and1 = and <2 x i32> %shr1, <i32 255, i32 255>
@@ -457,12 +430,10 @@ define i64 @PR39793_bswap_u64_as_u16(i64 %0) {
define <2 x i64> @PR39793_bswap_u64_as_u16_vector(<2 x i64> %0) {
; CHECK-LABEL: @PR39793_bswap_u64_as_u16_vector(
-; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP0:%.*]], <i64 8, i64 8>
-; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 255, i64 255>
-; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[TMP0]], <i64 8, i64 8>
-; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 65280, i64 65280>
-; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP3]], [[TMP5]]
-; CHECK-NEXT: ret <2 x i64> [[TMP6]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[TMP0:%.*]] to <2 x i16>
+; CHECK-NEXT: [[REV:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[TRUNC]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[REV]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%2 = lshr <2 x i64> %0, <i64 8, i64 8>
%3 = and <2 x i64> %2, <i64 255, i64 255>
@@ -550,14 +521,8 @@ declare i32 @llvm.bswap.i32(i32)
define <2 x i32> @partial_bswap_vector(<2 x i32> %x) {
; CHECK-LABEL: @partial_bswap_vector(
-; CHECK-NEXT: [[X3:%.*]] = shl <2 x i32> [[X:%.*]], <i32 24, i32 24>
-; CHECK-NEXT: [[A2:%.*]] = shl <2 x i32> [[X]], <i32 8, i32 8>
-; CHECK-NEXT: [[X2:%.*]] = and <2 x i32> [[A2]], <i32 16711680, i32 16711680>
-; CHECK-NEXT: [[X32:%.*]] = or <2 x i32> [[X3]], [[X2]]
-; CHECK-NEXT: [[T1:%.*]] = and <2 x i32> [[X]], <i32 -65536, i32 -65536>
-; CHECK-NEXT: [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[T1]])
-; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[X32]], [[T2]]
-; CHECK-NEXT: ret <2 x i32> [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%x3 = shl <2 x i32> %x, <i32 24, i32 24>
%a2 = shl <2 x i32> %x, <i32 8, i32 8>
More information about the llvm-commits
mailing list