[llvm] 1d7604f - [InstCombine] Simplify bswap -> shift
Paweł Bylica via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 20 16:25:58 PST 2022
Author: Paweł Bylica
Date: 2022-01-21T01:25:30+01:00
New Revision: 1d7604fdcebda9cc431b7f19c2f4cb769efdca02
URL: https://github.com/llvm/llvm-project/commit/1d7604fdcebda9cc431b7f19c2f4cb769efdca02
DIFF: https://github.com/llvm/llvm-project/commit/1d7604fdcebda9cc431b7f19c2f4cb769efdca02.diff
LOG: [InstCombine] Simplify bswap -> shift
Simplify bswap(x) to shl(x) or lshr(x) if x has exactly one
"active byte", i.e. all active bits are contained in boundaries
of a single byte of x.
https://alive2.llvm.org/ce/z/nvbbU5
https://alive2.llvm.org/ce/z/KiiL3J
Reviewed By: spatel, craig.topper, lebedev.ri
Differential Revision: https://reviews.llvm.org/D117680
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/bswap-fold.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ad97e7265404c..e3a9e806abdba 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1215,6 +1215,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *IIOperand = II->getArgOperand(0);
Value *X = nullptr;
+ KnownBits Known = computeKnownBits(IIOperand, 0, II);
+ uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
+ uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
+
+ // bswap(x) -> shift(x) if x has exactly one "active byte"
+ if (Known.getBitWidth() - LZ - TZ == 8) {
+ assert(LZ != TZ && "active byte cannot be in the middle");
+ if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
+ return BinaryOperator::CreateNUWShl(
+ IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
+ // -> lshr(x) if the "active byte" is in the high part of x
+ return BinaryOperator::CreateExactLShr(
+ IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
+ }
+
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
unsigned C = X->getType()->getScalarSizeInBits() -
diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index 9ffae93bbbc7b..47083b815e9fe 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -358,9 +358,8 @@ define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
define i64 @bs_active_high8(i64 %0) {
; CHECK-LABEL: @bs_active_high8(
-; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
-; CHECK-NEXT: ret i64 [[TMP3]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
+; CHECK-NEXT: ret i64 [[TMP2]]
;
%2 = shl i64 %0, 56
%3 = call i64 @llvm.bswap.i64(i64 %2)
@@ -369,8 +368,8 @@ define i64 @bs_active_high8(i64 %0) {
define i32 @bs_active_high7(i32 %0) {
; CHECK-LABEL: @bs_active_high7(
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], -33554432
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 24
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 254
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = and i32 %0, -33554432 ; 0xfe000000
@@ -380,8 +379,8 @@ define i32 @bs_active_high7(i32 %0) {
define <2 x i64> @bs_active_high4(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_high4(
-; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 60, i64 60>
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 4, i64 4>
+; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 240, i64 240>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%2 = shl <2 x i64> %0, <i64 60, i64 60>
@@ -392,7 +391,7 @@ define <2 x i64> @bs_active_high4(<2 x i64> %0) {
define <2 x i64> @bs_active_high_different(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_high_different(
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 57>
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], <i64 56, i64 56>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%2 = shl <2 x i64> %0, <i64 56, i64 57>
@@ -427,7 +426,7 @@ define <2 x i64> @bs_active_high_undef(<2 x i64> %0) {
define i64 @bs_active_high8_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_high8_multiuse(
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i64 [[TMP4]]
;
@@ -440,7 +439,7 @@ define i64 @bs_active_high8_multiuse(i64 %0) {
define i64 @bs_active_high7_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_high7_multiuse(
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 57
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 56
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i64 [[TMP4]]
;
@@ -452,8 +451,8 @@ define i64 @bs_active_high7_multiuse(i64 %0) {
define i64 @bs_active_byte_6h(i64 %0) {
; CHECK-LABEL: @bs_active_byte_6h(
-; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 280375465082880
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24
+; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 16711680
; CHECK-NEXT: ret i64 [[TMP3]]
;
%2 = and i64 %0, 280375465082880 ; 0xff00'00000000
@@ -463,8 +462,8 @@ define i64 @bs_active_byte_6h(i64 %0) {
define i32 @bs_active_byte_3h(i32 %0) {
; CHECK-LABEL: @bs_active_byte_3h(
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 393216
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1536
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = and i32 %0, 393216 ; 0x0006'0000
@@ -475,7 +474,7 @@ define i32 @bs_active_byte_3h(i32 %0) {
define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_byte_3h_v2(
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 8388608, i32 65536>
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], <i32 8, i32 8>
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%2 = and <2 x i32> %0, <i32 8388608, i32 65536> ; 0x0080'0000, 0x0001'0000
@@ -498,8 +497,8 @@ define i64 @bs_active_byte_78h(i64 %0) {
define i16 @bs_active_low1(i16 %0) {
; CHECK-LABEL: @bs_active_low1(
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 15
-; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 7
+; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP2]], 256
; CHECK-NEXT: ret i16 [[TMP3]]
;
%2 = lshr i16 %0, 15
@@ -509,9 +508,8 @@ define i16 @bs_active_low1(i16 %0) {
define <2 x i32> @bs_active_low8(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_low8(
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 255, i32 255>
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
-; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], <i32 24, i32 24>
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
;
%2 = and <2 x i32> %0, <i32 255, i32 255>
%3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
@@ -521,7 +519,7 @@ define <2 x i32> @bs_active_low8(<2 x i32> %0) {
define <2 x i32> @bs_active_low_different(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_low_different(
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 2, i32 128>
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], <i32 24, i32 24>
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%2 = and <2 x i32> %0, <i32 2, i32 128>
@@ -556,7 +554,7 @@ define <2 x i32> @bs_active_low_undef(<2 x i32> %0) {
define i64 @bs_active_low8_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_low8_multiuse(
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 56
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i64 [[TMP4]]
;
@@ -569,7 +567,7 @@ define i64 @bs_active_low8_multiuse(i64 %0) {
define i64 @bs_active_low7_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_low7_multiuse(
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 127
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 56
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i64 [[TMP4]]
;
@@ -581,8 +579,8 @@ define i64 @bs_active_low7_multiuse(i64 %0) {
define i64 @bs_active_byte_4l(i64 %0) {
; CHECK-LABEL: @bs_active_byte_4l(
-; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 1140850688
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 292057776128
; CHECK-NEXT: ret i64 [[TMP3]]
;
%2 = and i64 %0, 1140850688 ; 0x44000000
@@ -592,8 +590,8 @@ define i64 @bs_active_byte_4l(i64 %0) {
define i32 @bs_active_byte_2l(i32 %0) {
; CHECK-LABEL: @bs_active_byte_2l(
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 65280
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 16711680
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = and i32 %0, 65280 ; 0xff00
@@ -604,7 +602,7 @@ define i32 @bs_active_byte_2l(i32 %0) {
define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_byte_2l_v2(
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], <i64 256, i64 65280>
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], <i64 40, i64 40>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%2 = and <2 x i64> %0, <i64 256, i64 65280> ; 0x0100, 0xff00
More information about the llvm-commits
mailing list