[llvm] 1d7604f - [InstCombine] Simplify bswap -> shift

Paweł Bylica via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 20 16:25:58 PST 2022


Author: Paweł Bylica
Date: 2022-01-21T01:25:30+01:00
New Revision: 1d7604fdcebda9cc431b7f19c2f4cb769efdca02

URL: https://github.com/llvm/llvm-project/commit/1d7604fdcebda9cc431b7f19c2f4cb769efdca02
DIFF: https://github.com/llvm/llvm-project/commit/1d7604fdcebda9cc431b7f19c2f4cb769efdca02.diff

LOG: [InstCombine] Simplify bswap -> shift

Simplify bswap(x) to shl(x) or lshr(x) if x has exactly one
"active byte", i.e. all active bits are contained in boundaries
of a single byte of x.

https://alive2.llvm.org/ce/z/nvbbU5
https://alive2.llvm.org/ce/z/KiiL3J

Reviewed By: spatel, craig.topper, lebedev.ri

Differential Revision: https://reviews.llvm.org/D117680

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/test/Transforms/InstCombine/bswap-fold.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ad97e7265404c..e3a9e806abdba 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1215,6 +1215,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     Value *IIOperand = II->getArgOperand(0);
     Value *X = nullptr;
 
+    KnownBits Known = computeKnownBits(IIOperand, 0, II);
+    uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
+    uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
+
+    // bswap(x) -> shift(x) if x has exactly one "active byte"
+    if (Known.getBitWidth() - LZ - TZ == 8) {
+      assert(LZ != TZ && "active byte cannot be in the middle");
+      if (LZ > TZ)  // -> shl(x) if the "active byte" is in the low part of x
+        return BinaryOperator::CreateNUWShl(
+            IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
+      // -> lshr(x) if the "active byte" is in the high part of x
+      return BinaryOperator::CreateExactLShr(
+            IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
+    }
+
     // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
     if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
       unsigned C = X->getType()->getScalarSizeInBits() -

diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index 9ffae93bbbc7b..47083b815e9fe 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -358,9 +358,8 @@ define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
 
 define i64 @bs_active_high8(i64 %0) {
 ; CHECK-LABEL: @bs_active_high8(
-; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %2 = shl i64 %0, 56
   %3 = call i64 @llvm.bswap.i64(i64 %2)
@@ -369,8 +368,8 @@ define i64 @bs_active_high8(i64 %0) {
 
 define i32 @bs_active_high7(i32 %0) {
 ; CHECK-LABEL: @bs_active_high7(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], -33554432
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 254
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = and i32 %0, -33554432  ; 0xfe000000
@@ -380,8 +379,8 @@ define i32 @bs_active_high7(i32 %0) {
 
 define <2 x i64> @bs_active_high4(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_high4(
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 60, i64 60>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 4, i64 4>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 240, i64 240>
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = shl <2 x i64> %0, <i64 60, i64 60>
@@ -392,7 +391,7 @@ define <2 x i64> @bs_active_high4(<2 x i64> %0) {
 define <2 x i64> @bs_active_high_different(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_high_different(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 57>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], <i64 56, i64 56>
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = shl <2 x i64> %0, <i64 56, i64 57>
@@ -427,7 +426,7 @@ define <2 x i64> @bs_active_high_undef(<2 x i64> %0) {
 define i64 @bs_active_high8_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_high8_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP0]], 255
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -440,7 +439,7 @@ define i64 @bs_active_high8_multiuse(i64 %0) {
 define i64 @bs_active_high7_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_high7_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 57
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 56
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -452,8 +451,8 @@ define i64 @bs_active_high7_multiuse(i64 %0) {
 
 define i64 @bs_active_byte_6h(i64 %0) {
 ; CHECK-LABEL: @bs_active_byte_6h(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 280375465082880
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 16711680
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %2 = and i64 %0, 280375465082880  ; 0xff00'00000000
@@ -463,8 +462,8 @@ define i64 @bs_active_byte_6h(i64 %0) {
 
 define i32 @bs_active_byte_3h(i32 %0) {
 ; CHECK-LABEL: @bs_active_byte_3h(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 393216
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 1536
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = and i32 %0, 393216  ; 0x0006'0000
@@ -475,7 +474,7 @@ define i32 @bs_active_byte_3h(i32 %0) {
 define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_byte_3h_v2(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 8388608, i32 65536>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], <i32 8, i32 8>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %2 = and <2 x i32> %0, <i32 8388608, i32 65536>  ; 0x0080'0000, 0x0001'0000
@@ -498,8 +497,8 @@ define i64 @bs_active_byte_78h(i64 %0) {
 
 define i16 @bs_active_low1(i16 %0) {
 ; CHECK-LABEL: @bs_active_low1(
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 15
-; CHECK-NEXT:    [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 7
+; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], 256
 ; CHECK-NEXT:    ret i16 [[TMP3]]
 ;
   %2 = lshr i16 %0, 15
@@ -509,9 +508,8 @@ define i16 @bs_active_low1(i16 %0) {
 
 define <2 x i32> @bs_active_low8(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_low8(
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 255, i32 255>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
-; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], <i32 24, i32 24>
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %2 = and <2 x i32> %0, <i32 255, i32 255>
   %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
@@ -521,7 +519,7 @@ define <2 x i32> @bs_active_low8(<2 x i32> %0) {
 define <2 x i32> @bs_active_low_different(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_low_different(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 2, i32 128>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], <i32 24, i32 24>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %2 = and <2 x i32> %0, <i32 2, i32 128>
@@ -556,7 +554,7 @@ define <2 x i32> @bs_active_low_undef(<2 x i32> %0) {
 define i64 @bs_active_low8_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_low8_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 56
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -569,7 +567,7 @@ define i64 @bs_active_low8_multiuse(i64 %0) {
 define i64 @bs_active_low7_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_low7_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 127
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 56
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -581,8 +579,8 @@ define i64 @bs_active_low7_multiuse(i64 %0) {
 
 define i64 @bs_active_byte_4l(i64 %0) {
 ; CHECK-LABEL: @bs_active_byte_4l(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 1140850688
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 292057776128
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %2 = and i64 %0, 1140850688  ; 0x44000000
@@ -592,8 +590,8 @@ define i64 @bs_active_byte_4l(i64 %0) {
 
 define i32 @bs_active_byte_2l(i32 %0) {
 ; CHECK-LABEL: @bs_active_byte_2l(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 65280
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 16711680
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = and i32 %0, 65280  ; 0xff00
@@ -604,7 +602,7 @@ define i32 @bs_active_byte_2l(i32 %0) {
 define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_byte_2l_v2(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], <i64 256, i64 65280>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], <i64 40, i64 40>
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = and <2 x i64> %0, <i64 256, i64 65280>  ; 0x0100, 0xff00


        


More information about the llvm-commits mailing list