[llvm] 374e029 - [X86][InstCombine] Add constant folding and simplification support for pdep and pext

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 31 15:22:44 PST 2019


Author: Craig Topper
Date: 2019-12-31T15:06:47-08:00
New Revision: 374e0299cf08dc1ff2c68e7f6c159b98ab55dfb8

URL: https://github.com/llvm/llvm-project/commit/374e0299cf08dc1ff2c68e7f6c159b98ab55dfb8
DIFF: https://github.com/llvm/llvm-project/commit/374e0299cf08dc1ff2c68e7f6c159b98ab55dfb8.diff

LOG: [X86][InstCombine] Add constant folding and simplification support for pdep and pext

These instructions use a mask to either pack disjoint bits together (pext) or spread bits to disjoint locations (pdep). If the mask is all zeros, no bits are extracted or deposited. If the mask is all ones, the source value is written to the result unchanged, since no compression or expansion happens. Otherwise, if both the source and the mask are constant, we can walk the bits of the source and mask and compute the result.
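
For reference, here is a standalone C++ sketch of the same bit-walk outside of InstCombine (illustrative only; the pext64/pdep64 helper names are invented for this note). Plugging in the constants from the 32-bit tests below gives pext64(0x76543210, 0xF0F0F0F0) == 0x7531 (30001) and pdep64(0x76543210, 0xF0F0F0F0) == 0x30201000 (807407616):

  #include <cstdint>

  // Software model of PEXT: gather the Src bits selected by Mask into the
  // low bits of the result, in order.
  uint64_t pext64(uint64_t Src, uint64_t Mask) {
    uint64_t Result = 0;
    uint64_t BitToSet = 1;
    while (Mask) {
      uint64_t BitToTest = Mask & -Mask; // isolate lowest set bit of Mask
      if (Src & BitToTest)
        Result |= BitToSet;              // pack it into the next low bit
      BitToSet <<= 1;
      Mask &= Mask - 1;                  // clear lowest set bit of Mask
    }
    return Result;
  }

  // Software model of PDEP: scatter the low bits of Src to the positions
  // of the set bits in Mask, in order.
  uint64_t pdep64(uint64_t Src, uint64_t Mask) {
    uint64_t Result = 0;
    uint64_t BitToTest = 1;
    while (Mask) {
      uint64_t BitToSet = Mask & -Mask;  // lowest set Mask bit is the target
      if (Src & BitToTest)
        Result |= BitToSet;              // deposit the next low Src bit there
      BitToTest <<= 1;
      Mask &= Mask - 1;                  // clear lowest set bit of Mask
    }
    return Result;
  }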

There are other, crazier things we could do, such as using computeKnownBits or turning pext into a shift/and when only a single contiguous range of bits is extracted.
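
For example (a sketch of that TODO, not something this patch does): when the mask is a single contiguous run of bits, pext reduces to a shift by the mask's trailing-zero count followed by an and with popcount(Mask) ones:

  #include <cstdint>

  // pext(X, 0x00FF0000) extracts bits 16..23 into the low byte, so for
  // this contiguous mask it is equivalent to a shift and an and.
  uint32_t pextContiguous(uint32_t X) {
    return (X >> 16) & 0xFF;
  }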

Fixes PR44389

Differential Revision: https://reviews.llvm.org/D71952

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e4a73c659b69..bcbc953ecd62 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2487,6 +2487,64 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       // TODO should we convert this to an AND if the RHS is constant?
     }
     break;
+  case Intrinsic::x86_bmi_pext_32:
+  case Intrinsic::x86_bmi_pext_64:
+    if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+      if (MaskC->isNullValue())
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+      if (MaskC->isAllOnesValue())
+        return replaceInstUsesWith(CI, II->getArgOperand(0));
+
+      if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+        uint64_t Src = SrcC->getZExtValue();
+        uint64_t Mask = MaskC->getZExtValue();
+        uint64_t Result = 0;
+        uint64_t BitToSet = 1;
+
+        while (Mask) {
+          // Isolate lowest set bit.
+          uint64_t BitToTest = Mask & -Mask;
+          if (BitToTest & Src)
+            Result |= BitToSet;
+
+          BitToSet <<= 1;
+          // Clear lowest set bit.
+          Mask &= Mask - 1;
+        }
+
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
+      }
+    }
+    break;
+  case Intrinsic::x86_bmi_pdep_32:
+  case Intrinsic::x86_bmi_pdep_64:
+    if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+      if (MaskC->isNullValue())
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+      if (MaskC->isAllOnesValue())
+        return replaceInstUsesWith(CI, II->getArgOperand(0));
+
+      if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+        uint64_t Src = SrcC->getZExtValue();
+        uint64_t Mask = MaskC->getZExtValue();
+        uint64_t Result = 0;
+        uint64_t BitToTest = 1;
+
+        while (Mask) {
+          // Isolate lowest set bit.
+          uint64_t BitToSet = Mask & -Mask;
+          if (BitToTest & Src)
+            Result |= BitToSet;
+
+          BitToTest <<= 1;
+          // Clear lowest set bit.
+          Mask &= Mask - 1;
+        }
+
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
+      }
+    }
+    break;
 
   case Intrinsic::x86_vcvtph2ps_128:
   case Intrinsic::x86_vcvtph2ps_256: {

diff --git a/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll b/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
index 2b472cad2da2..669f813306d1 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
@@ -7,6 +7,10 @@ declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
 declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
 declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
 declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
+declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.bmi.pext.64(i64, i64) nounwind readnone
+declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.bmi.pdep.64(i64, i64) nounwind readnone
 
 define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
 ; CHECK-LABEL: @test_x86_tbm_bextri_u32(
@@ -269,3 +273,131 @@ define i64 @test_x86_bmi_bzhi_64_constfold() nounwind readnone {
   %1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 5, i64 1)
   ret i64 %1
 }
+
+define i32 @test_x86_pext_32_zero_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_zero_mask(
+; CHECK-NEXT:    ret i32 0
+;
+  %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 0)
+  ret i32 %1
+}
+
+define i64 @test_x86_pext_64_zero_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_zero_mask(
+; CHECK-NEXT:    ret i64 0
+;
+  %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 0)
+  ret i64 %1
+}
+
+define i32 @test_x86_pext_32_allones_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_allones_mask(
+; CHECK-NEXT:    ret i32 %x
+;
+  %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 -1)
+  ret i32 %1
+}
+
+define i64 @test_x86_pext_64_allones_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_allones_mask(
+; CHECK-NEXT:    ret i64 %x
+;
+  %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 -1)
+  ret i64 %1
+}
+
+define i32 @test_x86_pext_32_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_constant_fold(
+; CHECK-NEXT:    ret i32 30001
+;
+  %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 1985229328, i32 4042322160)
+  ret i32 %1
+}
+
+define i64 @test_x86_pext_64_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_constant_fold(
+; CHECK-NEXT:    ret i64 1966210489
+;
+  %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 8526495043095935640, i64 -1085102592571150096)
+  ret i64 %1
+}
+
+define i32 @test_x86_pext_32_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_constant_fold_2(
+; CHECK-NEXT:    ret i32 30224
+;
+  %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 1985229328, i32 4278190335)
+  ret i32 %1
+}
+
+define i64 @test_x86_pext_64_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_constant_fold_2(
+; CHECK-NEXT:    ret i64 1980816570
+;
+  %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 8526495043095935640, i64 -72056498804490496)
+  ret i64 %1
+}
+
+define i32 @test_x86_pdep_32_zero_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_zero_mask(
+; CHECK-NEXT:    ret i32 0
+;
+  %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 0)
+  ret i32 %1
+}
+
+define i64 @test_x86_pdep_64_zero_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_zero_mask(
+; CHECK-NEXT:    ret i64 0
+;
+  %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 0)
+  ret i64 %1
+}
+
+define i32 @test_x86_pdep_32_allones_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_allones_mask(
+; CHECK-NEXT:    ret i32 %x
+;
+  %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 -1)
+  ret i32 %1
+}
+
+define i64 @test_x86_pdep_64_allones_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_allones_mask(
+; CHECK-NEXT:    ret i64 %x
+;
+  %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 -1)
+  ret i64 %1
+}
+
+define i32 @test_x86_pdep_32_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_constant_fold(
+; CHECK-NEXT:    ret i32 807407616
+;
+  %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 1985229328, i32 4042322160)
+  ret i32 %1
+}
+
+define i64 @test_x86_pdep_64_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_constant_fold(
+; CHECK-NEXT:    ret i64 -1089641583808049024
+;
+  %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 8526495043095935640, i64 -1085102592571150096)
+  ret i64 %1
+}
+
+define i32 @test_x86_pdep_32_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_constant_fold_2(
+; CHECK-NEXT:    ret i32 838860816
+;
+  %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 1985229328, i32 4278190335)
+  ret i32 %1
+}
+
+define i64 @test_x86_pdep_64_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_constant_fold_2(
+; CHECK-NEXT:    ret i64 -144114243170822144
+;
+  %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 8526495043095935640, i64 -72056498804490496)
+  ret i64 %1
+}