[llvm] [X86] Support lowering for APX promoted BMI instructions. (PR #77433)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 9 01:41:07 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: None (XinWang10)
Changes:
R16-R31 were added to the GPRs in https://github.com/llvm/llvm-project/pull/70958.
This patch adds lowering support for the APX-promoted BMI instructions in the EVEX space; their encoding/decoding was already supported in https://github.com/llvm/llvm-project/pull/73899.
RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
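
For context, the core of the ISel change is a small helper macro, `GET_EGPR_IF_ENABLED`, that picks the EVEX-promoted opcode (the `_EVEX` variant, which can encode R16-R31) when the subtarget has EGPR, and the legacy opcode otherwise; the TableGen patterns are likewise duplicated under `NoEGPR`/`HasEGPR` predicates. Below is a standalone sketch of that selection idiom; the `Opcode` enum and `Subtarget` stub are illustrative stand-ins, not the real LLVM declarations.

```cpp
// Minimal, self-contained illustration of the GET_EGPR_IF_ENABLED idiom.
// Opcode and Subtarget are hypothetical stand-ins for X86::* opcodes and
// X86Subtarget; they are not the actual LLVM definitions.
#include <cstdio>

enum Opcode { BZHI32rr, BZHI32rr_EVEX, BZHI64rr, BZHI64rr_EVEX };

struct Subtarget {
  bool EGPR = false;                      // models the +egpr target feature
  bool hasEGPR() const { return EGPR; }
};

// Same shape as the macro in the patch (which uses Subtarget-> directly):
// select the EVEX-promoted form when extended GPRs are available,
// otherwise keep the legacy encoding.
#define GET_EGPR_IF_ENABLED(ST, OPC) ((ST).hasEGPR() ? OPC##_EVEX : OPC)

static Opcode selectBZHI(const Subtarget &ST, bool Is64Bit) {
  return Is64Bit ? GET_EGPR_IF_ENABLED(ST, BZHI64rr)
                 : GET_EGPR_IF_ENABLED(ST, BZHI32rr);
}

int main() {
  Subtarget Legacy, Apx;
  Apx.EGPR = true;
  std::printf("legacy 64-bit opcode: %d, APX 64-bit opcode: %d\n",
              selectBZHI(Legacy, true), selectBZHI(Apx, true));
  return 0;
}
```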
---
Patch is 97.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77433.diff
12 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+28-16)
- (modified) llvm/lib/Target/X86/X86InstrArithmetic.td (+12-1)
- (modified) llvm/lib/Target/X86/X86InstrCompiler.td (+59-1)
- (modified) llvm/lib/Target/X86/X86InstrMisc.td (+58-3)
- (modified) llvm/lib/Target/X86/X86InstrShiftRotate.td (+81-1)
- (modified) llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll (+78)
- (modified) llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll (+97-1)
- (modified) llvm/test/CodeGen/X86/bmi-x86_64.ll (+56)
- (modified) llvm/test/CodeGen/X86/bmi.ll (+455)
- (modified) llvm/test/CodeGen/X86/bmi2-x86_64.ll (+62)
- (modified) llvm/test/CodeGen/X86/bmi2.ll (+121)
- (modified) llvm/test/CodeGen/X86/shift-bmi2.ll (+115)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 73b10cf3067e1a..8c8dfee23d4b9d 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4087,14 +4087,17 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
SDValue Control;
unsigned ROpc, MOpc;
+#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
if (!PreferBEXTR) {
assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
// If we can't make use of BEXTR then we can't fuse shift+mask stages.
// Let's perform the mask first, and apply shift later. Note that we need to
// widen the mask to account for the fact that we'll apply shift afterwards!
Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
- ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
- MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
+ ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rr)
+ : GET_EGPR_IF_ENABLED(X86::BZHI32rr);
+ MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rm)
+ : GET_EGPR_IF_ENABLED(X86::BZHI32rm);
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
} else {
@@ -4109,12 +4112,15 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
} else {
assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
// BMI requires the immediate to placed in a register.
- ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
- MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+ ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rr)
+ : GET_EGPR_IF_ENABLED(X86::BEXTR32rr);
+ MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rm)
+ : GET_EGPR_IF_ENABLED(X86::BEXTR32rm);
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
}
}
+#undef GET_EGPR_IF_ENABLED
MachineSDNode *NewNode;
SDValue Input = N0->getOperand(0);
@@ -5482,26 +5488,32 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
+#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
case MVT::i32:
- Opc = UseMULXHi ? X86::MULX32Hrr :
- UseMULX ? X86::MULX32rr :
- IsSigned ? X86::IMUL32r : X86::MUL32r;
- MOpc = UseMULXHi ? X86::MULX32Hrm :
- UseMULX ? X86::MULX32rm :
- IsSigned ? X86::IMUL32m : X86::MUL32m;
+ Opc = UseMULXHi ? X86::MULX32Hrr
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
+ : IsSigned ? X86::IMUL32r
+ : X86::MUL32r;
+ MOpc = UseMULXHi ? X86::MULX32Hrm
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rm)
+ : IsSigned ? X86::IMUL32m
+ : X86::MUL32m;
LoReg = UseMULX ? X86::EDX : X86::EAX;
HiReg = X86::EDX;
break;
case MVT::i64:
- Opc = UseMULXHi ? X86::MULX64Hrr :
- UseMULX ? X86::MULX64rr :
- IsSigned ? X86::IMUL64r : X86::MUL64r;
- MOpc = UseMULXHi ? X86::MULX64Hrm :
- UseMULX ? X86::MULX64rm :
- IsSigned ? X86::IMUL64m : X86::MUL64m;
+ Opc = UseMULXHi ? X86::MULX64Hrr
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rr)
+ : IsSigned ? X86::IMUL64r
+ : X86::MUL64r;
+ MOpc = UseMULXHi ? X86::MULX64Hrm
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rm)
+ : IsSigned ? X86::IMUL64m
+ : X86::MUL64m;
LoReg = UseMULX ? X86::RDX : X86::RAX;
HiReg = X86::RDX;
break;
+#undef GET_EGPR_IF_ENABLED
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 5cfa95e085e34a..9c9c387154f267 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1296,7 +1296,7 @@ defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]
defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>;
}
-let Predicates = [HasBMI], AddedComplexity = -6 in {
+let Predicates = [HasBMI, NoEGPR], AddedComplexity = -6 in {
def : Pat<(and (not GR32:$src1), GR32:$src2),
(ANDN32rr GR32:$src1, GR32:$src2)>;
def : Pat<(and (not GR64:$src1), GR64:$src2),
@@ -1307,6 +1307,17 @@ let Predicates = [HasBMI], AddedComplexity = -6 in {
(ANDN64rm GR64:$src1, addr:$src2)>;
}
+let Predicates = [HasBMI, HasEGPR], AddedComplexity = -6 in {
+ def : Pat<(and (not GR32:$src1), GR32:$src2),
+ (ANDN32rr_EVEX GR32:$src1, GR32:$src2)>;
+ def : Pat<(and (not GR64:$src1), GR64:$src2),
+ (ANDN64rr_EVEX GR64:$src1, GR64:$src2)>;
+ def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
+ (ANDN32rm_EVEX GR32:$src1, addr:$src2)>;
+ def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
+ (ANDN64rm_EVEX GR64:$src1, addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// MULX Instruction
//
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index c77c77ee4a3eeb..671094128e9b8c 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1845,7 +1845,7 @@ def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
(SHRD64rrCL GR64:$src1, GR64:$src2)>;
-let Predicates = [HasBMI2] in {
+let Predicates = [HasBMI2, NoEGPR] in {
let AddedComplexity = 1 in {
def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
(SARX32rr GR32:$src1,
@@ -1903,6 +1903,64 @@ let Predicates = [HasBMI2] in {
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+let Predicates = [HasBMI2, HasEGPR] in {
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SARX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SARX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SHRX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SHRX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SHLX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SHLX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ def : Pat<(sra (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SARX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SARX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SHRX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SHRX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SHLX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SHLX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+}
+
// Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
multiclass one_bit_patterns<RegisterClass RC, ValueType VT, Instruction BTR,
Instruction BTS, Instruction BTC,
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 97c625a64cfc0b..0fb332fc5d6cf2 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1241,7 +1241,7 @@ let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in {
defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX;
}
-let Predicates = [HasBMI] in {
+let Predicates = [HasBMI, NoEGPR] in {
// FIXME(1): patterns for the load versions are not implemented
// FIXME(2): By only matching `add_su` and `ineg_su` we may emit
// extra `mov` instructions if `src` has future uses. It may be better
@@ -1278,6 +1278,39 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
}
+let Predicates = [HasBMI, HasEGPR] in {
+ def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
+ (BLSR32rr_EVEX GR32:$src)>;
+ def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
+ (BLSR64rr_EVEX GR64:$src)>;
+
+ def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
+ (BLSMSK32rr_EVEX GR32:$src)>;
+ def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
+ (BLSMSK64rr_EVEX GR64:$src)>;
+
+ def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
+ (BLSI32rr_EVEX GR32:$src)>;
+ def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
+ (BLSI64rr_EVEX GR64:$src)>;
+
+ // Versions to match flag producing ops.
+ def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
+ (BLSR32rr_EVEX GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
+ (BLSR64rr_EVEX GR64:$src)>;
+
+ def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
+ (BLSMSK32rr_EVEX GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
+ (BLSMSK64rr_EVEX GR64:$src)>;
+
+ def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
+ (BLSI32rr_EVEX GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
+ (BLSI64rr_EVEX GR64:$src)>;
+}
+
multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
X86FoldableSchedWrite sched, string Suffix = ""> {
let SchedRW = [sched], Form = MRMSrcReg4VOp3 in
@@ -1324,7 +1357,7 @@ def AndMask64 : ImmLeaf<i64, [{
}]>;
// Use BEXTR for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI, NoBMI2, NoTBM] in {
+let Predicates = [HasBMI, NoBMI2, NoTBM, NoEGPR] in {
def : Pat<(and GR64:$src, AndMask64:$mask),
(BEXTR64rr GR64:$src,
(SUBREG_TO_REG (i64 0),
@@ -1335,8 +1368,19 @@ let Predicates = [HasBMI, NoBMI2, NoTBM] in {
(MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
}
+let Predicates = [HasBMI, NoBMI2, NoTBM, HasEGPR] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BEXTR64rr_EVEX GR64:$src,
+ (SUBREG_TO_REG (i64 0),
+ (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BEXTR64rm_EVEX addr:$src,
+ (SUBREG_TO_REG (i64 0),
+ (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+}
+
// Use BZHI for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI2, NoTBM] in {
+let Predicates = [HasBMI2, NoTBM, NoEGPR] in {
def : Pat<(and GR64:$src, AndMask64:$mask),
(BZHI64rr GR64:$src,
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -1347,6 +1391,17 @@ let Predicates = [HasBMI2, NoTBM] in {
(MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
}
+let Predicates = [HasBMI2, NoTBM, HasEGPR] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BZHI64rr_EVEX GR64:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BZHI64rm_EVEX addr:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+}
+
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, SDPatternOperator OpNode,
PatFrag ld_frag, string Suffix = ""> {
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index f951894db1890c..e225fe6950e3da 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -879,7 +879,7 @@ let Predicates = [HasBMI2, HasEGPR, In64BitMode] in {
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8, PD, REX_W, EVEX;
}
-let Predicates = [HasBMI2] in {
+let Predicates = [HasBMI2, NoEGPR] in {
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
@@ -983,6 +983,86 @@ let Predicates = [HasBMI2] in {
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+let Predicates = [HasBMI2, HasEGPR] in {
+ let AddedComplexity = 10 in {
+ def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri_EVEX GR32:$src, imm:$shamt)>;
+ def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri_EVEX GR64:$src, imm:$shamt)>;
+
+ def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri_EVEX GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri_EVEX GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+ }
+
+ def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi_EVEX addr:$src, imm:$shamt)>;
+ def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi_EVEX addr:$src, imm:$shamt)>;
+
+ def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi_EVEX addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi_EVEX addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, GR8:$src2),
+ (SARX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, GR8:$src2),
+ (SARX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, GR8:$src2),
+ (SHRX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, GR8:$src2),
+ (SHRX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, GR8:$src2),
+ (SHLX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, GR8:$src2),
+ (SHLX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
+ (SARX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
+ (SARX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
+ (SHRX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
+ (SHRX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
+ (SHLX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
+ (SHLX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+}
+
def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
(ROL8ri GR8:$src1, relocImm:$src2)>;
def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
index d704f38307fcb8..5b7bb1ca97b5ca 100644
--- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c
@@ -14,6 +15,13 @@ define i64 @test__andn_u64(i64 %a0, i64 %a1) {
; X64-NEXT: xorq $-1, %rax
; X64-NEXT: andq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__andn_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: xorq $-1, %rax # encoding: [0x48,0x83,0xf0,0xff]
+; EGPR-NEXT: andq %rsi, %rax # encoding: [0x48,0x21,0xf0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%xor = xor i64 %a0, -1
%res = and i64 %xor, %a1
ret i64 %res
@@ -24,6 +32,11 @@ define i64 @test__bextr_u64(i64 %a0, i64 %a1) {
; X64: # %bb.0:
; X64-NEXT: bextrq %rsi, %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__bextr_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%res = call i64 @llvm.x86.bmi.bextr.64(i64 %a0, i64 %a1)
ret i64 %res
}
@@ -35,6 +48,13 @@ define i64 @test__blsi_u64(i64 %a0) {
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsi_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: subq %rdi, %rax # encoding: [0x48,0x29,0xf8]
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%neg = sub i64 0, %a0
%res = and i64 %a0, %neg
ret i64 %res
@@ -46,6 +66,12 @@ define i64 @test__blsmsk_u64(i64 %a0) {
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsmsk_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff]
+; EGPR-NEXT: xorq %rdi, %rax # encoding: [0x48,0x31,0xf8]
+;...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/77433
More information about the llvm-commits
mailing list