[llvm] [X86] Support lowering for APX promoted BMI instructions. (PR #77433)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 10 22:38:50 PST 2024
https://github.com/XinWang10 updated https://github.com/llvm/llvm-project/pull/77433
From d8d452ae3aa063b9c312de195b0afcabb594d035 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 9 Jan 2024 00:45:01 -0800
Subject: [PATCH 1/3] [X86] Support lowering for APX promoted BMI instructions.
---
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 44 +-
llvm/lib/Target/X86/X86InstrArithmetic.td | 13 +-
llvm/lib/Target/X86/X86InstrCompiler.td | 60 ++-
llvm/lib/Target/X86/X86InstrMisc.td | 61 ++-
llvm/lib/Target/X86/X86InstrShiftRotate.td | 82 +++-
.../X86/bmi-intrinsics-fast-isel-x86_64.ll | 78 +++
.../CodeGen/X86/bmi-intrinsics-fast-isel.ll | 98 +++-
llvm/test/CodeGen/X86/bmi-x86_64.ll | 56 +++
llvm/test/CodeGen/X86/bmi.ll | 455 ++++++++++++++++++
llvm/test/CodeGen/X86/bmi2-x86_64.ll | 62 +++
llvm/test/CodeGen/X86/bmi2.ll | 121 +++++
llvm/test/CodeGen/X86/shift-bmi2.ll | 115 +++++
12 files changed, 1222 insertions(+), 23 deletions(-)
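
The ISelDAGToDAG change below keys each opcode choice on `Subtarget->hasEGPR()` through a short-lived `GET_EGPR_IF_ENABLED` macro. A minimal, self-contained sketch of that selection pattern is shown here, using placeholder opcode and subtarget types rather than the actual LLVM definitions:

```cpp
#include <cstdio>

// Stand-in for the patch's opcode constants (the real ones live in the
// generated X86 instruction enum); the _EVEX variants are the APX-promoted
// encodings that can address the extended GPRs r16-r31.
enum Opcode { BZHI32rr, BZHI32rr_EVEX, BZHI64rr, BZHI64rr_EVEX };

struct Subtarget {
  bool EGPR = false;
  bool hasEGPR() const { return EGPR; }
};

// Mirrors the patch's GET_EGPR_IF_ENABLED macro: prefer the EVEX-promoted
// form only when extended GPRs are available.
#define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)

static Opcode selectBZHI(const Subtarget &ST, bool Is64Bit) {
  return Is64Bit ? GET_EGPR_IF_ENABLED(BZHI64rr)
                 : GET_EGPR_IF_ENABLED(BZHI32rr);
}

int main() {
  Subtarget Legacy, APX{true};
  std::printf("%d %d\n", selectBZHI(Legacy, true), selectBZHI(APX, true));
}
```

When EGPR is available the EVEX-promoted form is chosen, leaving register allocation free to use the APX extended registers; otherwise the existing VEX-encoded opcode is kept, which is also why the TableGen patterns below are duplicated under NoEGPR/HasEGPR predicates.
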
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 53ce720be2da4c..8d519f181549ff 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4087,14 +4087,17 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
SDValue Control;
unsigned ROpc, MOpc;
+#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
if (!PreferBEXTR) {
assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
// If we can't make use of BEXTR then we can't fuse shift+mask stages.
// Let's perform the mask first, and apply shift later. Note that we need to
// widen the mask to account for the fact that we'll apply shift afterwards!
Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
- ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
- MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
+ ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rr)
+ : GET_EGPR_IF_ENABLED(X86::BZHI32rr);
+ MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rm)
+ : GET_EGPR_IF_ENABLED(X86::BZHI32rm);
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
} else {
@@ -4109,12 +4112,15 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
} else {
assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
// BMI requires the immediate to placed in a register.
- ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
- MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+ ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rr)
+ : GET_EGPR_IF_ENABLED(X86::BEXTR32rr);
+ MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rm)
+ : GET_EGPR_IF_ENABLED(X86::BEXTR32rm);
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
}
}
+#undef GET_EGPR_IF_ENABLED
MachineSDNode *NewNode;
SDValue Input = N0->getOperand(0);
@@ -5482,26 +5488,32 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
+#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
case MVT::i32:
- Opc = UseMULXHi ? X86::MULX32Hrr :
- UseMULX ? X86::MULX32rr :
- IsSigned ? X86::IMUL32r : X86::MUL32r;
- MOpc = UseMULXHi ? X86::MULX32Hrm :
- UseMULX ? X86::MULX32rm :
- IsSigned ? X86::IMUL32m : X86::MUL32m;
+ Opc = UseMULXHi ? X86::MULX32Hrr
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
+ : IsSigned ? X86::IMUL32r
+ : X86::MUL32r;
+ MOpc = UseMULXHi ? X86::MULX32Hrm
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rm)
+ : IsSigned ? X86::IMUL32m
+ : X86::MUL32m;
LoReg = UseMULX ? X86::EDX : X86::EAX;
HiReg = X86::EDX;
break;
case MVT::i64:
- Opc = UseMULXHi ? X86::MULX64Hrr :
- UseMULX ? X86::MULX64rr :
- IsSigned ? X86::IMUL64r : X86::MUL64r;
- MOpc = UseMULXHi ? X86::MULX64Hrm :
- UseMULX ? X86::MULX64rm :
- IsSigned ? X86::IMUL64m : X86::MUL64m;
+ Opc = UseMULXHi ? X86::MULX64Hrr
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rr)
+ : IsSigned ? X86::IMUL64r
+ : X86::MUL64r;
+ MOpc = UseMULXHi ? X86::MULX64Hrm
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rm)
+ : IsSigned ? X86::IMUL64m
+ : X86::MUL64m;
LoReg = UseMULX ? X86::RDX : X86::RAX;
HiReg = X86::RDX;
break;
+#undef GET_EGPR_IF_ENABLED
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 76b0fe5f5cad18..289141ce6c33f5 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1338,7 +1338,7 @@ defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]
defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>;
}
-let Predicates = [HasBMI], AddedComplexity = -6 in {
+let Predicates = [HasBMI, NoEGPR], AddedComplexity = -6 in {
def : Pat<(and (not GR32:$src1), GR32:$src2),
(ANDN32rr GR32:$src1, GR32:$src2)>;
def : Pat<(and (not GR64:$src1), GR64:$src2),
@@ -1349,6 +1349,17 @@ let Predicates = [HasBMI], AddedComplexity = -6 in {
(ANDN64rm GR64:$src1, addr:$src2)>;
}
+let Predicates = [HasBMI, HasEGPR], AddedComplexity = -6 in {
+ def : Pat<(and (not GR32:$src1), GR32:$src2),
+ (ANDN32rr_EVEX GR32:$src1, GR32:$src2)>;
+ def : Pat<(and (not GR64:$src1), GR64:$src2),
+ (ANDN64rr_EVEX GR64:$src1, GR64:$src2)>;
+ def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
+ (ANDN32rm_EVEX GR32:$src1, addr:$src2)>;
+ def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
+ (ANDN64rm_EVEX GR64:$src1, addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// MULX Instruction
//
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 422391a6e02ae0..3510bdeeff4c09 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1864,7 +1864,7 @@ def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
(SHRD64rrCL GR64:$src1, GR64:$src2)>;
-let Predicates = [HasBMI2] in {
+let Predicates = [HasBMI2, NoEGPR] in {
let AddedComplexity = 1 in {
def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
(SARX32rr GR32:$src1,
@@ -1922,6 +1922,64 @@ let Predicates = [HasBMI2] in {
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+let Predicates = [HasBMI2, HasEGPR] in {
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SARX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SARX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SHRX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SHRX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SHLX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SHLX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ def : Pat<(sra (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SARX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SARX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SHRX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SHRX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SHLX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SHLX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+}
+
// Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
multiclass one_bit_patterns<RegisterClass RC, ValueType VT, Instruction BTR,
Instruction BTS, Instruction BTC,
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 753cf62392a17b..a51720d738f47e 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1241,7 +1241,7 @@ let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in {
defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX;
}
-let Predicates = [HasBMI] in {
+let Predicates = [HasBMI, NoEGPR] in {
// FIXME(1): patterns for the load versions are not implemented
// FIXME(2): By only matching `add_su` and `ineg_su` we may emit
// extra `mov` instructions if `src` has future uses. It may be better
@@ -1278,6 +1278,39 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
}
+let Predicates = [HasBMI, HasEGPR] in {
+ def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
+ (BLSR32rr_EVEX GR32:$src)>;
+ def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
+ (BLSR64rr_EVEX GR64:$src)>;
+
+ def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
+ (BLSMSK32rr_EVEX GR32:$src)>;
+ def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
+ (BLSMSK64rr_EVEX GR64:$src)>;
+
+ def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
+ (BLSI32rr_EVEX GR32:$src)>;
+ def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
+ (BLSI64rr_EVEX GR64:$src)>;
+
+ // Versions to match flag producing ops.
+ def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
+ (BLSR32rr_EVEX GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
+ (BLSR64rr_EVEX GR64:$src)>;
+
+ def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
+ (BLSMSK32rr_EVEX GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
+ (BLSMSK64rr_EVEX GR64:$src)>;
+
+ def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
+ (BLSI32rr_EVEX GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
+ (BLSI64rr_EVEX GR64:$src)>;
+}
+
multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
X86FoldableSchedWrite sched, string Suffix = ""> {
let SchedRW = [sched], Form = MRMSrcReg4VOp3 in
@@ -1324,7 +1357,7 @@ def AndMask64 : ImmLeaf<i64, [{
}]>;
// Use BEXTR for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI, NoBMI2, NoTBM] in {
+let Predicates = [HasBMI, NoBMI2, NoTBM, NoEGPR] in {
def : Pat<(and GR64:$src, AndMask64:$mask),
(BEXTR64rr GR64:$src,
(SUBREG_TO_REG (i64 0),
@@ -1335,8 +1368,19 @@ let Predicates = [HasBMI, NoBMI2, NoTBM] in {
(MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
}
+let Predicates = [HasBMI, NoBMI2, NoTBM, HasEGPR] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BEXTR64rr_EVEX GR64:$src,
+ (SUBREG_TO_REG (i64 0),
+ (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BEXTR64rm_EVEX addr:$src,
+ (SUBREG_TO_REG (i64 0),
+ (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+}
+
// Use BZHI for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI2, NoTBM] in {
+let Predicates = [HasBMI2, NoTBM, NoEGPR] in {
def : Pat<(and GR64:$src, AndMask64:$mask),
(BZHI64rr GR64:$src,
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -1347,6 +1391,17 @@ let Predicates = [HasBMI2, NoTBM] in {
(MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
}
+let Predicates = [HasBMI2, NoTBM, HasEGPR] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BZHI64rr_EVEX GR64:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BZHI64rm_EVEX addr:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+}
+
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, SDPatternOperator OpNode,
PatFrag ld_frag, string Suffix = ""> {
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index f951894db1890c..e225fe6950e3da 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -879,7 +879,7 @@ let Predicates = [HasBMI2, HasEGPR, In64BitMode] in {
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8, PD, REX_W, EVEX;
}
-let Predicates = [HasBMI2] in {
+let Predicates = [HasBMI2, NoEGPR] in {
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
@@ -983,6 +983,86 @@ let Predicates = [HasBMI2] in {
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+let Predicates = [HasBMI2, HasEGPR] in {
+ let AddedComplexity = 10 in {
+ def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri_EVEX GR32:$src, imm:$shamt)>;
+ def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri_EVEX GR64:$src, imm:$shamt)>;
+
+ def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri_EVEX GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri_EVEX GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+ }
+
+ def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi_EVEX addr:$src, imm:$shamt)>;
+ def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi_EVEX addr:$src, imm:$shamt)>;
+
+ def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi_EVEX addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi_EVEX addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, GR8:$src2),
+ (SARX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, GR8:$src2),
+ (SARX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, GR8:$src2),
+ (SHRX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, GR8:$src2),
+ (SHRX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, GR8:$src2),
+ (SHLX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, GR8:$src2),
+ (SHLX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
+ (SARX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
+ (SARX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
+ (SHRX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
+ (SHRX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
+ (SHLX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
+ (SHLX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+}
+
def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
(ROL8ri GR8:$src1, relocImm:$src2)>;
def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
index d704f38307fcb8..5b7bb1ca97b5ca 100644
--- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c
@@ -14,6 +15,13 @@ define i64 @test__andn_u64(i64 %a0, i64 %a1) {
; X64-NEXT: xorq $-1, %rax
; X64-NEXT: andq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__andn_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: xorq $-1, %rax # encoding: [0x48,0x83,0xf0,0xff]
+; EGPR-NEXT: andq %rsi, %rax # encoding: [0x48,0x21,0xf0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%xor = xor i64 %a0, -1
%res = and i64 %xor, %a1
ret i64 %res
@@ -24,6 +32,11 @@ define i64 @test__bextr_u64(i64 %a0, i64 %a1) {
; X64: # %bb.0:
; X64-NEXT: bextrq %rsi, %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__bextr_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%res = call i64 @llvm.x86.bmi.bextr.64(i64 %a0, i64 %a1)
ret i64 %res
}
@@ -35,6 +48,13 @@ define i64 @test__blsi_u64(i64 %a0) {
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsi_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: subq %rdi, %rax # encoding: [0x48,0x29,0xf8]
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%neg = sub i64 0, %a0
%res = and i64 %a0, %neg
ret i64 %res
@@ -46,6 +66,12 @@ define i64 @test__blsmsk_u64(i64 %a0) {
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsmsk_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff]
+; EGPR-NEXT: xorq %rdi, %rax # encoding: [0x48,0x31,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i64 %a0, 1
%res = xor i64 %a0, %dec
ret i64 %res
@@ -57,6 +83,12 @@ define i64 @test__blsr_u64(i64 %a0) {
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsr_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff]
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i64 %a0, 1
%res = and i64 %a0, %dec
ret i64 %res
@@ -67,6 +99,11 @@ define i64 @test__tzcnt_u64(i64 %a0) {
; X64: # %bb.0:
; X64-NEXT: tzcntq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__tzcnt_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: tzcntq %rdi, %rax # encoding: [0xf3,0x48,0x0f,0xbc,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%cmp = icmp ne i64 %a0, 0
%cttz = call i64 @llvm.cttz.i64(i64 %a0, i1 false)
ret i64 %cttz
@@ -83,6 +120,13 @@ define i64 @test_andn_u64(i64 %a0, i64 %a1) {
; X64-NEXT: xorq $-1, %rax
; X64-NEXT: andq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_andn_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: xorq $-1, %rax # encoding: [0x48,0x83,0xf0,0xff]
+; EGPR-NEXT: andq %rsi, %rax # encoding: [0x48,0x21,0xf0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%xor = xor i64 %a0, -1
%res = and i64 %xor, %a1
ret i64 %res
@@ -98,6 +142,16 @@ define i64 @test_bextr_u64(i64 %a0, i32 %a1, i32 %a2) {
; X64-NEXT: movl %edx, %eax
; X64-NEXT: bextrq %rax, %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_bextr_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl $255, %esi # encoding: [0x81,0xe6,0xff,0x00,0x00,0x00]
+; EGPR-NEXT: andl $255, %edx # encoding: [0x81,0xe2,0xff,0x00,0x00,0x00]
+; EGPR-NEXT: shll $8, %edx # encoding: [0xc1,0xe2,0x08]
+; EGPR-NEXT: orl %esi, %edx # encoding: [0x09,0xf2]
+; EGPR-NEXT: movl %edx, %eax # encoding: [0x89,0xd0]
+; EGPR-NEXT: bextrq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and1 = and i32 %a1, 255
%and2 = and i32 %a2, 255
%shl = shl i32 %and2, 8
@@ -114,6 +168,13 @@ define i64 @test_blsi_u64(i64 %a0) {
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsi_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: subq %rdi, %rax # encoding: [0x48,0x29,0xf8]
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%neg = sub i64 0, %a0
%res = and i64 %a0, %neg
ret i64 %res
@@ -125,6 +186,12 @@ define i64 @test_blsmsk_u64(i64 %a0) {
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsmsk_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff]
+; EGPR-NEXT: xorq %rdi, %rax # encoding: [0x48,0x31,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i64 %a0, 1
%res = xor i64 %a0, %dec
ret i64 %res
@@ -136,6 +203,12 @@ define i64 @test_blsr_u64(i64 %a0) {
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsr_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff]
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i64 %a0, 1
%res = and i64 %a0, %dec
ret i64 %res
@@ -146,6 +219,11 @@ define i64 @test_tzcnt_u64(i64 %a0) {
; X64: # %bb.0:
; X64-NEXT: tzcntq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_tzcnt_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: tzcntq %rdi, %rax # encoding: [0xf3,0x48,0x0f,0xbc,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%cmp = icmp ne i64 %a0, 0
%cttz = call i64 @llvm.cttz.i64(i64 %a0, i1 false)
ret i64 %cttz
diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
index 58b894a9da8b6f..7dbd1bba63861e 100644
--- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
-
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c
;
@@ -23,6 +23,14 @@ define i16 @test__tzcnt_u16(i16 %a0) {
; X64-NEXT: tzcntl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__tzcnt_u16:
+; EGPR: # %bb.0:
+; EGPR-NEXT: orl $65536, %edi # encoding: [0x81,0xcf,0x00,0x00,0x01,0x00]
+; EGPR-NEXT: # imm = 0x10000
+; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7]
+; EGPR-NEXT: # kill: def $ax killed $ax killed $eax
+; EGPR-NEXT: retq # encoding: [0xc3]
%zext = zext i16 %a0 to i32
%cmp = icmp ne i32 %zext, 0
%cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 false)
@@ -43,6 +51,13 @@ define i32 @test__andn_u32(i32 %a0, i32 %a1) {
; X64-NEXT: xorl $-1, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__andn_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: xorl $-1, %eax # encoding: [0x83,0xf0,0xff]
+; EGPR-NEXT: andl %esi, %eax # encoding: [0x21,0xf0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%xor = xor i32 %a0, -1
%res = and i32 %xor, %a1
ret i32 %res
@@ -59,6 +74,11 @@ define i32 @test__bextr_u32(i32 %a0, i32 %a1) {
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__bextr_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%res = call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1)
ret i32 %res
}
@@ -78,6 +98,13 @@ define i32 @test__blsi_u32(i32 %a0) {
; X64-NEXT: subl %edi, %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsi_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: subl %edi, %eax # encoding: [0x29,0xf8]
+; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%neg = sub i32 0, %a0
%res = and i32 %a0, %neg
ret i32 %res
@@ -97,6 +124,13 @@ define i32 @test__blsmsk_u32(i32 %a0) {
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: xorl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsmsk_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff]
+; EGPR-NEXT: xorl %edi, %eax # encoding: [0x31,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i32 %a0, 1
%res = xor i32 %a0, %dec
ret i32 %res
@@ -116,6 +150,13 @@ define i32 @test__blsr_u32(i32 %a0) {
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsr_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff]
+; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i32 %a0, 1
%res = and i32 %a0, %dec
ret i32 %res
@@ -131,6 +172,11 @@ define i32 @test__tzcnt_u32(i32 %a0) {
; X64: # %bb.0:
; X64-NEXT: tzcntl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__tzcnt_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%cmp = icmp ne i32 %a0, 0
%cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 false)
ret i32 %cttz
@@ -155,6 +201,14 @@ define i16 @test_tzcnt_u16(i16 %a0) {
; X64-NEXT: tzcntl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_tzcnt_u16:
+; EGPR: # %bb.0:
+; EGPR-NEXT: orl $65536, %edi # encoding: [0x81,0xcf,0x00,0x00,0x01,0x00]
+; EGPR-NEXT: # imm = 0x10000
+; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7]
+; EGPR-NEXT: # kill: def $ax killed $ax killed $eax
+; EGPR-NEXT: retq # encoding: [0xc3]
%zext = zext i16 %a0 to i32
%cmp = icmp ne i32 %zext, 0
%cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 false)
@@ -175,6 +229,13 @@ define i32 @test_andn_u32(i32 %a0, i32 %a1) {
; X64-NEXT: xorl $-1, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_andn_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: xorl $-1, %eax # encoding: [0x83,0xf0,0xff]
+; EGPR-NEXT: andl %esi, %eax # encoding: [0x21,0xf0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%xor = xor i32 %a0, -1
%res = and i32 %xor, %a1
ret i32 %res
@@ -200,6 +261,15 @@ define i32 @test_bextr_u32(i32 %a0, i32 %a1, i32 %a2) {
; X64-NEXT: orl %esi, %edx
; X64-NEXT: bextrl %edx, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_bextr_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl $255, %esi # encoding: [0x81,0xe6,0xff,0x00,0x00,0x00]
+; EGPR-NEXT: andl $255, %edx # encoding: [0x81,0xe2,0xff,0x00,0x00,0x00]
+; EGPR-NEXT: shll $8, %edx # encoding: [0xc1,0xe2,0x08]
+; EGPR-NEXT: orl %esi, %edx # encoding: [0x09,0xf2]
+; EGPR-NEXT: bextrl %edx, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x68,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and1 = and i32 %a1, 255
%and2 = and i32 %a2, 255
%shl = shl i32 %and2, 8
@@ -223,6 +293,13 @@ define i32 @test_blsi_u32(i32 %a0) {
; X64-NEXT: subl %edi, %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsi_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: subl %edi, %eax # encoding: [0x29,0xf8]
+; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%neg = sub i32 0, %a0
%res = and i32 %a0, %neg
ret i32 %res
@@ -242,6 +319,13 @@ define i32 @test_blsmsk_u32(i32 %a0) {
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: xorl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsmsk_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff]
+; EGPR-NEXT: xorl %edi, %eax # encoding: [0x31,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i32 %a0, 1
%res = xor i32 %a0, %dec
ret i32 %res
@@ -261,6 +345,13 @@ define i32 @test_blsr_u32(i32 %a0) {
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsr_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff]
+; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i32 %a0, 1
%res = and i32 %a0, %dec
ret i32 %res
@@ -276,6 +367,11 @@ define i32 @test_tzcnt_u32(i32 %a0) {
; X64: # %bb.0:
; X64-NEXT: tzcntl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_tzcnt_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%cmp = icmp ne i32 %a0, 0
%cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 false)
ret i32 %cttz
diff --git a/llvm/test/CodeGen/X86/bmi-x86_64.ll b/llvm/test/CodeGen/X86/bmi-x86_64.ll
index df180d0f0235ce..aa571531c0c6aa 100644
--- a/llvm/test/CodeGen/X86/bmi-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi-x86_64.ll
@@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
declare i64 @llvm.x86.bmi.bextr.64(i64, i64)
@@ -11,6 +12,11 @@ define i64 @bextr64(i64 %x, i64 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: bextrq %rsi, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bextr64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -28,6 +34,14 @@ define i64 @bextr64b(i64 %x) uwtable ssp {
; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04
; BEXTR-FAST-NEXT: bextrl %eax, %edi, %eax
; BEXTR-FAST-NEXT: retq
+;
+; EGPR-LABEL: bextr64b:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: shrl $4, %eax # encoding: [0xc1,0xe8,0x04]
+; EGPR-NEXT: andl $4095, %eax # encoding: [0x25,0xff,0x0f,0x00,0x00]
+; EGPR-NEXT: # imm = 0xFFF
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = lshr i64 %x, 4
%2 = and i64 %1, 4095
ret i64 %2
@@ -40,6 +54,12 @@ define i64 @bextr64_subreg(i64 %x) uwtable ssp {
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movzbl %ah, %eax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bextr64_subreg:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: movzbl %ah, %eax # encoding: [0x0f,0xb6,0xc4]
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = lshr i64 %x, 8
%2 = and i64 %1, 255
ret i64 %2
@@ -58,6 +78,14 @@ define i64 @bextr64b_load(ptr %x) {
; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04
; BEXTR-FAST-NEXT: bextrl %eax, (%rdi), %eax
; BEXTR-FAST-NEXT: retq
+;
+; EGPR-LABEL: bextr64b_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
+; EGPR-NEXT: shrl $4, %eax # encoding: [0xc1,0xe8,0x04]
+; EGPR-NEXT: andl $4095, %eax # encoding: [0x25,0xff,0x0f,0x00,0x00]
+; EGPR-NEXT: # imm = 0xFFF
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = load i64, ptr %x, align 8
%2 = lshr i64 %1, 4
%3 = and i64 %2, 4095
@@ -71,6 +99,12 @@ define i64 @bextr64c(i64 %x, i32 %y) {
; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
; CHECK-NEXT: bextrq %rsi, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bextr64c:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $esi killed $esi def $rsi
+; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp0 = sext i32 %y to i64
%tmp1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %tmp0)
ret i64 %tmp1
@@ -96,6 +130,13 @@ define i64 @bextr64d(i64 %a) {
; BEXTR-FAST-NEXT: movl $8450, %eax # imm = 0x2102
; BEXTR-FAST-NEXT: bextrq %rax, %rdi, %rax
; BEXTR-FAST-NEXT: retq
+;
+; EGPR-LABEL: bextr64d:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: movl $35, %eax # encoding: [0xb8,0x23,0x00,0x00,0x00]
+; EGPR-NEXT: bzhiq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf5,0xc7]
+; EGPR-NEXT: shrq $2, %rax # encoding: [0x48,0xc1,0xe8,0x02]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%shr = lshr i64 %a, 2
%and = and i64 %shr, 8589934591
@@ -123,6 +164,13 @@ define i64 @bextr64d_load(ptr %aptr) {
; BEXTR-FAST-NEXT: movl $8450, %eax # imm = 0x2102
; BEXTR-FAST-NEXT: bextrq %rax, (%rdi), %rax
; BEXTR-FAST-NEXT: retq
+;
+; EGPR-LABEL: bextr64d_load:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: movl $35, %eax # encoding: [0xb8,0x23,0x00,0x00,0x00]
+; EGPR-NEXT: bzhiq %rax, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf5,0x07]
+; EGPR-NEXT: shrq $2, %rax # encoding: [0x48,0xc1,0xe8,0x02]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%a = load i64, ptr %aptr, align 8
%shr = lshr i64 %a, 2
@@ -137,6 +185,14 @@ define i64 @non_bextr64(i64 %x) {
; CHECK-NEXT: movabsq $8589934590, %rax # imm = 0x1FFFFFFFE
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: non_bextr64:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: shrq $2, %rdi # encoding: [0x48,0xc1,0xef,0x02]
+; EGPR-NEXT: movabsq $8589934590, %rax # encoding: [0x48,0xb8,0xfe,0xff,0xff,0xff,0x01,0x00,0x00,0x00]
+; EGPR-NEXT: # imm = 0x1FFFFFFFE
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%shr = lshr i64 %x, 2
%and = and i64 %shr, 8589934590
diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index e4e33c99a6b88a..2683fab59ad1bc 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -5,6 +5,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+fast-bextr | FileCheck %s --check-prefixes=X86,X86-FAST-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=X64,X64-FAST-BEXTR
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
define i32 @andn32(i32 %x, i32 %y) {
; X86-LABEL: andn32:
@@ -17,6 +18,11 @@ define i32 @andn32(i32 %x, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp1 = xor i32 %x, -1
%tmp2 = and i32 %y, %tmp1
ret i32 %tmp2
@@ -34,6 +40,11 @@ define i32 @andn32_load(i32 %x, ptr %y) {
; X64: # %bb.0:
; X64-NEXT: andnl (%rsi), %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i32, ptr %y
%tmp1 = xor i32 %x, -1
%tmp2 = and i32 %y1, %tmp1
@@ -53,6 +64,11 @@ define i64 @andn64(i64 %x, i64 %y) {
; X64: # %bb.0:
; X64-NEXT: andnq %rsi, %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc0,0xf2,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp1 = xor i64 %x, -1
%tmp2 = and i64 %tmp1, %y
ret i64 %tmp2
@@ -72,6 +88,13 @@ define i1 @andn_cmp(i32 %x, i32 %y) {
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn_cmp:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%notx = xor i32 %x, -1
%and = and i32 %notx, %y
%cmp = icmp eq i32 %and, 0
@@ -92,6 +115,13 @@ define i1 @and_cmp1(i32 %x, i32 %y) {
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp1:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %x, %y
%cmp = icmp eq i32 %and, %y
ret i1 %cmp
@@ -110,6 +140,13 @@ define i1 @and_cmp2(i32 %x, i32 %y) {
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %y, %x
%cmp = icmp ne i32 %and, %y
ret i1 %cmp
@@ -128,6 +165,13 @@ define i1 @and_cmp3(i32 %x, i32 %y) {
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp3:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %x, %y
%cmp = icmp eq i32 %y, %and
ret i1 %cmp
@@ -146,6 +190,13 @@ define i1 @and_cmp4(i32 %x, i32 %y) {
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp4:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %y, %x
%cmp = icmp ne i32 %y, %and
ret i1 %cmp
@@ -168,6 +219,13 @@ define i1 @and_cmp_const(i32 %x) {
; X64-NEXT: testb $43, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp_const:
+; EGPR: # %bb.0:
+; EGPR-NEXT: notl %edi # encoding: [0xf7,0xd7]
+; EGPR-NEXT: testb $43, %dil # encoding: [0x40,0xf6,0xc7,0x2b]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %x, 43
%cmp = icmp eq i32 %and, 43
ret i1 %cmp
@@ -188,6 +246,12 @@ define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
; X64-NEXT: btl %esi, %edi
; X64-NEXT: setae %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp_const_power_of_two:
+; EGPR: # %bb.0:
+; EGPR-NEXT: btl %esi, %edi # encoding: [0x0f,0xa3,0xf7]
+; EGPR-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = shl i32 1, %y
%and = and i32 %x, %shl
%cmp = icmp ne i32 %and, %shl
@@ -214,6 +278,15 @@ define i32 @and_cmp_not_one_use(i32 %x) {
; X64-NEXT: sete %al
; X64-NEXT: addl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp_not_one_use:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl $37, %edi # encoding: [0x83,0xe7,0x25]
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: cmpl $37, %edi # encoding: [0x83,0xff,0x25]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %x, 37
%cmp = icmp eq i32 %and, 37
%ext = zext i1 %cmp to i32
@@ -238,6 +311,13 @@ define i1 @not_an_andn1(i32 %x, i32 %y) {
; X64-NEXT: cmpl %edi, %esi
; X64-NEXT: setg %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: not_an_andn1:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl %esi, %edi # encoding: [0x21,0xf7]
+; EGPR-NEXT: cmpl %edi, %esi # encoding: [0x39,0xfe]
+; EGPR-NEXT: setg %al # encoding: [0x0f,0x9f,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %x, %y
%cmp = icmp sgt i32 %y, %and
ret i1 %cmp
@@ -259,6 +339,13 @@ define i1 @not_an_andn2(i32 %x, i32 %y) {
; X64-NEXT: cmpl %edi, %esi
; X64-NEXT: setbe %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: not_an_andn2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl %esi, %edi # encoding: [0x21,0xf7]
+; EGPR-NEXT: cmpl %edi, %esi # encoding: [0x39,0xfe]
+; EGPR-NEXT: setbe %al # encoding: [0x0f,0x96,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %y, %x
%cmp = icmp ule i32 %y, %and
ret i1 %cmp
@@ -281,6 +368,13 @@ define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
; X64-NEXT: andnq %rsi, %rdi, %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn_cmp_swap_ops:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc0,0xf2,0xc6]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%notx = xor i64 %x, -1
%and = and i64 %y, %notx
%cmp = icmp eq i64 %and, 0
@@ -303,6 +397,13 @@ define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; X64-NEXT: testb %sil, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn_cmp_i8:
+; EGPR: # %bb.0:
+; EGPR-NEXT: notb %sil # encoding: [0x40,0xf6,0xd6]
+; EGPR-NEXT: testb %sil, %dil # encoding: [0x40,0x84,0xf7]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%noty = xor i8 %y, -1
%and = and i8 %x, %noty
%cmp = icmp eq i8 %and, 0
@@ -323,6 +424,13 @@ define i1 @andn_cmp_i32_overflow(i32 %x, i32 %y) {
; X64-NEXT: andnl %edi, %esi, %eax
; X64-NEXT: setle %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn_cmp_i32_overflow:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf2,0xc7]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: setle %al # encoding: [0x0f,0x9e,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%noty = xor i32 %y, -1
%and = and i32 %x, %noty
%cmp = icmp slt i32 %and, 1
@@ -342,6 +450,11 @@ define i32 @bextr32(i32 %x, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bextr32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
ret i32 %tmp
}
@@ -358,6 +471,11 @@ define i32 @bextr32_load(ptr %x, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, (%rdi), %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bextr32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i32, ptr %x
%tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
ret i32 %tmp
@@ -389,6 +507,13 @@ define i32 @bextr32b(i32 %x) uwtable ssp {
; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax
; X64-FAST-BEXTR-NEXT: retq
+;
+; EGPR-LABEL: bextr32b:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $3076, %eax # encoding: [0xb8,0x04,0x0c,0x00,0x00]
+; EGPR-NEXT: # imm = 0xC04
+; EGPR-NEXT: bextrl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = lshr i32 %x, 4
%2 = and i32 %1, 4095
ret i32 %2
@@ -406,6 +531,12 @@ define i32 @bextr32_subreg(i32 %x) uwtable ssp {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movzbl %ah, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bextr32_subreg:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: movzbl %ah, %eax # encoding: [0x0f,0xb6,0xc4]
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = lshr i32 %x, 8
%2 = and i32 %1, 255
ret i32 %2
@@ -439,6 +570,13 @@ define i32 @bextr32b_load(ptr %x) uwtable ssp {
; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax
; X64-FAST-BEXTR-NEXT: retq
+;
+; EGPR-LABEL: bextr32b_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $3076, %eax # encoding: [0xb8,0x04,0x0c,0x00,0x00]
+; EGPR-NEXT: # imm = 0xC04
+; EGPR-NEXT: bextrl %eax, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = load i32, ptr %x
%2 = lshr i32 %1, 4
%3 = and i32 %2, 4095
@@ -457,6 +595,11 @@ define i32 @bextr32c(i32 %x, i16 zeroext %y) {
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bextr32c:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp0 = sext i16 %y to i32
%tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)
ret i32 %tmp1
@@ -476,6 +619,13 @@ define i32 @non_bextr32(i32 %x) {
; X64-NEXT: shrl $2, %eax
; X64-NEXT: andl $111, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: non_bextr32:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: shrl $2, %eax # encoding: [0xc1,0xe8,0x02]
+; EGPR-NEXT: andl $111, %eax # encoding: [0x83,0xe0,0x6f]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%shr = lshr i32 %x, 2
%and = and i32 %shr, 111
@@ -492,6 +642,11 @@ define i32 @blsi32(i32 %x) {
; X64: # %bb.0:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 0, %x
%tmp2 = and i32 %x, %tmp
ret i32 %tmp2
@@ -508,6 +663,11 @@ define i32 @blsi32_load(ptr %x) {
; X64: # %bb.0:
; X64-NEXT: blsil (%rdi), %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsil (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x1f]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i32, ptr %x
%tmp = sub i32 0, %x1
%tmp2 = and i32 %x1, %tmp
@@ -529,6 +689,13 @@ define i32 @blsi32_z(i32 %a, i32 %b) nounwind {
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 0, %a
%t1 = and i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -552,6 +719,14 @@ define i32 @blsi32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsil %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsil %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xdf]
+; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; EGPR-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 0, %a
%t1 = and i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -577,6 +752,14 @@ define i32 @blsi32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsil %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsil %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xdf]
+; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; EGPR-NEXT: cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 0, %a
%t1 = and i32 %t0, %a
%t2 = icmp sle i32 %t1, 0
@@ -606,6 +789,11 @@ define i64 @blsi64(i64 %x) {
; X64: # %bb.0:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 0, %x
%tmp2 = and i64 %tmp, %x
ret i64 %tmp2
@@ -638,6 +826,13 @@ define i64 @blsi64_z(i64 %a, i64 %b) nounwind {
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 0, %a
%t1 = and i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -672,6 +867,14 @@ define i64 @blsi64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsiq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsiq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xdf]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 0, %a
%t1 = and i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -707,6 +910,14 @@ define i64 @blsi64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsiq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsiq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xdf]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 0, %a
%t1 = and i64 %t0, %a
%t2 = icmp sle i64 %t1, 0
@@ -724,6 +935,11 @@ define i32 @blsmsk32(i32 %x) {
; X64: # %bb.0:
; X64-NEXT: blsmskl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsmskl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xd7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 %x, 1
%tmp2 = xor i32 %x, %tmp
ret i32 %tmp2
@@ -740,6 +956,11 @@ define i32 @blsmsk32_load(ptr %x) {
; X64: # %bb.0:
; X64-NEXT: blsmskl (%rdi), %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsmskl (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x17]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i32, ptr %x
%tmp = sub i32 %x1, 1
%tmp2 = xor i32 %x1, %tmp
@@ -761,6 +982,13 @@ define i32 @blsmsk32_z(i32 %a, i32 %b) nounwind {
; X64-NEXT: blsmskl %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk32_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsmskl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xd7]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = xor i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -784,6 +1012,13 @@ define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk32_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsmskl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xd7]
+; EGPR-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = xor i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -807,6 +1042,14 @@ define i32 @blsmsk32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk32_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsmskl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xd7]
+; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; EGPR-NEXT: cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = xor i32 %t0, %a
%t2 = icmp sle i32 %t1, 0
@@ -836,6 +1079,11 @@ define i64 @blsmsk64(i64 %x) {
; X64: # %bb.0:
; X64-NEXT: blsmskq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsmskq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xd7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 %x, 1
%tmp2 = xor i64 %tmp, %x
ret i64 %tmp2
@@ -868,6 +1116,13 @@ define i64 @blsmsk64_z(i64 %a, i64 %b) nounwind {
; X64-NEXT: blsmskq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk64_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsmskq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xd7]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = xor i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -902,6 +1157,13 @@ define i64 @blsmsk64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsmskq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk64_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsmskq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xd7]
+; EGPR-NEXT: cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = xor i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -937,6 +1199,14 @@ define i64 @blsmsk64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsmskq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk64_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsmskq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xd7]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = xor i64 %t0, %a
%t2 = icmp sle i64 %t1, 0
@@ -954,6 +1224,11 @@ define i32 @blsr32(i32 %x) {
; X64: # %bb.0:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 %x, 1
%tmp2 = and i32 %x, %tmp
ret i32 %tmp2
@@ -970,6 +1245,11 @@ define i32 @blsr32_load(ptr %x) {
; X64: # %bb.0:
; X64-NEXT: blsrl (%rdi), %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x0f]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i32, ptr %x
%tmp = sub i32 %x1, 1
%tmp2 = and i32 %x1, %tmp
@@ -991,6 +1271,13 @@ define i32 @blsr32_z(i32 %a, i32 %b) nounwind {
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = and i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -1014,6 +1301,14 @@ define i32 @blsr32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsrl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsrl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xcf]
+; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; EGPR-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = and i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -1037,6 +1332,14 @@ define i32 @blsr32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsrl %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsrl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xcf]
+; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; EGPR-NEXT: cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = and i32 %t0, %a
%t2 = icmp sle i32 %t1, 0
@@ -1066,6 +1369,11 @@ define i64 @blsr64(i64 %x) {
; X64: # %bb.0:
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 %x, 1
%tmp2 = and i64 %tmp, %x
ret i64 %tmp2
@@ -1098,6 +1406,13 @@ define i64 @blsr64_z(i64 %a, i64 %b) nounwind {
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = and i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -1132,6 +1447,14 @@ define i64 @blsr64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsrq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsrq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xcf]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = and i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -1167,6 +1490,14 @@ define i64 @blsr64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsrq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsrq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xcf]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = and i64 %t0, %a
%t2 = icmp sle i64 %t1, 0
@@ -1189,6 +1520,12 @@ define i64 @blsr_disguised_constant(i64 %x) {
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr_disguised_constant:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%a1 = and i64 %x, 65535
%a2 = add i64 %x, 65535
%r = and i64 %a1, %a2
@@ -1211,6 +1548,12 @@ define i64 @blsr_disguised_shrunk_add(i64 %x) {
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr_disguised_shrunk_add:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrq $48, %rdi # encoding: [0x48,0xc1,0xef,0x30]
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: retq # encoding: [0xc3]
%a = lshr i64 %x, 48
%b = add i64 %a, -1
%c = and i64 %b, %a
@@ -1234,6 +1577,16 @@ define void @pr40060(i32, i32) {
; X64-NEXT: jns bar # TAILCALL
; X64-NEXT: # %bb.1:
; X64-NEXT: retq
+;
+; EGPR-LABEL: pr40060:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: jns bar # TAILCALL
+; EGPR-NEXT: # encoding: [0x79,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: retq # encoding: [0xc3]
%3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1)
%4 = icmp sgt i32 %3, -1
br i1 %4, label %5, label %6
@@ -1274,6 +1627,23 @@ define i32 @blsr32_branch(i32 %x) {
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsrl %edi, %ebx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x60,0xf3,0xcf]
+; EGPR-NEXT: jne .LBB53_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB53_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB53_2:
+; EGPR-NEXT: movl %ebx, %eax # encoding: [0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 %x, 1
%tmp2 = and i32 %x, %tmp
%cmp = icmp eq i32 %tmp2, 0
@@ -1329,6 +1699,23 @@ define i64 @blsr64_branch(i64 %x) {
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsrq %rdi, %rbx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe0,0xf3,0xcf]
+; EGPR-NEXT: jne .LBB54_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB54_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB54_2:
+; EGPR-NEXT: movq %rbx, %rax # encoding: [0x48,0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 %x, 1
%tmp2 = and i64 %x, %tmp
%cmp = icmp eq i64 %tmp2, 0
@@ -1369,6 +1756,23 @@ define i32 @blsi32_branch(i32 %x) {
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsil %edi, %ebx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x60,0xf3,0xdf]
+; EGPR-NEXT: jne .LBB55_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB55_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB55_2:
+; EGPR-NEXT: movl %ebx, %eax # encoding: [0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 0, %x
%tmp2 = and i32 %x, %tmp
%cmp = icmp eq i32 %tmp2, 0
@@ -1424,6 +1828,23 @@ define i64 @blsi64_branch(i64 %x) {
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsiq %rdi, %rbx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe0,0xf3,0xdf]
+; EGPR-NEXT: jne .LBB56_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB56_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB56_2:
+; EGPR-NEXT: movq %rbx, %rax # encoding: [0x48,0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 0, %x
%tmp2 = and i64 %x, %tmp
%cmp = icmp eq i64 %tmp2, 0
@@ -1450,6 +1871,16 @@ define void @pr42118_i32(i32 %x) {
; X64-NEXT: je bar # TAILCALL
; X64-NEXT: # %bb.1:
; X64-NEXT: retq
+;
+; EGPR-LABEL: pr42118_i32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: je bar # TAILCALL
+; EGPR-NEXT: # encoding: [0x74,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 0, %x
%tmp1 = and i32 %tmp, %x
%cmp = icmp eq i32 %tmp1, %x
@@ -1493,6 +1924,16 @@ define void @pr42118_i64(i64 %x) {
; X64-NEXT: je bar # TAILCALL
; X64-NEXT: # %bb.1:
; X64-NEXT: retq
+;
+; EGPR-LABEL: pr42118_i64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: je bar # TAILCALL
+; EGPR-NEXT: # encoding: [0x74,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 0, %x
%tmp1 = and i64 %tmp, %x
%cmp = icmp eq i64 %tmp1, %x
@@ -1522,6 +1963,13 @@ define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: cmovael %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi_cflag_32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
+; EGPR-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tobool = icmp eq i32 %x, 0
%sub = sub nsw i32 0, %x
%and = and i32 %sub, %x
@@ -1560,6 +2008,13 @@ define i64 @blsi_cflag_64(i64 %x, i64 %y) nounwind {
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: cmovaeq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi_cflag_64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
+; EGPR-NEXT: testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tobool = icmp eq i64 %x, 0
%sub = sub nsw i64 0, %x
%and = and i64 %sub, %x
diff --git a/llvm/test/CodeGen/X86/bmi2-x86_64.ll b/llvm/test/CodeGen/X86/bmi2-x86_64.ll
index 214c4eeb1f4c59..fa1c67986e11f2 100644
--- a/llvm/test/CodeGen/X86/bmi2-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi2-x86_64.ll
@@ -1,11 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,egpr --show-mc-encoding | FileCheck %s --check-prefixes=EGPR
define i64 @bzhi64(i64 %x, i64 %y) {
; CHECK-LABEL: bzhi64:
; CHECK: # %bb.0:
; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bzhi64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhiq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf5,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -15,6 +21,11 @@ define i64 @bzhi64_load(ptr %x, i64 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: bzhiq %rsi, (%rdi), %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bzhi64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhiq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf5,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i64, ptr %x
%tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x1, i64 %y)
ret i64 %tmp
@@ -27,6 +38,11 @@ define i64 @pdep64(i64 %x, i64 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: pdepq %rsi, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pdep64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -36,6 +52,11 @@ define i64 @pdep64_load(i64 %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: pdepq (%rsi), %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pdep64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepq (%rsi), %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i64, ptr %y
%tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y1)
ret i64 %tmp
@@ -48,6 +69,14 @@ define i64 @pdep64_anyext(i32 %x) {
; CHECK-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
; CHECK-NEXT: pdepq %rax, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pdep64_anyext:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: movabsq $6148914691236517205, %rax # encoding: [0x48,0xb8,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x5555555555555555
+; EGPR-NEXT: pdepq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = sext i32 %x to i64
%tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x1, i64 6148914691236517205)
ret i64 %tmp
@@ -60,6 +89,11 @@ define i64 @pext64(i64 %x, i64 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: pextq %rsi, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pext64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pextq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -69,6 +103,11 @@ define i64 @pext64_load(i64 %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: pextq (%rsi), %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pext64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pextq (%rsi), %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i64, ptr %y
%tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y1)
ret i64 %tmp
@@ -80,6 +119,13 @@ define i64 @pext64_knownbits(i64 %x, i64 %y) {
; CHECK-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
; CHECK-NEXT: pextq %rax, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pext64_knownbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movabsq $6148914691236517205, %rax # encoding: [0x48,0xb8,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x5555555555555555
+; EGPR-NEXT: pextq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 6148914691236517205)
%tmp2 = and i64 %tmp, 4294967295
ret i64 %tmp2
@@ -95,6 +141,14 @@ define i64 @mulx64(i64 %x, i64 %y, ptr %p) {
; CHECK-NEXT: mulxq %rsi, %rax, %rdx
; CHECK-NEXT: movq %rdx, (%rcx)
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: mulx64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdx, %rcx # encoding: [0x48,0x89,0xd1]
+; EGPR-NEXT: movq %rdi, %rdx # encoding: [0x48,0x89,0xfa]
+; EGPR-NEXT: mulxq %rsi, %rax, %rdx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfb,0xf6,0xd6]
+; EGPR-NEXT: movq %rdx, (%rcx) # encoding: [0x48,0x89,0x11]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = zext i64 %x to i128
%y1 = zext i64 %y to i128
%r1 = mul i128 %x1, %y1
@@ -113,6 +167,14 @@ define i64 @mulx64_load(i64 %x, ptr %y, ptr %p) {
; CHECK-NEXT: mulxq (%rsi), %rax, %rdx
; CHECK-NEXT: movq %rdx, (%rcx)
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: mulx64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdx, %rcx # encoding: [0x48,0x89,0xd1]
+; EGPR-NEXT: movq %rdi, %rdx # encoding: [0x48,0x89,0xfa]
+; EGPR-NEXT: mulxq (%rsi), %rax, %rdx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfb,0xf6,0x16]
+; EGPR-NEXT: movq %rdx, (%rcx) # encoding: [0x48,0x89,0x11]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i64, ptr %y
%x2 = zext i64 %x to i128
%y2 = zext i64 %y1 to i128
diff --git a/llvm/test/CodeGen/X86/bmi2.ll b/llvm/test/CodeGen/X86/bmi2.ll
index 24e38cfeb704df..cabeebb0c3f366 100644
--- a/llvm/test/CodeGen/X86/bmi2.ll
+++ b/llvm/test/CodeGen/X86/bmi2.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2,+cmov | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
define i32 @bzhi32(i32 %x, i32 %y) {
; X86-LABEL: bzhi32:
@@ -16,6 +17,12 @@ define i32 @bzhi32(i32 %x, i32 %y) {
; X64-NEXT: addl %edi, %edi
; X64-NEXT: bzhil %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bzhi32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: addl %edi, %edi # encoding: [0x01,0xff]
+; EGPR-NEXT: bzhil %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = add i32 %x, %x
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
ret i32 %tmp
@@ -33,6 +40,11 @@ define i32 @bzhi32_load(ptr %x, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: bzhil %esi, (%rdi), %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bzhi32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhil %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i32, ptr %x
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
ret i32 %tmp
@@ -52,6 +64,13 @@ define i1 @bzhi32_overflow(i32 %x, i32 %y) {
; X64-NEXT: bzhil %esi, %edi, %eax
; X64-NEXT: setle %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: bzhi32_overflow:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhil %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0xc7]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: setle %al # encoding: [0x0f,0x9e,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
%cmp = icmp slt i32 %tmp, 1
ret i1 %cmp
@@ -73,6 +92,12 @@ define i32 @pdep32(i32 %x, i32 %y) {
; X64-NEXT: addl %esi, %esi
; X64-NEXT: pdepl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: addl %esi, %esi # encoding: [0x01,0xf6]
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = add i32 %y, %y
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -90,6 +115,11 @@ define i32 @pdep32_load(i32 %x, ptr %y) {
; X64: # %bb.0:
; X64-NEXT: pdepl (%rsi), %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i32, ptr %y
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -108,6 +138,13 @@ define i32 @pdep32_anyext(i16 %x) {
; X64-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT: pdepl %eax, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_anyext:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $-1431655766, %eax # encoding: [0xb8,0xaa,0xaa,0xaa,0xaa]
+; EGPR-NEXT: # imm = 0xAAAAAAAA
+; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = sext i16 %x to i32
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x1, i32 -1431655766)
ret i32 %tmp
@@ -126,6 +163,13 @@ define i32 @pdep32_demandedbits(i32 %x) {
; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: pdepl %eax, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x55555555
+; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
%tmp2 = and i32 %tmp, 1431655765
ret i32 %tmp2
@@ -144,6 +188,12 @@ define i32 @pdep32_demandedbits2(i32 %x, i32 %y) {
; X64-NEXT: pdepl %esi, %edi, %eax
; X64-NEXT: andl $128, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: andl $128, %eax # encoding: [0x25,0x80,0x00,0x00,0x00]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = and i32 %x, 255
%tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
%tmp3 = and i32 %tmp2, 128
@@ -164,6 +214,13 @@ define i32 @pdep32_demandedbits_mask(i32 %x, i16 %y) {
; X64-NEXT: pdepl %esi, %edi, %eax
; X64-NEXT: andl $32768, %eax # imm = 0x8000
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits_mask:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
+; EGPR-NEXT: # imm = 0x8000
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sext i16 %y to i32
%tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
%tmp3 = and i32 %tmp2, 32768
@@ -184,6 +241,12 @@ define i32 @pdep32_demandedbits_mask2(i32 %x, i16 %y) {
; X64-NEXT: pdepl %esi, %edi, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits_mask2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sext i16 %y to i32
%tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
%tmp3 = and i32 %tmp2, 65535
@@ -205,6 +268,14 @@ define i32 @pdep32_knownbits(i32 %x) {
; X64-NEXT: pdepl %eax, %edi, %eax
; X64-NEXT: imull %eax, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_knownbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x55555555
+; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
+; EGPR-NEXT: imull %eax, %eax # encoding: [0x0f,0xaf,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
%tmp2 = and i32 %tmp, 1431655765
%tmp3 = mul i32 %tmp, %tmp2
@@ -226,6 +297,13 @@ define i32 @pdep32_knownbits2(i32 %x, i32 %y) {
; X64-NEXT: pdepl %esi, %edi, %eax
; X64-NEXT: imull %eax, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_knownbits2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl $-256, %edi # encoding: [0x81,0xe7,0x00,0xff,0xff,0xff]
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: imull %eax, %eax # encoding: [0x0f,0xaf,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = and i32 %x, -256
%tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
%tmp3 = and i32 %tmp2, -256
@@ -249,6 +327,12 @@ define i32 @pext32(i32 %x, i32 %y) {
; X64-NEXT: addl %esi, %esi
; X64-NEXT: pextl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pext32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: addl %esi, %esi # encoding: [0x01,0xf6]
+; EGPR-NEXT: pextl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = add i32 %y, %y
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -266,6 +350,11 @@ define i32 @pext32_load(i32 %x, ptr %y) {
; X64: # %bb.0:
; X64-NEXT: pextl (%rsi), %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pext32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pextl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i32, ptr %y
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -284,6 +373,13 @@ define i32 @pext32_knownbits(i32 %x) {
; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: pextl %eax, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pext32_knownbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x55555555
+; EGPR-NEXT: pextl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 1431655765)
%tmp2 = and i32 %tmp, 65535
ret i32 %tmp2
@@ -315,6 +411,19 @@ define i32 @mulx32(i32 %x, i32 %y, ptr %p) {
; X64-NEXT: movl %ecx, (%rdx)
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: mulx32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $esi killed $esi def $rsi
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: addl %edi, %edi # encoding: [0x01,0xff]
+; EGPR-NEXT: leal (%rsi,%rsi), %eax # encoding: [0x8d,0x04,0x36]
+; EGPR-NEXT: imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
+; EGPR-NEXT: movq %rax, %rcx # encoding: [0x48,0x89,0xc1]
+; EGPR-NEXT: shrq $32, %rcx # encoding: [0x48,0xc1,0xe9,0x20]
+; EGPR-NEXT: movl %ecx, (%rdx) # encoding: [0x89,0x0a]
+; EGPR-NEXT: # kill: def $eax killed $eax killed $rax
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = add i32 %x, %x
%y1 = add i32 %y, %y
%x2 = zext i32 %x1 to i64
@@ -349,6 +458,18 @@ define i32 @mulx32_load(i32 %x, ptr %y, ptr %p) {
; X64-NEXT: movl %ecx, (%rdx)
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: mulx32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
+; EGPR-NEXT: movl (%rsi), %ecx # encoding: [0x8b,0x0e]
+; EGPR-NEXT: imulq %rcx, %rax # encoding: [0x48,0x0f,0xaf,0xc1]
+; EGPR-NEXT: movq %rax, %rcx # encoding: [0x48,0x89,0xc1]
+; EGPR-NEXT: shrq $32, %rcx # encoding: [0x48,0xc1,0xe9,0x20]
+; EGPR-NEXT: movl %ecx, (%rdx) # encoding: [0x89,0x0a]
+; EGPR-NEXT: # kill: def $eax killed $eax killed $rax
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = add i32 %x, %x
%y1 = load i32, ptr %y
%x2 = zext i32 %x1 to i64
diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll
index db00e1c49dca7b..bb0213891c9768 100644
--- a/llvm/test/CodeGen/X86/shift-bmi2.ll
+++ b/llvm/test/CodeGen/X86/shift-bmi2.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 -mattr=+egpr --show-mc-encoding < %s | FileCheck --check-prefix=EGPR %s
define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: shl32:
@@ -13,6 +14,11 @@ define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxl %esi, %edi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x49,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = shl i32 %x, %shamt
ret i32 %shl
}
@@ -29,6 +35,12 @@ define i32 @shl32i(i32 %x) nounwind uwtable readnone {
; BMI264-NEXT: movl %edi, %eax
; BMI264-NEXT: shll $5, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32i:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: shll $5, %eax # encoding: [0xc1,0xe0,0x05]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = shl i32 %x, 5
ret i32 %shl
}
@@ -45,6 +57,11 @@ define i32 @shl32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxl %esi, (%rdi), %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x49,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i32, ptr %p
%shl = shl i32 %x, %shamt
ret i32 %shl
@@ -63,6 +80,12 @@ define i32 @shl32pi(ptr %p) nounwind uwtable readnone {
; BMI264-NEXT: movl (%rdi), %eax
; BMI264-NEXT: shll $5, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32pi:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
+; EGPR-NEXT: shll $5, %eax # encoding: [0xc1,0xe0,0x05]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i32, ptr %p
%shl = shl i32 %x, 5
ret i32 %shl
@@ -91,6 +114,11 @@ define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxq %rsi, %rdi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc9,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = shl i64 %x, %shamt
ret i64 %shl
}
@@ -109,6 +137,12 @@ define i64 @shl64i(i64 %x) nounwind uwtable readnone {
; BMI264-NEXT: movq %rdi, %rax
; BMI264-NEXT: shlq $7, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64i:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: shlq $7, %rax # encoding: [0x48,0xc1,0xe0,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = shl i64 %x, 7
ret i64 %shl
}
@@ -137,6 +171,11 @@ define i64 @shl64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxq %rsi, (%rdi), %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc9,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i64, ptr %p
%shl = shl i64 %x, %shamt
ret i64 %shl
@@ -157,6 +196,12 @@ define i64 @shl64pi(ptr %p) nounwind uwtable readnone {
; BMI264-NEXT: movq (%rdi), %rax
; BMI264-NEXT: shlq $7, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64pi:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
+; EGPR-NEXT: shlq $7, %rax # encoding: [0x48,0xc1,0xe0,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i64, ptr %p
%shl = shl i64 %x, 7
ret i64 %shl
@@ -173,6 +218,11 @@ define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxl %esi, %edi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4b,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = lshr i32 %x, %shamt
ret i32 %shl
}
@@ -189,6 +239,11 @@ define i32 @lshr32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxl %esi, (%rdi), %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr32p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4b,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i32, ptr %p
%shl = lshr i32 %x, %shamt
ret i32 %shl
@@ -217,6 +272,11 @@ define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxq %rsi, %rdi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xcb,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = lshr i64 %x, %shamt
ret i64 %shl
}
@@ -245,6 +305,11 @@ define i64 @lshr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxq %rsi, (%rdi), %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr64p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xcb,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i64, ptr %p
%shl = lshr i64 %x, %shamt
ret i64 %shl
@@ -261,6 +326,11 @@ define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxl %esi, %edi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4a,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = ashr i32 %x, %shamt
ret i32 %shl
}
@@ -277,6 +347,11 @@ define i32 @ashr32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxl %esi, (%rdi), %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr32p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4a,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i32, ptr %p
%shl = ashr i32 %x, %shamt
ret i32 %shl
@@ -305,6 +380,11 @@ define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxq %rsi, %rdi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xca,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = ashr i64 %x, %shamt
ret i64 %shl
}
@@ -333,6 +413,11 @@ define i64 @ashr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxq %rsi, (%rdi), %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr64p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xca,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i64, ptr %p
%shl = ashr i64 %x, %shamt
ret i64 %shl
@@ -349,6 +434,11 @@ define i32 @shl32and(i32 %t, i32 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxl %edi, %esi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x41,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i32 %t, 31
%res = shl i32 %val, %shamt
ret i32 %res
@@ -374,6 +464,11 @@ define i64 @shl64and(i64 %t, i64 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxq %rdi, %rsi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxq %rdi, %rsi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc1,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i64 %t, 63
%res = shl i64 %val, %shamt
ret i64 %res
@@ -390,6 +485,11 @@ define i32 @lshr32and(i32 %t, i32 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxl %edi, %esi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr32and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i32 %t, 31
%res = lshr i32 %val, %shamt
ret i32 %res
@@ -415,6 +515,11 @@ define i64 @lshr64and(i64 %t, i64 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxq %rdi, %rsi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr64and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxq %rdi, %rsi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i64 %t, 63
%res = lshr i64 %val, %shamt
ret i64 %res
@@ -431,6 +536,11 @@ define i32 @ashr32and(i32 %t, i32 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxl %edi, %esi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr32and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i32 %t, 31
%res = ashr i32 %val, %shamt
ret i32 %res
@@ -456,6 +566,11 @@ define i64 @ashr64and(i64 %t, i64 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxq %rdi, %rsi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr64and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxq %rdi, %rsi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i64 %t, 63
%res = ashr i64 %val, %shamt
ret i64 %res
>From 6eb87d58252b3c53a12966cac4be038e50f386be Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 9 Jan 2024 17:55:28 -0800
Subject: [PATCH 2/3] add parentheses to macro
---
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 8d519f181549ff..9b25d9d7ab886a 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4087,7 +4087,7 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
SDValue Control;
unsigned ROpc, MOpc;
-#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
+#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
if (!PreferBEXTR) {
assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
// If we can't make use of BEXTR then we can't fuse shift+mask stages.
@@ -5488,7 +5488,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
-#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
+#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
case MVT::i32:
Opc = UseMULXHi ? X86::MULX32Hrr
: UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
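A note on patch 2 (illustrative, not part of the patch): the macro body is a bare conditional expression, so without its own parentheses any operator applied to the macro's result binds only to the last arm of the ?:. The existing uses above happen to nest correctly inside the surrounding ?: chains, but the guard is cheap. A minimal, self-contained C++ sketch with stand-in names and values, assuming nothing beyond the precedence rules themselves:

    #include <cassert>

    // Illustrative stand-ins for the real Subtarget check and opcode enums.
    #define PICK_NO_PARENS(OPC) has_egpr ? OPC##_EVEX : OPC
    #define PICK_PARENS(OPC) (has_egpr ? OPC##_EVEX : OPC)

    enum { BZHI32rr = 10, BZHI32rr_EVEX = 20 };

    int main() {
      bool has_egpr = false;
      // Intended meaning in every line below: "(pick the opcode) + 1".
      int bad  = PICK_NO_PARENS(BZHI32rr) + 1; // expands to: has_egpr ? 20 : (10 + 1)
      int good = PICK_PARENS(BZHI32rr) + 1;    // expands to: (has_egpr ? 20 : 10) + 1
      assert(bad == 11 && good == 11);         // agree only while has_egpr is false

      has_egpr = true;
      int bad2  = PICK_NO_PARENS(BZHI32rr) + 1; // 20: the "+ 1" fell into the false arm
      int good2 = PICK_PARENS(BZHI32rr) + 1;    // 21: as intended
      assert(bad2 == 20 && good2 == 21);
      return 0;
    }
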
>From e2feb9a7d06930456430c50a01c82fa7ddc78da4 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Wed, 10 Jan 2024 22:18:21 -0800
Subject: [PATCH 3/3] use multiclass to avoid messy duplication
---
llvm/lib/Target/X86/X86InstrArithmetic.td | 23 ++-
llvm/lib/Target/X86/X86InstrCompiler.td | 117 +++------------
llvm/lib/Target/X86/X86InstrMisc.td | 61 +++-----
llvm/lib/Target/X86/X86InstrShiftRotate.td | 163 ++++-----------------
4 files changed, 75 insertions(+), 289 deletions(-)
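For readers skimming the diff below, a condensed, stand-alone sketch (not part of the patch) of the idiom patch 3 switches to: a pattern multiclass takes a string Suffix, and !cast<Instruction> resolves either the plain or the _EVEX-promoted opcode by name, so each predicate set needs only one defm instead of a duplicated block of Pat defs. The record and predicate names here mirror the ANDN patterns changed in this patch:

    // Minimal illustration of the Suffix-parameterized pattern multiclass.
    multiclass ExampleAndnPat<string Suffix = ""> {
      def : Pat<(and (not GR32:$src1), GR32:$src2),
                (!cast<Instruction>("ANDN32rr"#Suffix) GR32:$src1, GR32:$src2)>;
    }

    // Instantiated once per predicate set; "" picks ANDN32rr and
    // "_EVEX" picks ANDN32rr_EVEX.
    let Predicates = [HasBMI, NoEGPR] in
      defm : ExampleAndnPat<>;
    let Predicates = [HasBMI, HasEGPR] in
      defm : ExampleAndnPat<"_EVEX">;
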
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 289141ce6c33f5..b09c1d2b301d94 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1338,26 +1338,23 @@ defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]
defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>;
}
-let Predicates = [HasBMI, NoEGPR], AddedComplexity = -6 in {
+multiclass Andn_patterns<string Suffix = ""> {
def : Pat<(and (not GR32:$src1), GR32:$src2),
- (ANDN32rr GR32:$src1, GR32:$src2)>;
+ (!cast<Instruction>(ANDN32rr#Suffix) GR32:$src1, GR32:$src2)>;
def : Pat<(and (not GR64:$src1), GR64:$src2),
- (ANDN64rr GR64:$src1, GR64:$src2)>;
+ (!cast<Instruction>(ANDN64rr#Suffix) GR64:$src1, GR64:$src2)>;
def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
- (ANDN32rm GR32:$src1, addr:$src2)>;
+ (!cast<Instruction>(ANDN32rm#Suffix) GR32:$src1, addr:$src2)>;
def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
- (ANDN64rm GR64:$src1, addr:$src2)>;
+ (!cast<Instruction>(ANDN64rm#Suffix) GR64:$src1, addr:$src2)>;
+}
+
+let Predicates = [HasBMI, NoEGPR], AddedComplexity = -6 in {
+ defm : Andn_patterns<>;
}
let Predicates = [HasBMI, HasEGPR], AddedComplexity = -6 in {
- def : Pat<(and (not GR32:$src1), GR32:$src2),
- (ANDN32rr_EVEX GR32:$src1, GR32:$src2)>;
- def : Pat<(and (not GR64:$src1), GR64:$src2),
- (ANDN64rr_EVEX GR64:$src1, GR64:$src2)>;
- def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
- (ANDN32rm_EVEX GR32:$src1, addr:$src2)>;
- def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
- (ANDN64rm_EVEX GR64:$src1, addr:$src2)>;
+ defm : Andn_patterns<"_EVEX">;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 3510bdeeff4c09..f9dc035c3fab61 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1864,120 +1864,37 @@ def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
(SHRD64rrCL GR64:$src1, GR64:$src2)>;
-let Predicates = [HasBMI2, NoEGPR] in {
+multiclass bmi_shift_mask_patterns<SDNode op, string name, string Suffix = ""> {
let AddedComplexity = 1 in {
- def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
- (SARX32rr GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra GR64:$src1, (shiftMask64 GR8:$src2)),
- (SARX64rr GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl GR32:$src1, (shiftMask32 GR8:$src2)),
- (SHRX32rr GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl GR64:$src1, (shiftMask64 GR8:$src2)),
- (SHRX64rr GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl GR32:$src1, (shiftMask32 GR8:$src2)),
- (SHLX32rr GR32:$src1,
+ def : Pat<(op GR32:$src1, (shiftMask32 GR8:$src2)),
+ (!cast<Instruction>(name#"32rr"#Suffix) GR32:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl GR64:$src1, (shiftMask64 GR8:$src2)),
- (SHLX64rr GR64:$src1,
+ def : Pat<(op GR64:$src1, (shiftMask64 GR8:$src2)),
+ (!cast<Instruction>(name#"64rr"#Suffix) GR64:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
-
- def : Pat<(sra (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SARX32rm addr:$src1,
+ def : Pat<(op (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (!cast<Instruction>(name#"32rm"#Suffix) addr:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SARX64rm addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SHRX32rm addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SHRX64rm addr:$src1,
+ def : Pat<(op (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (!cast<Instruction>(name#"64rm"#Suffix) addr:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+}
- def : Pat<(shl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SHLX32rm addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SHLX64rm addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+let Predicates = [HasBMI2, NoEGPR] in {
+ defm : bmi_shift_mask_patterns<sra, "SARX">;
+ defm : bmi_shift_mask_patterns<srl, "SHRX">;
+ defm : bmi_shift_mask_patterns<shl, "SHLX">;
}
let Predicates = [HasBMI2, HasEGPR] in {
- let AddedComplexity = 1 in {
- def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
- (SARX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra GR64:$src1, (shiftMask64 GR8:$src2)),
- (SARX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl GR32:$src1, (shiftMask32 GR8:$src2)),
- (SHRX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl GR64:$src1, (shiftMask64 GR8:$src2)),
- (SHRX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl GR32:$src1, (shiftMask32 GR8:$src2)),
- (SHLX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl GR64:$src1, (shiftMask64 GR8:$src2)),
- (SHLX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- }
-
- def : Pat<(sra (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SARX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SARX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SHRX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SHRX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SHLX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SHLX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ defm : bmi_shift_mask_patterns<sra, "SARX", "_EVEX">;
+ defm : bmi_shift_mask_patterns<srl, "SHRX", "_EVEX">;
+ defm : bmi_shift_mask_patterns<shl, "SHLX", "_EVEX">;
}
// Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index a51720d738f47e..d4b67f37b0c6b4 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1241,75 +1241,48 @@ let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in {
defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX;
}
-let Predicates = [HasBMI, NoEGPR] in {
+multiclass Bls_patterns<string Suffix = ""> {
// FIXME(1): patterns for the load versions are not implemented
// FIXME(2): By only matching `add_su` and `ineg_su` we may emit
// extra `mov` instructions if `src` has future uses. It may be better
// to always match if `src` has more users.
def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
- (BLSR32rr GR32:$src)>;
+ (!cast<Instruction>(BLSR32rr#Suffix) GR32:$src)>;
def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
- (BLSR64rr GR64:$src)>;
+ (!cast<Instruction>(BLSR64rr#Suffix) GR64:$src)>;
def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
- (BLSMSK32rr GR32:$src)>;
+ (!cast<Instruction>(BLSMSK32rr#Suffix) GR32:$src)>;
def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
- (BLSMSK64rr GR64:$src)>;
+ (!cast<Instruction>(BLSMSK64rr#Suffix) GR64:$src)>;
def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
- (BLSI32rr GR32:$src)>;
+ (!cast<Instruction>(BLSI32rr#Suffix) GR32:$src)>;
def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
- (BLSI64rr GR64:$src)>;
+ (!cast<Instruction>(BLSI64rr#Suffix) GR64:$src)>;
// Versions to match flag producing ops.
def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
- (BLSR32rr GR32:$src)>;
+ (!cast<Instruction>(BLSR32rr#Suffix) GR32:$src)>;
def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
- (BLSR64rr GR64:$src)>;
+ (!cast<Instruction>(BLSR64rr#Suffix) GR64:$src)>;
def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
- (BLSMSK32rr GR32:$src)>;
+ (!cast<Instruction>(BLSMSK32rr#Suffix) GR32:$src)>;
def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
- (BLSMSK64rr GR64:$src)>;
+ (!cast<Instruction>(BLSMSK64rr#Suffix) GR64:$src)>;
def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
- (BLSI32rr GR32:$src)>;
+ (!cast<Instruction>(BLSI32rr#Suffix) GR32:$src)>;
def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
- (BLSI64rr GR64:$src)>;
+ (!cast<Instruction>(BLSI64rr#Suffix) GR64:$src)>;
}
-let Predicates = [HasBMI, HasEGPR] in {
- def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
- (BLSR32rr_EVEX GR32:$src)>;
- def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
- (BLSR64rr_EVEX GR64:$src)>;
-
- def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
- (BLSMSK32rr_EVEX GR32:$src)>;
- def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
- (BLSMSK64rr_EVEX GR64:$src)>;
-
- def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
- (BLSI32rr_EVEX GR32:$src)>;
- def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
- (BLSI64rr_EVEX GR64:$src)>;
-
- // Versions to match flag producing ops.
- def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
- (BLSR32rr_EVEX GR32:$src)>;
- def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
- (BLSR64rr_EVEX GR64:$src)>;
+let Predicates = [HasBMI, NoEGPR] in
+ defm : Bls_patterns<>;
- def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
- (BLSMSK32rr_EVEX GR32:$src)>;
- def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
- (BLSMSK64rr_EVEX GR64:$src)>;
-
- def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
- (BLSI32rr_EVEX GR32:$src)>;
- def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
- (BLSI64rr_EVEX GR64:$src)>;
-}
+let Predicates = [HasBMI, HasEGPR] in
+ defm : Bls_patterns<"_EVEX">;
multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
X86FoldableSchedWrite sched, string Suffix = ""> {
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index e225fe6950e3da..9d58c908208f5c 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -879,30 +879,7 @@ let Predicates = [HasBMI2, HasEGPR, In64BitMode] in {
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8, PD, REX_W, EVEX;
}
-let Predicates = [HasBMI2, NoEGPR] in {
- // Prefer RORX which is non-destructive and doesn't update EFLAGS.
- let AddedComplexity = 10 in {
- def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
- (RORX32ri GR32:$src, imm:$shamt)>;
- def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
- (RORX64ri GR64:$src, imm:$shamt)>;
-
- def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
- (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
- def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
- (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
- }
-
- def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
- (RORX32mi addr:$src, imm:$shamt)>;
- def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
- (RORX64mi addr:$src, imm:$shamt)>;
-
- def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
- (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
- def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
- (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
-
+multiclass bmi_shift_patterns<SDNode op, string name, string Suffix = ""> {
// Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
// immediate shift, i.e. the following code is considered better
//
@@ -917,34 +894,15 @@ let Predicates = [HasBMI2, NoEGPR] in {
// ... %edi, ...
//
let AddedComplexity = 1 in {
- def : Pat<(sra GR32:$src1, GR8:$src2),
- (SARX32rr GR32:$src1,
+ def : Pat<(op GR32:$src1, GR8:$src2),
+ (!cast<Instruction>(name#"32rr"#Suffix) GR32:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra GR64:$src1, GR8:$src2),
- (SARX64rr GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl GR32:$src1, GR8:$src2),
- (SHRX32rr GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl GR64:$src1, GR8:$src2),
- (SHRX64rr GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl GR32:$src1, GR8:$src2),
- (SHLX32rr GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl GR64:$src1, GR8:$src2),
- (SHLX64rr GR64:$src1,
+ def : Pat<(op GR64:$src1, GR8:$src2),
+ (!cast<Instruction>(name#"64rr"#Suffix) GR64:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
-
// We prefer to use
// mov (%ecx), %esi
// shl $imm, $esi
@@ -955,112 +913,53 @@ let Predicates = [HasBMI2, NoEGPR] in {
// shlx %al, (%ecx), %esi
//
// This priority is enforced by IsProfitableToFoldLoad.
- def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
- (SARX32rm addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
- (SARX64rm addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
- (SHRX32rm addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
- (SHRX64rm addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
- (SHLX32rm addr:$src1,
+ def : Pat<(op (loadi32 addr:$src1), GR8:$src2),
+ (!cast<Instruction>(name#"32rm"#Suffix) addr:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
- (SHLX64rm addr:$src1,
+ def : Pat<(op (loadi64 addr:$src1), GR8:$src2),
+ (!cast<Instruction>(name#"64rm"#Suffix) addr:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
-let Predicates = [HasBMI2, HasEGPR] in {
+multiclass RORX_patterns<string Suffix = ""> {
+ // Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
- (RORX32ri_EVEX GR32:$src, imm:$shamt)>;
+ (!cast<Instruction>(RORX32ri#Suffix) GR32:$src, imm:$shamt)>;
def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
- (RORX64ri_EVEX GR64:$src, imm:$shamt)>;
+ (!cast<Instruction>(RORX64ri#Suffix) GR64:$src, imm:$shamt)>;
def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
- (RORX32ri_EVEX GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ (!cast<Instruction>(RORX32ri#Suffix) GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
- (RORX64ri_EVEX GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+ (!cast<Instruction>(RORX64ri#Suffix) GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
}
def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
- (RORX32mi_EVEX addr:$src, imm:$shamt)>;
+ (!cast<Instruction>(RORX32mi#Suffix) addr:$src, imm:$shamt)>;
def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
- (RORX64mi_EVEX addr:$src, imm:$shamt)>;
+ (!cast<Instruction>(RORX64mi#Suffix) addr:$src, imm:$shamt)>;
def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
- (RORX32mi_EVEX addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ (!cast<Instruction>(RORX32mi#Suffix) addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
- (RORX64mi_EVEX addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
-
- let AddedComplexity = 1 in {
- def : Pat<(sra GR32:$src1, GR8:$src2),
- (SARX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra GR64:$src1, GR8:$src2),
- (SARX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl GR32:$src1, GR8:$src2),
- (SHRX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl GR64:$src1, GR8:$src2),
- (SHRX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl GR32:$src1, GR8:$src2),
- (SHLX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl GR64:$src1, GR8:$src2),
- (SHLX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- }
-
- def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
- (SARX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
- (SARX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ (!cast<Instruction>(RORX64mi#Suffix) addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+}
- def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
- (SHRX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
- (SHRX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+let Predicates = [HasBMI2, NoEGPR] in {
+ defm : RORX_patterns<>;
+ defm : bmi_shift_patterns<sra, "SARX">;
+ defm : bmi_shift_patterns<srl, "SHRX">;
+ defm : bmi_shift_patterns<shl, "SHLX">;
+}
- def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
- (SHLX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
- (SHLX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+let Predicates = [HasBMI2, HasEGPR] in {
+ defm : RORX_patterns<"_EVEX">;
+ defm : bmi_shift_patterns<sra, "SARX", "_EVEX">;
+ defm : bmi_shift_patterns<srl, "SHRX", "_EVEX">;
+ defm : bmi_shift_patterns<shl, "SHLX", "_EVEX">;
}
def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),