[llvm] [X86] Support encoding/decoding and lowering for APX variant SHL/SHR/SAR/SHLD/SHRD (PR #78853)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 20 08:12:14 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Shengchen Kan (KanRobert)
---
Patch is 413.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78853.diff
37 Files Affected:
- (modified) llvm/lib/Target/X86/X86InstrShiftRotate.td (+362-47)
- (modified) llvm/lib/Target/X86/X86InstrUtils.td (+35-15)
- (added) llvm/test/CodeGen/X86/apx/rol.ll (+530)
- (added) llvm/test/CodeGen/X86/apx/ror.ll (+530)
- (added) llvm/test/CodeGen/X86/apx/sar.ll (+434)
- (added) llvm/test/CodeGen/X86/apx/shl.ll (+434)
- (added) llvm/test/CodeGen/X86/apx/shld.ll (+228)
- (added) llvm/test/CodeGen/X86/apx/shr.ll (+434)
- (added) llvm/test/CodeGen/X86/apx/shrd.ll (+240)
- (added) llvm/test/MC/Disassembler/X86/apx/rcl.txt (+194)
- (added) llvm/test/MC/Disassembler/X86/apx/rcr.txt (+194)
- (added) llvm/test/MC/Disassembler/X86/apx/rol.txt (+386)
- (added) llvm/test/MC/Disassembler/X86/apx/ror.txt (+386)
- (added) llvm/test/MC/Disassembler/X86/apx/sar.txt (+386)
- (added) llvm/test/MC/Disassembler/X86/apx/shl.txt (+386)
- (added) llvm/test/MC/Disassembler/X86/apx/shld.txt (+194)
- (added) llvm/test/MC/Disassembler/X86/apx/shr.txt (+386)
- (added) llvm/test/MC/Disassembler/X86/apx/shrd.txt (+194)
- (added) llvm/test/MC/X86/apx/rcl-att.s (+146)
- (added) llvm/test/MC/X86/apx/rcl-intel.s (+143)
- (added) llvm/test/MC/X86/apx/rcr-att.s (+146)
- (added) llvm/test/MC/X86/apx/rcr-intel.s (+143)
- (added) llvm/test/MC/X86/apx/rol-att.s (+287)
- (added) llvm/test/MC/X86/apx/rol-intel.s (+284)
- (added) llvm/test/MC/X86/apx/ror-att.s (+287)
- (added) llvm/test/MC/X86/apx/ror-intel.s (+284)
- (added) llvm/test/MC/X86/apx/sar-att.s (+287)
- (added) llvm/test/MC/X86/apx/sar-intel.s (+284)
- (added) llvm/test/MC/X86/apx/shl-att.s (+287)
- (added) llvm/test/MC/X86/apx/shl-intel.s (+284)
- (added) llvm/test/MC/X86/apx/shld-att.s (+149)
- (added) llvm/test/MC/X86/apx/shld-intel.s (+146)
- (added) llvm/test/MC/X86/apx/shr-att.s (+287)
- (added) llvm/test/MC/X86/apx/shr-intel.s (+284)
- (added) llvm/test/MC/X86/apx/shrd-att.s (+149)
- (added) llvm/test/MC/X86/apx/shrd-intel.s (+146)
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+240)
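For readers of the truncated diff below: the `_ND` variants use APX's new data destination (NDD) encoding, which writes the result to a separate destination register instead of clobbering the source, and the `_NF` variants use the APX no-flags encoding, which leaves EFLAGS untouched. A minimal sketch of the assembly forms these tests exercise, assuming the AT&T operand order (destination last) and the `{nf}` pseudo-prefix spelling used in the llvm/test/MC/X86/apx tests:

```asm
# Sketch only; operand order and {nf} spelling assumed from the apx MC tests.
shll      $2, %eax           # legacy: eax = eax << 2, clobbers eax, writes EFLAGS
shll      $2, %ecx, %edx     # ND:     edx = ecx << 2, ecx preserved, writes EFLAGS
{nf} shll $2, %ecx, %edx     # NF+ND:  edx = ecx << 2, EFLAGS not written
```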
``````````diff
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 7166e0bc39179c8..7e2893f340973aa 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -20,22 +20,66 @@ multiclass ShiftRotate<string m, Format RegMRM, Format MemMRM, SDPatternOperator
let Uses = uses in {
let isConvertibleToThreeAddress = !if(!eq(m, "shl"), 1, 0) in {
- def 8ri : BinOpRI8U_R<m, RegMRM, Xi8, node>, Sched<[ri]>, DefEFLAGS;
- def 16ri : BinOpRI8U_R<m, RegMRM, Xi16, node>, Sched<[ri]>, DefEFLAGS, OpSize16;
- def 32ri : BinOpRI8U_R<m, RegMRM, Xi32, node>, Sched<[ri]>, DefEFLAGS, OpSize32;
- def 64ri : BinOpRI8U_R<m, RegMRM, Xi64, node>, Sched<[ri]>, DefEFLAGS;
+ let Predicates = [NoNDD] in {
+ def 8ri : BinOpRI8U_R<m, RegMRM, Xi8, node>, Sched<[ri]>, DefEFLAGS;
+ def 16ri : BinOpRI8U_R<m, RegMRM, Xi16, node>, Sched<[ri]>, DefEFLAGS, OpSize16;
+ def 32ri : BinOpRI8U_R<m, RegMRM, Xi32, node>, Sched<[ri]>, DefEFLAGS, OpSize32;
+ def 64ri : BinOpRI8U_R<m, RegMRM, Xi64, node>, Sched<[ri]>, DefEFLAGS;
+ }
+ let Predicates = [HasNDD, In64BitMode] in {
+ def 8ri_ND : BinOpRI8U_R<m, RegMRM, Xi8, node, 1>, Sched<[ri]>, DefEFLAGS;
+ def 16ri_ND : BinOpRI8U_R<m, RegMRM, Xi16, node, 1>, Sched<[ri]>, DefEFLAGS, PD;
+ def 32ri_ND : BinOpRI8U_R<m, RegMRM, Xi32, node, 1>, Sched<[ri]>, DefEFLAGS;
+ def 64ri_ND : BinOpRI8U_R<m, RegMRM, Xi64, node, 1>, Sched<[ri]>, DefEFLAGS;
+ }
+ let Predicates = [In64BitMode] in {
+ def 8ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi8, null_frag>, Sched<[ri]>, DefEFLAGS, PL;
+ def 16ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi16, null_frag>, Sched<[ri]>, DefEFLAGS, PL, PD;
+ def 32ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi32, null_frag>, Sched<[ri]>, DefEFLAGS, PL;
+ def 64ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi64, null_frag>, Sched<[ri]>, DefEFLAGS, PL;
+ }
}
def 8mi : BinOpMI8U_M<m, MemMRM, Xi8, node>, Sched<[mi, WriteRMW]>, DefEFLAGS;
def 16mi : BinOpMI8U_M<m, MemMRM, Xi16, node>, Sched<[mi, WriteRMW]>, DefEFLAGS, OpSize16;
def 32mi : BinOpMI8U_M<m, MemMRM, Xi32, node>, Sched<[mi, WriteRMW]>, DefEFLAGS, OpSize32;
def 64mi : BinOpMI8U_M<m, MemMRM, Xi64, node>, Sched<[mi, WriteRMW]>, DefEFLAGS, Requires<[In64BitMode]>;
+ let Predicates = [HasNDD, In64BitMode] in {
+ def 8mi_ND : BinOpMI8U_R<m, MemMRM, Xi8, node>, Sched<[mi, ri]>, DefEFLAGS;
+ def 16mi_ND : BinOpMI8U_R<m, MemMRM, Xi16, node>, Sched<[mi, ri]>, DefEFLAGS, PD;
+ def 32mi_ND : BinOpMI8U_R<m, MemMRM, Xi32, node>, Sched<[mi, ri]>, DefEFLAGS;
+ def 64mi_ND : BinOpMI8U_R<m, MemMRM, Xi64, node>, Sched<[mi, ri]>, DefEFLAGS;
+ }
+ let Predicates = [In64BitMode] in {
+ def 8mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi8, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
+ def 16mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi16, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL, PD;
+ def 32mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi32, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
+ def 64mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi64, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
+ }
let SchedRW = [ri] in {
def 8r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi8, null_frag>;
def 16r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi16, null_frag>, OpSize16;
def 32r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi32, null_frag>, OpSize32;
def 64r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi64, null_frag>;
+
+ // FIXME: The assembler can't tell whether it's 8r1_ND or 8rCL when the source register is CL, e.g.
+ //
+ //   shlb %cl, %al
+ //
+ // GNU binutils distinguishes them by adding an explicit $1 to the asm string of 8r1_ND. We do not yet
+ // support constant immediates in asm strings for X86 in TD, so 8r1_ND is marked DisassembleOnly for the time being.
+ let Predicates = [In64BitMode] in {
+ def 8r1_ND : UnaryOpR_RF<0xD1, RegMRM, m, Xi8, null_frag, 1>, DisassembleOnly;
+ def 16r1_ND : UnaryOpR_RF<0xD1, RegMRM, m, Xi16, null_frag, 1>, PD;
+ def 32r1_ND : UnaryOpR_RF<0xD1, RegMRM, m, Xi32, null_frag, 1>;
+ def 64r1_ND : UnaryOpR_RF<0xD1, RegMRM, m, Xi64, null_frag, 1>;
+
+ def 8r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi8, null_frag>, PL;
+ def 16r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi16, null_frag>, PL, PD;
+ def 32r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi32, null_frag>, PL;
+ def 64r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi64, null_frag>, PL;
+ }
}
let SchedRW = [mi, WriteRMW] in {
@@ -43,22 +87,142 @@ multiclass ShiftRotate<string m, Format RegMRM, Format MemMRM, SDPatternOperator
def 16m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi16, null_frag>, OpSize16;
def 32m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi32, null_frag>, OpSize32;
def 64m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi64, null_frag>, Requires<[In64BitMode]>;
+
+ let Predicates = [In64BitMode] in {
+ def 8m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi8, null_frag>, PL;
+ def 16m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi16, null_frag>, PL, PD;
+ def 32m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi32, null_frag>, PL;
+ def 64m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi64, null_frag>, PL;
+ }
+ }
+ let SchedRW = [mi, ri], Predicates = [In64BitMode] in {
+ def 8m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi8, null_frag>;
+ def 16m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi16, null_frag>, PD;
+ def 32m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi32, null_frag>;
+ def 64m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi64, null_frag>;
}
}
let Uses = !listconcat([CL], uses) in {
- def 8rCL : BinOpRC_R<m, RegMRM, Xi8, node>, Sched<[rCL]>, DefEFLAGS;
- def 16rCL : BinOpRC_R<m, RegMRM, Xi16, node>, Sched<[rCL]>, DefEFLAGS, OpSize16;
- def 32rCL : BinOpRC_R<m, RegMRM, Xi32, node>, Sched<[rCL]>, DefEFLAGS, OpSize32;
- def 64rCL : BinOpRC_R<m, RegMRM, Xi64, node>, Sched<[rCL]>, DefEFLAGS;
+ let Predicates = [NoNDD] in {
+ def 8rCL : BinOpRC_R<m, RegMRM, Xi8, node>, Sched<[rCL]>, DefEFLAGS;
+ def 16rCL : BinOpRC_R<m, RegMRM, Xi16, node>, Sched<[rCL]>, DefEFLAGS, OpSize16;
+ def 32rCL : BinOpRC_R<m, RegMRM, Xi32, node>, Sched<[rCL]>, DefEFLAGS, OpSize32;
+ def 64rCL : BinOpRC_R<m, RegMRM, Xi64, node>, Sched<[rCL]>, DefEFLAGS;
+ }
+ let Predicates = [HasNDD, In64BitMode] in {
+ def 8rCL_ND : BinOpRC_R<m, RegMRM, Xi8, node, 1>, Sched<[rCL]>, DefEFLAGS;
+ def 16rCL_ND : BinOpRC_R<m, RegMRM, Xi16, node, 1>, Sched<[rCL]>, DefEFLAGS, PD;
+ def 32rCL_ND : BinOpRC_R<m, RegMRM, Xi32, node, 1>, Sched<[rCL]>, DefEFLAGS;
+ def 64rCL_ND : BinOpRC_R<m, RegMRM, Xi64, node, 1>, Sched<[rCL]>, DefEFLAGS;
+ }
+ let Predicates = [In64BitMode] in {
+ def 8rCL_EVEX : BinOpRC_R<m, RegMRM, Xi8, null_frag>, Sched<[rCL]>, DefEFLAGS, PL;
+ def 16rCL_EVEX : BinOpRC_R<m, RegMRM, Xi16, null_frag>, Sched<[rCL]>, DefEFLAGS, PL, PD;
+ def 32rCL_EVEX : BinOpRC_R<m, RegMRM, Xi32, null_frag>, Sched<[rCL]>, DefEFLAGS, PL;
+ def 64rCL_EVEX : BinOpRC_R<m, RegMRM, Xi64, null_frag>, Sched<[rCL]>, DefEFLAGS, PL;
+ }
def 8mCL : BinOpMC_M<m, MemMRM, Xi8, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS;
def 16mCL : BinOpMC_M<m, MemMRM, Xi16, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS, OpSize16;
def 32mCL : BinOpMC_M<m, MemMRM, Xi32, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS, OpSize32;
def 64mCL : BinOpMC_M<m, MemMRM, Xi64, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS, Requires<[In64BitMode]>;
+
+ let Predicates = [HasNDD, In64BitMode] in {
+ def 8mCL_ND : BinOpMC_R<m, MemMRM, Xi8, node>, Sched<[mCL, rCL]>, DefEFLAGS;
+ def 16mCL_ND : BinOpMC_R<m, MemMRM, Xi16, node>, Sched<[mCL, rCL]>, DefEFLAGS, PD;
+ def 32mCL_ND : BinOpMC_R<m, MemMRM, Xi32, node>, Sched<[mCL, rCL]>, DefEFLAGS;
+ def 64mCL_ND : BinOpMC_R<m, MemMRM, Xi64, node>, Sched<[mCL, rCL]>, DefEFLAGS;
+ }
+
+ let Predicates = [In64BitMode] in {
+ def 8mCL_EVEX : BinOpMC_M<m, MemMRM, Xi8, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL;
+ def 16mCL_EVEX : BinOpMC_M<m, MemMRM, Xi16, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL, PD;
+ def 32mCL_EVEX : BinOpMC_M<m, MemMRM, Xi32, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL;
+ def 64mCL_EVEX : BinOpMC_M<m, MemMRM, Xi64, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL;
+ }
}
}
+multiclass ShiftRotate_NF<string m, Format RegMRM, Format MemMRM, SchedReadWrite rCL,
+ SchedReadWrite ri, SchedReadWrite mCL, SchedReadWrite mi> {
+ let Predicates = [In64BitMode] in {
+ let isConvertibleToThreeAddress = !if(!eq(m, "shl"), 1, 0) in {
+ def 8ri_NF : BinOpRI8U_R<m, RegMRM, Xi8, null_frag>, Sched<[ri]>, NF;
+ def 16ri_NF : BinOpRI8U_R<m, RegMRM, Xi16, null_frag>, Sched<[ri]>, NF, PD;
+ def 32ri_NF : BinOpRI8U_R<m, RegMRM, Xi32, null_frag>, Sched<[ri]>, NF;
+ def 64ri_NF : BinOpRI8U_R<m, RegMRM, Xi64, null_frag>, Sched<[ri]>, NF;
+
+ def 8ri_NF_ND : BinOpRI8U_R<m, RegMRM, Xi8, null_frag, 1>, Sched<[ri]>, EVEX_NF;
+ def 16ri_NF_ND : BinOpRI8U_R<m, RegMRM, Xi16, null_frag, 1>, Sched<[ri]>, EVEX_NF, PD;
+ def 32ri_NF_ND : BinOpRI8U_R<m, RegMRM, Xi32, null_frag, 1>, Sched<[ri]>, EVEX_NF;
+ def 64ri_NF_ND : BinOpRI8U_R<m, RegMRM, Xi64, null_frag, 1>, Sched<[ri]>, EVEX_NF;
+ }
+
+ def 8mi_NF : BinOpMI8U_M<m, MemMRM, Xi8, null_frag>, Sched<[mi, WriteRMW]>, NF;
+ def 16mi_NF : BinOpMI8U_M<m, MemMRM, Xi16, null_frag>, Sched<[mi, WriteRMW]>, NF, PD;
+ def 32mi_NF : BinOpMI8U_M<m, MemMRM, Xi32, null_frag>, Sched<[mi, WriteRMW]>, NF;
+ def 64mi_NF : BinOpMI8U_M<m, MemMRM, Xi64, null_frag>, Sched<[mi, WriteRMW]>, NF;
+
+ def 8mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi8, null_frag>, Sched<[mi, ri]>, EVEX_NF;
+ def 16mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi16, null_frag>, Sched<[mi, ri]>, EVEX_NF, PD;
+ def 32mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi32, null_frag>, Sched<[mi, ri]>, EVEX_NF;
+ def 64mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi64, null_frag>, Sched<[mi, ri]>, EVEX_NF;
+
+ let SchedRW = [ri] in {
+ // FIXME: The assembler can't tell whether it's 8r1_NF_ND or 8rCL_NF when the source register is CL, e.g.
+ //
+ //   {nf} shlb %cl, %al
+ //
+ // GNU binutils distinguishes them by adding an explicit $1 to the asm string of 8r1_NF_ND. We do not yet
+ // support constant immediates in asm strings for X86 in TD, so 8r1_NF_ND is marked DisassembleOnly for the time being.
+ def 8r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi8, null_frag>, NF;
+ def 16r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi16, null_frag>, NF, PD;
+ def 32r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi32, null_frag>, NF;
+ def 64r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi64, null_frag>, NF;
+
+ def 8r1_NF_ND : UnaryOpR_R<0xD1, RegMRM, m, Xi8, null_frag, 1>, EVEX_NF, DisassembleOnly;
+ def 16r1_NF_ND : UnaryOpR_R<0xD1, RegMRM, m, Xi16, null_frag, 1>, EVEX_NF, PD;
+ def 32r1_NF_ND : UnaryOpR_R<0xD1, RegMRM, m, Xi32, null_frag, 1>, EVEX_NF;
+ def 64r1_NF_ND : UnaryOpR_R<0xD1, RegMRM, m, Xi64, null_frag, 1>, EVEX_NF;
+ }
+
+ let SchedRW = [mi, WriteRMW] in {
+ def 8m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi8, null_frag>, NF;
+ def 16m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi16, null_frag>, NF, PD;
+ def 32m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi32, null_frag>, NF;
+ def 64m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi64, null_frag>, NF;
+ }
+ let SchedRW = [mi, ri] in {
+ def 8m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi8, null_frag>, EVEX_NF;
+ def 16m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi16, null_frag>, EVEX_NF, PD;
+ def 32m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi32, null_frag>, EVEX_NF;
+ def 64m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi64, null_frag>, EVEX_NF;
+ }
+
+ let Uses = [CL] in {
+ def 8rCL_NF : BinOpRC_R<m, RegMRM, Xi8, null_frag>, Sched<[rCL]>, NF;
+ def 16rCL_NF : BinOpRC_R<m, RegMRM, Xi16, null_frag>, Sched<[rCL]>, NF, PD;
+ def 32rCL_NF : BinOpRC_R<m, RegMRM, Xi32, null_frag>, Sched<[rCL]>, NF;
+ def 64rCL_NF : BinOpRC_R<m, RegMRM, Xi64, null_frag>, Sched<[rCL]>, NF;
+
+ def 8rCL_NF_ND : BinOpRC_R<m, RegMRM, Xi8, null_frag, 1>, Sched<[rCL]>, EVEX_NF;
+ def 16rCL_NF_ND : BinOpRC_R<m, RegMRM, Xi16, null_frag, 1>, Sched<[rCL]>, EVEX_NF, PD;
+ def 32rCL_NF_ND : BinOpRC_R<m, RegMRM, Xi32, null_frag, 1>, Sched<[rCL]>, EVEX_NF;
+ def 64rCL_NF_ND : BinOpRC_R<m, RegMRM, Xi64, null_frag, 1>, Sched<[rCL]>, EVEX_NF;
+
+ def 8mCL_NF : BinOpMC_M<m, MemMRM, Xi8, null_frag>, Sched<[mCL, WriteRMW]>, NF;
+ def 16mCL_NF : BinOpMC_M<m, MemMRM, Xi16, null_frag>, Sched<[mCL, WriteRMW]>, NF, PD;
+ def 32mCL_NF : BinOpMC_M<m, MemMRM, Xi32, null_frag>, Sched<[mCL, WriteRMW]>, NF;
+ def 64mCL_NF : BinOpMC_M<m, MemMRM, Xi64, null_frag>, Sched<[mCL, WriteRMW]>, NF;
+
+ def 8mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi8, null_frag>, Sched<[mCL, rCL]>, EVEX_NF;
+ def 16mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi16, null_frag>, Sched<[mCL, rCL]>, EVEX_NF, PD;
+ def 32mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi32, null_frag>, Sched<[mCL, rCL]>, EVEX_NF;
+ def 64mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi64, null_frag>, Sched<[mCL, rCL]>, EVEX_NF;
+ }
+ }
+}
defm SHL: ShiftRotate<"shl", MRM4r, MRM4m, shl, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
defm SHR: ShiftRotate<"shr", MRM5r, MRM5m, srl, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
defm SAR: ShiftRotate<"sar", MRM7r, MRM7m, sra, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
@@ -68,15 +232,34 @@ defm ROR: ShiftRotate<"ror", MRM1r, MRM1m, rotr, WriteRotateCL, WriteRotate, Wri
defm RCL: ShiftRotate<"rcl", MRM2r, MRM2m, null_frag, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd, [EFLAGS]>;
defm RCR: ShiftRotate<"rcr", MRM3r, MRM3m, null_frag, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd, [EFLAGS]>;
+defm SHL: ShiftRotate_NF<"shl", MRM4r, MRM4m, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
+defm SHR: ShiftRotate_NF<"shr", MRM5r, MRM5m, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
+defm SAR: ShiftRotate_NF<"sar", MRM7r, MRM7m, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
+
+defm ROL: ShiftRotate_NF<"rol", MRM0r, MRM0m, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd>;
+defm ROR: ShiftRotate_NF<"ror", MRM1r, MRM1m, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd>;
+
// Use the opposite rotate if allows us to use the rotate by 1 instruction.
-def : Pat<(rotl GR8:$src1, (i8 7)), (ROR8r1 GR8:$src1)>;
-def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>;
-def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1 GR32:$src1)>;
-def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1 GR64:$src1)>;
-def : Pat<(rotr GR8:$src1, (i8 7)), (ROL8r1 GR8:$src1)>;
-def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1 GR16:$src1)>;
-def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1 GR32:$src1)>;
-def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1 GR64:$src1)>;
+let Predicates = [NoNDD] in {
+ def : Pat<(rotl GR8:$src1, (i8 7)), (ROR8r1 GR8:$src1)>;
+ def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>;
+ def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1 GR32:$src1)>;
+ def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1 GR64:$src1)>;
+ def : Pat<(rotr GR8:$src1, (i8 7)), (ROL8r1 GR8:$src1)>;
+ def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1 GR16:$src1)>;
+ def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1 GR32:$src1)>;
+ def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1 GR64:$src1)>;
+}
+let Predicates = [HasNDD] in {
+ def : Pat<(rotl GR8:$src1, (i8 7)), (ROR8r1_ND GR8:$src1)>;
+ def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1_ND GR16:$src1)>;
+ def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1_ND GR32:$src1)>;
+ def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1_ND GR64:$src1)>;
+ def : Pat<(rotr GR8:$src1, (i8 7)), (ROL8r1_ND GR8:$src1)>;
+ def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1_ND GR16:$src1)>;
+ def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1_ND GR32:$src1)>;
+ def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1_ND GR64:$src1)>;
+}
def : Pat<(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst),
(ROR8m1 addr:$dst)>;
@@ -96,34 +279,74 @@ def : Pat<(store (rotr (loadi32 addr:$dst), (i8 31)), addr:$dst),
def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst),
(ROL64m1 addr:$dst)>, Requires<[In64BitMode]>;
+let Predicates = [HasNDD] in {
+def : Pat<(rotl (loadi8 addr:$src), (i8 7)),
+ (ROR8m1_ND addr:$src)>;
+def : Pat<(rotl (loadi16 addr:$src), (i8 15)),
+ (ROR16m1_ND addr:$src)>;
+def : Pat<(rotl (loadi32 addr:$src), (i8 31)),
+ (ROR32m1_ND addr:$src)>;
+def : Pat<(rotl (loadi64 addr:$src), (i8 63)),
+ (ROR64m1_ND addr:$src)>;
+
+def : Pat<(rotr (loadi8 addr:$src), (i8 7)),
+ (ROL8m1_ND addr:$src)>;
+def : Pat<(rotr (loadi16 addr:$src), (i8 15)),
+ (ROL16m1_ND addr:$src)>;
+def : Pat<(rotr (loadi32 addr:$src), (i8 31)),
+ (ROL32m1_ND addr:$src)>;
+def : Pat<(rotr (loadi64 addr:$src), (i8 63)),
+ (ROL64m1_ND addr:$src)>;
+}
// Patterns for rotate with relocImm for the immediate field.
-def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
- (ROL8ri GR8:$src1, relocImm:$src2)>;
-def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
- (ROL16ri GR16:$src1, relocImm:$src2)>;
-def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
- (ROL32ri GR32:$src1, relocImm:$src2)>;
-def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
- (ROL64ri GR64:$src1, relocImm:$src2)>;
-
-def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
- (ROR8ri GR8:$src1, relocImm:$src2)>;
-def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
- (ROR16ri GR16:$src1, relocImm:$src2)>;
-def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
- (ROR32ri GR32:$src1, relocImm:$src2)>;
-def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
- (ROR64ri GR64:$src1, relocImm:$src2)>;
+let Predicates = [NoNDD] in {
+ def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
+ (ROL8ri GR8:$src1, relocImm:$src2)>;
+ def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
+ (ROL16ri GR16:$src1, relocImm:$src2)>;
+ def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
+ (ROL32ri GR32:$src1, relocImm:$src2)>;
+ def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
+ (ROL64ri GR64:$src1, relocImm:$src2)>;
+
+ def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
+ (ROR8ri GR8:$src1, relocImm:$src2)>;
+ def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
+ (ROR16ri GR16:$src1, relocImm:$src2)>;
+ def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
+ (ROR32ri GR32:$src1, relocImm:$src2)>;
+ def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
+ (ROR64ri GR64:$src1, relocImm:$src2)>;
+}
+let Predicates = [HasNDD] in {
+ def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
+ (ROL8ri_ND GR8:$src1, relocImm:$src2)>;
+ def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
+ (ROL16ri_ND GR16:$src1, relocImm:$src2)>;
+ def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
+ (ROL32ri_ND GR32:$src1, relocImm:$src2)>;
+ def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
+ (ROL64ri_ND GR64:$src1, relocImm:$src2)>;
+
+ def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
+ (ROR8ri_ND GR8:$src1, relocImm:$src2)>;
+ def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
+ (ROR16ri_ND GR16:$src1, relocImm:$src2)>;
+ def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
+ (ROR32ri_ND GR32:$src1, relocImm:$src2)>;
+ def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
+ (ROR64ri_ND GR64:$src1, relocImm:$src2)>;
+}
//===----------------------------------------------------------------------===//
// Double precision shift instructions (generalizations of rotate)
//===----------------------------------------------------------------------===//
-class ShlrdOpRRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+class ShlrdOpRRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
: ITy<o, MRMDestReg, t, (outs t.RegClass:$dst),
- (ins t.RegClass:$sr...
[truncated]
``````````
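To make the FIXME comments in the diff concrete: the by-1 NDD form and the by-CL legacy form collide textually when the source register is %cl, and the spelling binutils uses to break the tie is the one the comment describes (a sketch, not output from this patch):

```asm
shlb %cl, %al       # legacy 8rCL: al = al << cl (count taken from CL)
shlb %cl, %al       # APX 8r1_ND:  al = cl << 1 -- identical text, hence DisassembleOnly
shlb $1, %cl, %al   # GNU binutils prints the NDD by-1 form with an explicit $1
```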
https://github.com/llvm/llvm-project/pull/78853
More information about the llvm-commits mailing list