[llvm] [X86] Support APX promoted RAO-INT and MOVBE instructions (PR #77431)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 25 02:16:54 PST 2024


https://github.com/XinWang10 updated https://github.com/llvm/llvm-project/pull/77431

>From 2278af0f954449e1aa0d5fc1c4616f07759abb84 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 9 Jan 2024 01:19:36 -0800
Subject: [PATCH 01/13] [X86] Support APX promoted RAO-INT and MOVBE
 instructions

---
 llvm/lib/Target/X86/X86InstrMisc.td           |  54 ++++++-
 llvm/lib/Target/X86/X86InstrRAOINT.td         |  27 ++--
 llvm/test/CodeGen/X86/movbe.ll                | 146 +++++++++++++++---
 llvm/test/CodeGen/X86/raoint-intrinsics-32.ll |  21 +++
 llvm/test/CodeGen/X86/raoint-intrinsics-64.ll |  21 +++
 llvm/test/MC/Disassembler/X86/apx/movbe.txt   |  86 +++++++++++
 llvm/test/MC/Disassembler/X86/apx/rao-int.txt |  74 +++++++++
 llvm/test/MC/X86/apx/movbe-att.s              |  76 +++++++++
 llvm/test/MC/X86/apx/movbe-intel.s            |  73 +++++++++
 llvm/test/MC/X86/apx/rao-int-att.s            |  77 +++++++++
 llvm/test/MC/X86/apx/rao-int-intel.s          |  73 +++++++++
 llvm/test/TableGen/x86-fold-tables.inc        |   3 +
 12 files changed, 697 insertions(+), 34 deletions(-)
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/movbe.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/rao-int.txt
 create mode 100644 llvm/test/MC/X86/apx/movbe-att.s
 create mode 100644 llvm/test/MC/X86/apx/movbe-intel.s
 create mode 100644 llvm/test/MC/X86/apx/rao-int-att.s
 create mode 100644 llvm/test/MC/X86/apx/rao-int-intel.s

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 66fac2369d0a918..6b1c7ce3d72b0f6 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1090,7 +1090,7 @@ def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
 //===----------------------------------------------------------------------===//
 // MOVBE Instructions
 //
-let Predicates = [HasMOVBE] in {
+let Predicates = [HasMOVBE, NoEGPR] in {
   let SchedRW = [WriteALULd] in {
   def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "movbe{w}\t{$src, $dst|$dst, $src}",
@@ -1121,6 +1121,58 @@ let Predicates = [HasMOVBE] in {
   }
 }
 
+let Predicates = [HasMOVBE, HasEGPR, In64BitMode] in {
+  let SchedRW = [WriteALULd] in {
+  def MOVBE16rm_EVEX : I<0x60, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+                         "movbe{w}\t{$src, $dst|$dst, $src}",
+                         [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>,
+                       EVEX, NoCD8, T_MAP4, PD;
+  def MOVBE32rm_EVEX : I<0x60, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+                         "movbe{l}\t{$src, $dst|$dst, $src}",
+                         [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>,
+                       EVEX, NoCD8, T_MAP4;
+  def MOVBE64rm_EVEX : RI<0x60, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                         "movbe{q}\t{$src, $dst|$dst, $src}",
+                         [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>,
+                       EVEX, NoCD8, T_MAP4;
+  }
+  let SchedRW = [WriteStore] in {
+  def MOVBE16mr_EVEX : I<0x61, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+                         "movbe{w}\t{$src, $dst|$dst, $src}",
+                         [(store (bswap GR16:$src), addr:$dst)]>,
+                       EVEX, NoCD8, T_MAP4, PD;
+  def MOVBE32mr_EVEX : I<0x61, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+                         "movbe{l}\t{$src, $dst|$dst, $src}",
+                         [(store (bswap GR32:$src), addr:$dst)]>,
+                       EVEX, NoCD8, T_MAP4;
+  def MOVBE64mr_EVEX : RI<0x61, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                         "movbe{q}\t{$src, $dst|$dst, $src}",
+                         [(store (bswap GR64:$src), addr:$dst)]>,
+                       EVEX, NoCD8, T_MAP4;
+  }
+  let SchedRW = [WriteALU] in {
+  def MOVBE16rr_EVEX : I<0x61, MRMDestReg, (outs), (ins GR16:$dst, GR16:$src),
+                         "movbe{w}\t{$src, $dst|$dst, $src}", []>,
+                       EVEX, NoCD8, T_MAP4, PD;
+  def MOVBE32rr_EVEX : I<0x61, MRMDestReg, (outs), (ins GR32:$dst, GR32:$src),
+                         "movbe{l}\t{$src, $dst|$dst, $src}", []>,
+                       EVEX, NoCD8, T_MAP4;
+  def MOVBE64rr_EVEX : RI<0x61, MRMDestReg, (outs), (ins GR64:$dst, GR64:$src),
+                          "movbe{q}\t{$src, $dst|$dst, $src}", []>,
+                       EVEX, NoCD8, T_MAP4;
+
+  def MOVBE16rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+                             "movbe{w}\t{$src, $dst|$dst, $src}", []>,
+                           EVEX, NoCD8, T_MAP4, PD;
+  def MOVBE32rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+                             "movbe{l}\t{$src, $dst|$dst, $src}", []>,
+                           EVEX, NoCD8, T_MAP4;
+  def MOVBE64rr_EVEX_REV : RI<0x60, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                              "movbe{q}\t{$src, $dst|$dst, $src}", []>,
+                           EVEX, NoCD8, T_MAP4;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // RDRAND Instruction
 //
diff --git a/llvm/lib/Target/X86/X86InstrRAOINT.td b/llvm/lib/Target/X86/X86InstrRAOINT.td
index bc17b00f3573a98..c2f202bd616a51d 100644
--- a/llvm/lib/Target/X86/X86InstrRAOINT.td
+++ b/llvm/lib/Target/X86/X86InstrRAOINT.td
@@ -25,21 +25,30 @@ def X86rao_xor  : SDNode<"X86ISD::AXOR", SDTRAOBinaryArith,
 def X86rao_and  : SDNode<"X86ISD::AAND", SDTRAOBinaryArith,
                          [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
-multiclass RAOINT_BASE<string OpcodeStr> {
+multiclass RAOINT_BASE<string OpcodeStr, string Suffix = ""> {
   let Predicates = [HasRAOINT] in
-    def 32mr : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                 !strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
-                 [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
-               Sched<[WriteALURMW]>;
+    def 32mr#Suffix : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+                        !strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
+                        [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
+                      Sched<[WriteALURMW]>;
 
   let Predicates = [HasRAOINT, In64BitMode] in
-    def 64mr : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                 !strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
-                 [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
-               Sched<[WriteALURMW]>, REX_W;
+    def 64mr#Suffix : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                        !strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
+                        [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
+                      Sched<[WriteALURMW]>, REX_W;
 }
 
+let Predicates = [HasRAOINT, NoEGPR] in {
 defm AADD : RAOINT_BASE<"add">, T8;
 defm AAND : RAOINT_BASE<"and">, T8, PD;
 defm AOR  : RAOINT_BASE<"or" >, T8, XD;
 defm AXOR : RAOINT_BASE<"xor">, T8, XS;
+}
+
+let Predicates = [HasRAOINT, HasEGPR, In64BitMode] in {
+defm AADD : RAOINT_BASE<"add", "_EVEX">, EVEX, NoCD8, T_MAP4;
+defm AAND : RAOINT_BASE<"and", "_EVEX">, EVEX, NoCD8, T_MAP4, PD;
+defm AOR  : RAOINT_BASE<"or", "_EVEX">, EVEX, NoCD8, T_MAP4, XD;
+defm AXOR : RAOINT_BASE<"xor", "_EVEX">, EVEX, NoCD8, T_MAP4, XS;
+}
diff --git a/llvm/test/CodeGen/X86/movbe.ll b/llvm/test/CodeGen/X86/movbe.ll
index ef22fcaf698ca14..60d98ce05a9d866 100644
--- a/llvm/test/CodeGen/X86/movbe.ll
+++ b/llvm/test/CodeGen/X86/movbe.ll
@@ -1,66 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s
 ; RUN: llc -mtriple=x86_64-linux -mcpu=slm < %s | FileCheck %s -check-prefix=SLM
+; RUN: llc -mtriple=x86_64-linux -mcpu=slm -mattr=+egpr --show-mc-encoding < %s | FileCheck %s -check-prefix=EGPR
 
 declare i16 @llvm.bswap.i16(i16) nounwind readnone
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
 declare i64 @llvm.bswap.i64(i64) nounwind readnone
 
 define void @test1(ptr nocapture %x, i16 %y) nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movbew %si, (%rdi)
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    retq
+;
+; SLM-LABEL: test1:
+; SLM:       # %bb.0:
+; SLM-NEXT:    movbew %si, (%rdi)
+; SLM-NEXT:    retq
+;
+; EGPR-LABEL: test1:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbew %si, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf1,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %bswap = call i16 @llvm.bswap.i16(i16 %y)
   store i16 %bswap, ptr %x, align 2
   ret void
-; CHECK-LABEL: test1:
-; CHECK: movbew %si, (%rdi)
-; SLM-LABEL: test1:
-; SLM: movbew   %si, (%rdi)
 }
 
 define i16 @test2(ptr %x) nounwind {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movbew (%rdi), %ax
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    retq
+;
+; SLM-LABEL: test2:
+; SLM:       # %bb.0:
+; SLM-NEXT:    movbew (%rdi), %ax
+; SLM-NEXT:    retq
+;
+; EGPR-LABEL: test2:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbew (%rdi), %ax # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf0,0x07]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %load = load i16, ptr %x, align 2
   %bswap = call i16 @llvm.bswap.i16(i16 %load)
   ret i16 %bswap
-; CHECK-LABEL: test2:
-; CHECK: movbew (%rdi), %ax
-; SLM-LABEL: test2:
-; SLM: movbew   (%rdi), %ax
 }
 
 define void @test3(ptr nocapture %x, i32 %y) nounwind {
+; CHECK-LABEL: test3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movbel %esi, (%rdi)
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    retq
+;
+; SLM-LABEL: test3:
+; SLM:       # %bb.0:
+; SLM-NEXT:    movbel %esi, (%rdi)
+; SLM-NEXT:    retq
+;
+; EGPR-LABEL: test3:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbel %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf1,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %bswap = call i32 @llvm.bswap.i32(i32 %y)
   store i32 %bswap, ptr %x, align 4
   ret void
-; CHECK-LABEL: test3:
-; CHECK: movbel	%esi, (%rdi)
-; SLM-LABEL: test3:
-; SLM: movbel	%esi, (%rdi)
 }
 
 define i32 @test4(ptr %x) nounwind {
+; CHECK-LABEL: test4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movbel (%rdi), %eax
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    retq
+;
+; SLM-LABEL: test4:
+; SLM:       # %bb.0:
+; SLM-NEXT:    movbel (%rdi), %eax
+; SLM-NEXT:    retq
+;
+; EGPR-LABEL: test4:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbel (%rdi), %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf0,0x07]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %load = load i32, ptr %x, align 4
   %bswap = call i32 @llvm.bswap.i32(i32 %load)
   ret i32 %bswap
-; CHECK-LABEL: test4:
-; CHECK: movbel	(%rdi), %eax
-; SLM-LABEL: test4:
-; SLM: movbel	(%rdi), %eax
 }
 
 define void @test5(ptr %x, i64 %y) nounwind {
+; CHECK-LABEL: test5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movbeq %rsi, (%rdi)
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    retq
+;
+; SLM-LABEL: test5:
+; SLM:       # %bb.0:
+; SLM-NEXT:    movbeq %rsi, (%rdi)
+; SLM-NEXT:    retq
+;
+; EGPR-LABEL: test5:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbeq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf1,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %bswap = call i64 @llvm.bswap.i64(i64 %y)
   store i64 %bswap, ptr %x, align 8
   ret void
-; CHECK-LABEL: test5:
-; CHECK: movbeq	%rsi, (%rdi)
-; SLM-LABEL: test5:
-; SLM: movbeq	%rsi, (%rdi)
 }
 
 define i64 @test6(ptr %x) nounwind {
+; CHECK-LABEL: test6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movbeq (%rdi), %rax
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    retq
+;
+; SLM-LABEL: test6:
+; SLM:       # %bb.0:
+; SLM-NEXT:    movbeq (%rdi), %rax
+; SLM-NEXT:    retq
+;
+; EGPR-LABEL: test6:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbeq (%rdi), %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf0,0x07]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %load = load i64, ptr %x, align 8
   %bswap = call i64 @llvm.bswap.i64(i64 %load)
   ret i64 %bswap
-; CHECK-LABEL: test6:
-; CHECK: movbeq	(%rdi), %rax
-; SLM-LABEL: test6:
-; SLM: movbeq	(%rdi), %rax
 }
diff --git a/llvm/test/CodeGen/X86/raoint-intrinsics-32.ll b/llvm/test/CodeGen/X86/raoint-intrinsics-32.ll
index 9715c8f4c034894..20b27cd43f5701a 100644
--- a/llvm/test/CodeGen/X86/raoint-intrinsics-32.ll
+++ b/llvm/test/CodeGen/X86/raoint-intrinsics-32.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+raoint | FileCheck %s --check-prefixes=X64
 ; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+raoint | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+raoint,+egpr | FileCheck %s --check-prefixes=EGPR
 
 define void @test_int_x86_aadd32(ptr %A, i32 %B) {
 ; X64-LABEL: test_int_x86_aadd32:
@@ -14,6 +15,11 @@ define void @test_int_x86_aadd32(ptr %A, i32 %B) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
 ; X86-NEXT:    aaddl %eax, (%ecx) # encoding: [0x0f,0x38,0xfc,0x01]
 ; X86-NEXT:    retl # encoding: [0xc3]
+;
+; EGPR-LABEL: test_int_x86_aadd32:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    aaddl %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xfc,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   call void @llvm.x86.aadd32(ptr %A, i32 %B)
   ret  void
 }
@@ -31,6 +37,11 @@ define void @test_int_x86_aand32(ptr %A, i32 %B) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
 ; X86-NEXT:    aandl %eax, (%ecx) # encoding: [0x66,0x0f,0x38,0xfc,0x01]
 ; X86-NEXT:    retl # encoding: [0xc3]
+;
+; EGPR-LABEL: test_int_x86_aand32:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    aandl %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xfc,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   call void @llvm.x86.aand32(ptr %A, i32 %B)
   ret  void
 }
@@ -48,6 +59,11 @@ define void @test_int_x86_aor32(ptr %A, i32 %B) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
 ; X86-NEXT:    aorl %eax, (%ecx) # encoding: [0xf2,0x0f,0x38,0xfc,0x01]
 ; X86-NEXT:    retl # encoding: [0xc3]
+;
+; EGPR-LABEL: test_int_x86_aor32:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    aorl %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0xf2,0x0f,0x38,0xfc,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   call void @llvm.x86.aor32(ptr %A, i32 %B)
   ret  void
 }
@@ -65,6 +81,11 @@ define void @test_int_x86_axor32(ptr %A, i32 %B) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
 ; X86-NEXT:    axorl %eax, (%ecx) # encoding: [0xf3,0x0f,0x38,0xfc,0x01]
 ; X86-NEXT:    retl # encoding: [0xc3]
+;
+; EGPR-LABEL: test_int_x86_axor32:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    axorl %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0xf3,0x0f,0x38,0xfc,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   call void @llvm.x86.axor32(ptr %A, i32 %B)
   ret  void
 }
diff --git a/llvm/test/CodeGen/X86/raoint-intrinsics-64.ll b/llvm/test/CodeGen/X86/raoint-intrinsics-64.ll
index 9d4fec591b76299..6b684615a0261bf 100644
--- a/llvm/test/CodeGen/X86/raoint-intrinsics-64.ll
+++ b/llvm/test/CodeGen/X86/raoint-intrinsics-64.ll
@@ -1,11 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+raoint | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+raoint,+egpr | FileCheck %s --check-prefixes=EGPR
 
 define void @test_int_x86_aadd64(ptr %A, i64 %B) {
 ; X64-LABEL: test_int_x86_aadd64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    aaddq %rsi, (%rdi) # encoding: [0x48,0x0f,0x38,0xfc,0x37]
 ; X64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_int_x86_aadd64:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    aaddq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xfc,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   call void @llvm.x86.aadd64(ptr %A, i64 %B)
   ret  void
 }
@@ -16,6 +22,11 @@ define void @test_int_x86_aand64(ptr %A, i64 %B) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    aandq %rsi, (%rdi) # encoding: [0x66,0x48,0x0f,0x38,0xfc,0x37]
 ; X64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_int_x86_aand64:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    aandq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x48,0x0f,0x38,0xfc,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   call void @llvm.x86.aand64(ptr %A, i64 %B)
   ret  void
 }
@@ -26,6 +37,11 @@ define void @test_int_x86_aor64(ptr %A, i64 %B) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    aorq %rsi, (%rdi) # encoding: [0xf2,0x48,0x0f,0x38,0xfc,0x37]
 ; X64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_int_x86_aor64:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    aorq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0xf2,0x48,0x0f,0x38,0xfc,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   call void @llvm.x86.aor64(ptr %A, i64 %B)
   ret  void
 }
@@ -36,6 +52,11 @@ define void @test_int_x86_axor64(ptr %A, i64 %B) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    axorq %rsi, (%rdi) # encoding: [0xf3,0x48,0x0f,0x38,0xfc,0x37]
 ; X64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_int_x86_axor64:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    axorq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0xf3,0x48,0x0f,0x38,0xfc,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   call void @llvm.x86.axor64(ptr %A, i64 %B)
   ret  void
 }
diff --git a/llvm/test/MC/Disassembler/X86/apx/movbe.txt b/llvm/test/MC/Disassembler/X86/apx/movbe.txt
new file mode 100644
index 000000000000000..716bd169a022217
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/movbe.txt
@@ -0,0 +1,86 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   movbew	%dx, %ax
+# INTEL: movbe	ax, dx
+0x62,0xf4,0x7d,0x08,0x61,0xd0
+
+# ATT:   movbew	%dx, 123(%rax,%rbx,4)
+# INTEL: movbe	word ptr [rax + 4*rbx + 123], dx
+0x62,0xf4,0x7d,0x08,0x61,0x54,0x98,0x7b
+
+# ATT:   movbel	%ecx, %edx
+# INTEL: movbe	edx, ecx
+0x62,0xf4,0x7c,0x08,0x61,0xca
+
+# ATT:   movbel	%ecx, 123(%rax,%rbx,4)
+# INTEL: movbe	dword ptr [rax + 4*rbx + 123], ecx
+0x62,0xf4,0x7c,0x08,0x61,0x4c,0x98,0x7b
+
+# ATT:   movbeq	%r9, %r15
+# INTEL: movbe	r15, r9
+0x62,0x54,0xfc,0x08,0x61,0xcf
+
+# ATT:   movbeq	%r9, 123(%rax,%rbx,4)
+# INTEL: movbe	qword ptr [rax + 4*rbx + 123], r9
+0x62,0x74,0xfc,0x08,0x61,0x4c,0x98,0x7b
+
+# ATT:   movbew	123(%rax,%rbx,4), %dx
+# INTEL: movbe	dx, word ptr [rax + 4*rbx + 123]
+0x62,0xf4,0x7d,0x08,0x60,0x54,0x98,0x7b
+
+# ATT:   movbel	123(%rax,%rbx,4), %ecx
+# INTEL: movbe	ecx, dword ptr [rax + 4*rbx + 123]
+0x62,0xf4,0x7c,0x08,0x60,0x4c,0x98,0x7b
+
+# ATT:   movbeq	123(%rax,%rbx,4), %r9
+# INTEL: movbe	r9, qword ptr [rax + 4*rbx + 123]
+0x62,0x74,0xfc,0x08,0x60,0x4c,0x98,0x7b
+
+# ATT:   movbew	%r17w, %r21w
+# INTEL: movbe	r21w, r17w
+0x62,0xec,0x7d,0x08,0x61,0xcd
+
+# ATT:   movbew	%r17w, 291(%r28,%r29,4)
+# INTEL: movbe	word ptr [r28 + 4*r29 + 291], r17w
+0x62,0x8c,0x79,0x08,0x61,0x8c,0xac,0x23,0x01,0x00,0x00
+
+# ATT:   movbel	%r18d, %r22d
+# INTEL: movbe	r22d, r18d
+0x62,0xec,0x7c,0x08,0x61,0xd6
+
+# ATT:   movbel	%r18d, 291(%r28,%r29,4)
+# INTEL: movbe	dword ptr [r28 + 4*r29 + 291], r18d
+0x62,0x8c,0x78,0x08,0x61,0x94,0xac,0x23,0x01,0x00,0x00
+
+# ATT:   movbeq	%r19, %r23
+# INTEL: movbe	r23, r19
+0x62,0xec,0xfc,0x08,0x61,0xdf
+
+# ATT:   movbeq	%r19, 291(%r28,%r29,4)
+# INTEL: movbe	qword ptr [r28 + 4*r29 + 291], r19
+0x62,0x8c,0xf8,0x08,0x61,0x9c,0xac,0x23,0x01,0x00,0x00
+
+# ATT:   movbew	291(%r28,%r29,4), %r17w
+# INTEL: movbe	r17w, word ptr [r28 + 4*r29 + 291]
+0x62,0x8c,0x79,0x08,0x60,0x8c,0xac,0x23,0x01,0x00,0x00
+
+# ATT:   movbel	291(%r28,%r29,4), %r18d
+# INTEL: movbe	r18d, dword ptr [r28 + 4*r29 + 291]
+0x62,0x8c,0x78,0x08,0x60,0x94,0xac,0x23,0x01,0x00,0x00
+
+# ATT:   movbeq	291(%r28,%r29,4), %r19
+# INTEL: movbe	r19, qword ptr [r28 + 4*r29 + 291]
+0x62,0x8c,0xf8,0x08,0x60,0x9c,0xac,0x23,0x01,0x00,0x00
+
+# ATT:   movbew	 %r16w, %r17w
+# INTEL: movbe	r17w, r16w
+0x62,0xec,0x7d,0x08,0x60,0xc8
+
+# ATT:   movbel	 %r16d, %r17d
+# INTEL: movbe	r17d, r16d
+0x62,0xec,0x7c,0x08,0x60,0xc8
+
+# ATT:   movbeq	 %r16, %r17
+# INTEL: movbe	r17, r16
+0x62,0xec,0xfc,0x08,0x60,0xc8
diff --git a/llvm/test/MC/Disassembler/X86/apx/rao-int.txt b/llvm/test/MC/Disassembler/X86/apx/rao-int.txt
new file mode 100644
index 000000000000000..5db6c4b60825880
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/rao-int.txt
@@ -0,0 +1,74 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+## aadd
+
+# ATT:   aaddl	%ecx, 123(%rax,%rbx,4)
+# INTEL: aadd	dword ptr [rax + 4*rbx + 123], ecx
+0x62,0xf4,0x7c,0x08,0xfc,0x4c,0x98,0x7b
+
+# ATT:   aaddq	%r9, 123(%rax,%rbx,4)
+# INTEL: aadd	qword ptr [rax + 4*rbx + 123], r9
+0x62,0x74,0xfc,0x08,0xfc,0x4c,0x98,0x7b
+
+# ATT:   aaddl	%r18d, 291(%r28,%r29,4)
+# INTEL: aadd	dword ptr [r28 + 4*r29 + 291], r18d
+0x62,0x8c,0x78,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00
+
+# ATT:   aaddq	%r19, 291(%r28,%r29,4)
+# INTEL: aadd	qword ptr [r28 + 4*r29 + 291], r19
+0x62,0x8c,0xf8,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00
+
+## aand
+
+# ATT:   aandl	%ecx, 123(%rax,%rbx,4)
+# INTEL: aand	dword ptr [rax + 4*rbx + 123], ecx
+0x62,0xf4,0x7d,0x08,0xfc,0x4c,0x98,0x7b
+
+# ATT:   aandq	%r9, 123(%rax,%rbx,4)
+# INTEL: aand	qword ptr [rax + 4*rbx + 123], r9
+0x62,0x74,0xfd,0x08,0xfc,0x4c,0x98,0x7b
+
+# ATT:   aandl	%r18d, 291(%r28,%r29,4)
+# INTEL: aand	dword ptr [r28 + 4*r29 + 291], r18d
+0x62,0x8c,0x79,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00
+
+# ATT:   aandq	%r19, 291(%r28,%r29,4)
+# INTEL: aand	qword ptr [r28 + 4*r29 + 291], r19
+0x62,0x8c,0xf9,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00
+
+## aor
+
+# ATT:   aorl	%ecx, 123(%rax,%rbx,4)
+# INTEL: aor	dword ptr [rax + 4*rbx + 123], ecx
+0x62,0xf4,0x7f,0x08,0xfc,0x4c,0x98,0x7b
+
+# ATT:   aorq	%r9, 123(%rax,%rbx,4)
+# INTEL: aor	qword ptr [rax + 4*rbx + 123], r9
+0x62,0x74,0xff,0x08,0xfc,0x4c,0x98,0x7b
+
+# ATT:   aorl	%r18d, 291(%r28,%r29,4)
+# INTEL: aor	dword ptr [r28 + 4*r29 + 291], r18d
+0x62,0x8c,0x7b,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00
+
+# ATT:   aorq	%r19, 291(%r28,%r29,4)
+# INTEL: aor	qword ptr [r28 + 4*r29 + 291], r19
+0x62,0x8c,0xfb,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00
+
+## axor
+
+# ATT:   axorl	%ecx, 123(%rax,%rbx,4)
+# INTEL: axor	dword ptr [rax + 4*rbx + 123], ecx
+0x62,0xf4,0x7e,0x08,0xfc,0x4c,0x98,0x7b
+
+# ATT:   axorq	%r9, 123(%rax,%rbx,4)
+# INTEL: axor	qword ptr [rax + 4*rbx + 123], r9
+0x62,0x74,0xfe,0x08,0xfc,0x4c,0x98,0x7b
+
+# ATT:   axorl	%r18d, 291(%r28,%r29,4)
+# INTEL: axor	dword ptr [r28 + 4*r29 + 291], r18d
+0x62,0x8c,0x7a,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00
+
+# ATT:   axorq	%r19, 291(%r28,%r29,4)
+# INTEL: axor	qword ptr [r28 + 4*r29 + 291], r19
+0x62,0x8c,0xfa,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00
diff --git a/llvm/test/MC/X86/apx/movbe-att.s b/llvm/test/MC/X86/apx/movbe-att.s
new file mode 100644
index 000000000000000..c9019efa6e5f131
--- /dev/null
+++ b/llvm/test/MC/X86/apx/movbe-att.s
@@ -0,0 +1,76 @@
+# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-18: error:
+# ERROR-NOT: error:
+# CHECK: {evex}	movbew	%dx, %ax
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x61,0xd0]
+         {evex}	movbew	%dx, %ax
+
+# CHECK: {evex}	movbew	%dx, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x61,0x54,0x98,0x7b]
+         {evex}	movbew	%dx, 123(%rax,%rbx,4)
+
+# CHECK: {evex}	movbel	%ecx, %edx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x61,0xca]
+         {evex}	movbel	%ecx, %edx
+
+# CHECK: {evex}	movbel	%ecx, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x61,0x4c,0x98,0x7b]
+         {evex}	movbel	%ecx, 123(%rax,%rbx,4)
+
+# CHECK: {evex}	movbeq	%r9, %r15
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0x61,0xcf]
+         {evex}	movbeq	%r9, %r15
+
+# CHECK: {evex}	movbeq	%r9, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0x74,0xfc,0x08,0x61,0x4c,0x98,0x7b]
+         {evex}	movbeq	%r9, 123(%rax,%rbx,4)
+
+# CHECK: {evex}	movbew	123(%rax,%rbx,4), %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x60,0x54,0x98,0x7b]
+         {evex}	movbew	123(%rax,%rbx,4), %dx
+
+# CHECK: {evex}	movbel	123(%rax,%rbx,4), %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x60,0x4c,0x98,0x7b]
+         {evex}	movbel	123(%rax,%rbx,4), %ecx
+
+# CHECK: {evex}	movbeq	123(%rax,%rbx,4), %r9
+# CHECK: encoding: [0x62,0x74,0xfc,0x08,0x60,0x4c,0x98,0x7b]
+         {evex}	movbeq	123(%rax,%rbx,4), %r9
+
+# CHECK: movbew	%r17w, %r21w
+# CHECK: encoding: [0x62,0xec,0x7d,0x08,0x61,0xcd]
+         movbew	%r17w, %r21w
+
+# CHECK: movbew	%r17w, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0x79,0x08,0x61,0x8c,0xac,0x23,0x01,0x00,0x00]
+         movbew	%r17w, 291(%r28,%r29,4)
+
+# CHECK: movbel	%r18d, %r22d
+# CHECK: encoding: [0x62,0xec,0x7c,0x08,0x61,0xd6]
+         movbel	%r18d, %r22d
+
+# CHECK: movbel	%r18d, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0x78,0x08,0x61,0x94,0xac,0x23,0x01,0x00,0x00]
+         movbel	%r18d, 291(%r28,%r29,4)
+
+# CHECK: movbeq	%r19, %r23
+# CHECK: encoding: [0x62,0xec,0xfc,0x08,0x61,0xdf]
+         movbeq	%r19, %r23
+
+# CHECK: movbeq	%r19, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0xf8,0x08,0x61,0x9c,0xac,0x23,0x01,0x00,0x00]
+         movbeq	%r19, 291(%r28,%r29,4)
+
+# CHECK: movbew	291(%r28,%r29,4), %r17w
+# CHECK: encoding: [0x62,0x8c,0x79,0x08,0x60,0x8c,0xac,0x23,0x01,0x00,0x00]
+         movbew	291(%r28,%r29,4), %r17w
+
+# CHECK: movbel	291(%r28,%r29,4), %r18d
+# CHECK: encoding: [0x62,0x8c,0x78,0x08,0x60,0x94,0xac,0x23,0x01,0x00,0x00]
+         movbel	291(%r28,%r29,4), %r18d
+
+# CHECK: movbeq	291(%r28,%r29,4), %r19
+# CHECK: encoding: [0x62,0x8c,0xf8,0x08,0x60,0x9c,0xac,0x23,0x01,0x00,0x00]
+         movbeq	291(%r28,%r29,4), %r19
diff --git a/llvm/test/MC/X86/apx/movbe-intel.s b/llvm/test/MC/X86/apx/movbe-intel.s
new file mode 100644
index 000000000000000..30439902323e1b8
--- /dev/null
+++ b/llvm/test/MC/X86/apx/movbe-intel.s
@@ -0,0 +1,73 @@
+# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+# CHECK: {evex}	movbe	ax, dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x61,0xd0]
+         {evex}	movbe	ax, dx
+
+# CHECK: {evex}	movbe	word ptr [rax + 4*rbx + 123], dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x61,0x54,0x98,0x7b]
+         {evex}	movbe	word ptr [rax + 4*rbx + 123], dx
+
+# CHECK: {evex}	movbe	edx, ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x61,0xca]
+         {evex}	movbe	edx, ecx
+
+# CHECK: {evex}	movbe	dword ptr [rax + 4*rbx + 123], ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x61,0x4c,0x98,0x7b]
+         {evex}	movbe	dword ptr [rax + 4*rbx + 123], ecx
+
+# CHECK: {evex}	movbe	r15, r9
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0x61,0xcf]
+         {evex}	movbe	r15, r9
+
+# CHECK: {evex}	movbe	qword ptr [rax + 4*rbx + 123], r9
+# CHECK: encoding: [0x62,0x74,0xfc,0x08,0x61,0x4c,0x98,0x7b]
+         {evex}	movbe	qword ptr [rax + 4*rbx + 123], r9
+
+# CHECK: {evex}	movbe	dx, word ptr [rax + 4*rbx + 123]
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x60,0x54,0x98,0x7b]
+         {evex}	movbe	dx, word ptr [rax + 4*rbx + 123]
+
+# CHECK: {evex}	movbe	ecx, dword ptr [rax + 4*rbx + 123]
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x60,0x4c,0x98,0x7b]
+         {evex}	movbe	ecx, dword ptr [rax + 4*rbx + 123]
+
+# CHECK: {evex}	movbe	r9, qword ptr [rax + 4*rbx + 123]
+# CHECK: encoding: [0x62,0x74,0xfc,0x08,0x60,0x4c,0x98,0x7b]
+         {evex}	movbe	r9, qword ptr [rax + 4*rbx + 123]
+
+# CHECK: movbe	r21w, r17w
+# CHECK: encoding: [0x62,0xec,0x7d,0x08,0x61,0xcd]
+         movbe	r21w, r17w
+
+# CHECK: movbe	word ptr [r28 + 4*r29 + 291], r17w
+# CHECK: encoding: [0x62,0x8c,0x79,0x08,0x61,0x8c,0xac,0x23,0x01,0x00,0x00]
+         movbe	word ptr [r28 + 4*r29 + 291], r17w
+
+# CHECK: movbe	r22d, r18d
+# CHECK: encoding: [0x62,0xec,0x7c,0x08,0x61,0xd6]
+         movbe	r22d, r18d
+
+# CHECK: movbe	dword ptr [r28 + 4*r29 + 291], r18d
+# CHECK: encoding: [0x62,0x8c,0x78,0x08,0x61,0x94,0xac,0x23,0x01,0x00,0x00]
+         movbe	dword ptr [r28 + 4*r29 + 291], r18d
+
+# CHECK: movbe	r23, r19
+# CHECK: encoding: [0x62,0xec,0xfc,0x08,0x61,0xdf]
+         movbe	r23, r19
+
+# CHECK: movbe	qword ptr [r28 + 4*r29 + 291], r19
+# CHECK: encoding: [0x62,0x8c,0xf8,0x08,0x61,0x9c,0xac,0x23,0x01,0x00,0x00]
+         movbe	qword ptr [r28 + 4*r29 + 291], r19
+
+# CHECK: movbe	r17w, word ptr [r28 + 4*r29 + 291]
+# CHECK: encoding: [0x62,0x8c,0x79,0x08,0x60,0x8c,0xac,0x23,0x01,0x00,0x00]
+         movbe	r17w, word ptr [r28 + 4*r29 + 291]
+
+# CHECK: movbe	r18d, dword ptr [r28 + 4*r29 + 291]
+# CHECK: encoding: [0x62,0x8c,0x78,0x08,0x60,0x94,0xac,0x23,0x01,0x00,0x00]
+         movbe	r18d, dword ptr [r28 + 4*r29 + 291]
+
+# CHECK: movbe	r19, qword ptr [r28 + 4*r29 + 291]
+# CHECK: encoding: [0x62,0x8c,0xf8,0x08,0x60,0x9c,0xac,0x23,0x01,0x00,0x00]
+         movbe	r19, qword ptr [r28 + 4*r29 + 291]
diff --git a/llvm/test/MC/X86/apx/rao-int-att.s b/llvm/test/MC/X86/apx/rao-int-att.s
new file mode 100644
index 000000000000000..aaed85a36247882
--- /dev/null
+++ b/llvm/test/MC/X86/apx/rao-int-att.s
@@ -0,0 +1,77 @@
+# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-16: error:
+# ERROR-NOT: error:
+
+## aadd
+
+# CHECK: {evex}	aaddl	%ecx, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aaddl	%ecx, 123(%rax,%rbx,4)
+
+# CHECK: {evex}	aaddq	%r9, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0x74,0xfc,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aaddq	%r9, 123(%rax,%rbx,4)
+
+# CHECK: aaddl	%r18d, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0x78,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00]
+         aaddl	%r18d, 291(%r28,%r29,4)
+
+# CHECK: aaddq	%r19, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0xf8,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00]
+         aaddq	%r19, 291(%r28,%r29,4)
+
+## aand
+
+# CHECK: {evex}	aandl	%ecx, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aandl	%ecx, 123(%rax,%rbx,4)
+
+# CHECK: {evex}	aandq	%r9, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0x74,0xfd,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aandq	%r9, 123(%rax,%rbx,4)
+
+# CHECK: aandl	%r18d, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0x79,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00]
+         aandl	%r18d, 291(%r28,%r29,4)
+
+# CHECK: aandq	%r19, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0xf9,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00]
+         aandq	%r19, 291(%r28,%r29,4)
+
+## aor
+
+# CHECK: {evex}	aorl	%ecx, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aorl	%ecx, 123(%rax,%rbx,4)
+
+# CHECK: {evex}	aorq	%r9, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0x74,0xff,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aorq	%r9, 123(%rax,%rbx,4)
+
+# CHECK: aorl	%r18d, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0x7b,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00]
+         aorl	%r18d, 291(%r28,%r29,4)
+
+# CHECK: aorq	%r19, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0xfb,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00]
+         aorq	%r19, 291(%r28,%r29,4)
+
+## axor
+
+# CHECK: {evex}	axorl	%ecx, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0xf4,0x7e,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	axorl	%ecx, 123(%rax,%rbx,4)
+
+# CHECK: {evex}	axorq	%r9, 123(%rax,%rbx,4)
+# CHECK: encoding: [0x62,0x74,0xfe,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	axorq	%r9, 123(%rax,%rbx,4)
+
+# CHECK: axorl	%r18d, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0x7a,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00]
+         axorl	%r18d, 291(%r28,%r29,4)
+
+# CHECK: axorq	%r19, 291(%r28,%r29,4)
+# CHECK: encoding: [0x62,0x8c,0xfa,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00]
+         axorq	%r19, 291(%r28,%r29,4)
diff --git a/llvm/test/MC/X86/apx/rao-int-intel.s b/llvm/test/MC/X86/apx/rao-int-intel.s
new file mode 100644
index 000000000000000..8842bc9f0d3467b
--- /dev/null
+++ b/llvm/test/MC/X86/apx/rao-int-intel.s
@@ -0,0 +1,73 @@
+# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+## aadd
+
+# CHECK: {evex}	aadd	dword ptr [rax + 4*rbx + 123], ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aadd	dword ptr [rax + 4*rbx + 123], ecx
+
+# CHECK: {evex}	aadd	qword ptr [rax + 4*rbx + 123], r9
+# CHECK: encoding: [0x62,0x74,0xfc,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aadd	qword ptr [rax + 4*rbx + 123], r9
+
+# CHECK: aadd	dword ptr [r28 + 4*r29 + 291], r18d
+# CHECK: encoding: [0x62,0x8c,0x78,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00]
+         aadd	dword ptr [r28 + 4*r29 + 291], r18d
+
+# CHECK: aadd	qword ptr [r28 + 4*r29 + 291], r19
+# CHECK: encoding: [0x62,0x8c,0xf8,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00]
+         aadd	qword ptr [r28 + 4*r29 + 291], r19
+
+## aand
+
+# CHECK: {evex}	aand	dword ptr [rax + 4*rbx + 123], ecx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aand	dword ptr [rax + 4*rbx + 123], ecx
+
+# CHECK: {evex}	aand	qword ptr [rax + 4*rbx + 123], r9
+# CHECK: encoding: [0x62,0x74,0xfd,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aand	qword ptr [rax + 4*rbx + 123], r9
+
+# CHECK: aand	dword ptr [r28 + 4*r29 + 291], r18d
+# CHECK: encoding: [0x62,0x8c,0x79,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00]
+         aand	dword ptr [r28 + 4*r29 + 291], r18d
+
+# CHECK: aand	qword ptr [r28 + 4*r29 + 291], r19
+# CHECK: encoding: [0x62,0x8c,0xf9,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00]
+         aand	qword ptr [r28 + 4*r29 + 291], r19
+
+## aor
+
+# CHECK: {evex}	aor	dword ptr [rax + 4*rbx + 123], ecx
+# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aor	dword ptr [rax + 4*rbx + 123], ecx
+
+# CHECK: {evex}	aor	qword ptr [rax + 4*rbx + 123], r9
+# CHECK: encoding: [0x62,0x74,0xff,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	aor	qword ptr [rax + 4*rbx + 123], r9
+
+# CHECK: aor	dword ptr [r28 + 4*r29 + 291], r18d
+# CHECK: encoding: [0x62,0x8c,0x7b,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00]
+         aor	dword ptr [r28 + 4*r29 + 291], r18d
+
+# CHECK: aor	qword ptr [r28 + 4*r29 + 291], r19
+# CHECK: encoding: [0x62,0x8c,0xfb,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00]
+         aor	qword ptr [r28 + 4*r29 + 291], r19
+
+## axor
+
+# CHECK: {evex}	axor	dword ptr [rax + 4*rbx + 123], ecx
+# CHECK: encoding: [0x62,0xf4,0x7e,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	axor	dword ptr [rax + 4*rbx + 123], ecx
+
+# CHECK: {evex}	axor	qword ptr [rax + 4*rbx + 123], r9
+# CHECK: encoding: [0x62,0x74,0xfe,0x08,0xfc,0x4c,0x98,0x7b]
+         {evex}	axor	qword ptr [rax + 4*rbx + 123], r9
+
+# CHECK: axor	dword ptr [r28 + 4*r29 + 291], r18d
+# CHECK: encoding: [0x62,0x8c,0x7a,0x08,0xfc,0x94,0xac,0x23,0x01,0x00,0x00]
+         axor	dword ptr [r28 + 4*r29 + 291], r18d
+
+# CHECK: axor	qword ptr [r28 + 4*r29 + 291], r19
+# CHECK: encoding: [0x62,0x8c,0xfa,0x08,0xfc,0x9c,0xac,0x23,0x01,0x00,0x00]
+         axor	qword ptr [r28 + 4*r29 + 291], r19
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 31691006edf53b4..f8b58ac54409298 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -421,6 +421,9 @@ static const X86FoldTableEntry Table0[] = {
   {X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE|TB_NO_REVERSE},
   {X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
   {X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
+  {X86::MOVBE16rr_EVEX, X86::MOVBE16mr_EVEX, TB_FOLDED_LOAD},
+  {X86::MOVBE32rr_EVEX, X86::MOVBE32mr_EVEX, TB_FOLDED_LOAD},
+  {X86::MOVBE64rr_EVEX, X86::MOVBE64mr_EVEX, TB_FOLDED_LOAD},
   {X86::MOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE|TB_NO_REVERSE},
   {X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE|TB_ALIGN_16},
   {X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE},

>From 0919f4d630d5c20ffc0ab0940800b3d16fd1a626 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Wed, 10 Jan 2024 00:55:34 -0800
Subject: [PATCH 02/13] add pattern to movberr

---
 llvm/lib/Target/X86/X86InstrMisc.td    | 15 +++++++++------
 llvm/test/CodeGen/X86/movbe.ll         | 26 ++++++++++++++++++++++++++
 llvm/test/TableGen/x86-fold-tables.inc |  9 ++++++---
 3 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 6b1c7ce3d72b0f6..88fa3236d8c4046 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1151,14 +1151,17 @@ let Predicates = [HasMOVBE, HasEGPR, In64BitMode] in {
                        EVEX, NoCD8, T_MAP4;
   }
   let SchedRW = [WriteALU] in {
-  def MOVBE16rr_EVEX : I<0x61, MRMDestReg, (outs), (ins GR16:$dst, GR16:$src),
-                         "movbe{w}\t{$src, $dst|$dst, $src}", []>,
+  def MOVBE16rr_EVEX : I<0x61, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+                         "movbe{w}\t{$src, $dst|$dst, $src}", 
+                         [(set GR16:$dst, (bswap GR16:$src))]>,
                        EVEX, NoCD8, T_MAP4, PD;
-  def MOVBE32rr_EVEX : I<0x61, MRMDestReg, (outs), (ins GR32:$dst, GR32:$src),
-                         "movbe{l}\t{$src, $dst|$dst, $src}", []>,
+  def MOVBE32rr_EVEX : I<0x61, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+                         "movbe{l}\t{$src, $dst|$dst, $src}", 
+                         [(set GR32:$dst, (bswap GR32:$src))]>,
                        EVEX, NoCD8, T_MAP4;
-  def MOVBE64rr_EVEX : RI<0x61, MRMDestReg, (outs), (ins GR64:$dst, GR64:$src),
-                          "movbe{q}\t{$src, $dst|$dst, $src}", []>,
+  def MOVBE64rr_EVEX : RI<0x61, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+                          "movbe{q}\t{$src, $dst|$dst, $src}", 
+                          [(set GR64:$dst, (bswap GR64:$src))]>,
                        EVEX, NoCD8, T_MAP4;
 
   def MOVBE16rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
diff --git a/llvm/test/CodeGen/X86/movbe.ll b/llvm/test/CodeGen/X86/movbe.ll
index 60d98ce05a9d866..1ed5e876c993b91 100644
--- a/llvm/test/CodeGen/X86/movbe.ll
+++ b/llvm/test/CodeGen/X86/movbe.ll
@@ -162,3 +162,29 @@ define i64 @test6(ptr %x) nounwind {
   %bswap = call i64 @llvm.bswap.i64(i64 %load)
   ret i64 %bswap
 }
+
+define i64 @test7(i64 %x) nounwind {
+; CHECK-LABEL: test7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    bswapq %rax
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    retq
+;
+; SLM-LABEL: test7:
+; SLM:       # %bb.0:
+; SLM-NEXT:    movq %rdi, %rax
+; SLM-NEXT:    bswapq %rax
+; SLM-NEXT:    retq
+;
+; EGPR-LABEL: test7:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT:    bswapq %rax # encoding: [0x48,0x0f,0xc8]
+; EGPR-NEXT:    retq # encoding: [0xc3]
+  %bswap = call i64 @llvm.bswap.i64(i64 %x)
+  ret i64 %bswap
+}
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index f8b58ac54409298..f46b08c168d60e0 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -421,9 +421,9 @@ static const X86FoldTableEntry Table0[] = {
   {X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE|TB_NO_REVERSE},
   {X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
   {X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
-  {X86::MOVBE16rr_EVEX, X86::MOVBE16mr_EVEX, TB_FOLDED_LOAD},
-  {X86::MOVBE32rr_EVEX, X86::MOVBE32mr_EVEX, TB_FOLDED_LOAD},
-  {X86::MOVBE64rr_EVEX, X86::MOVBE64mr_EVEX, TB_FOLDED_LOAD},
+  {X86::MOVBE16rr_EVEX, X86::MOVBE16mr_EVEX, TB_FOLDED_STORE},
+  {X86::MOVBE32rr_EVEX, X86::MOVBE32mr_EVEX, TB_FOLDED_STORE},
+  {X86::MOVBE64rr_EVEX, X86::MOVBE64mr_EVEX, TB_FOLDED_STORE},
   {X86::MOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE|TB_NO_REVERSE},
   {X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE|TB_ALIGN_16},
   {X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE},
@@ -775,6 +775,9 @@ static const X86FoldTableEntry Table1[] = {
   {X86::MOV8rr, X86::MOV8rm, 0},
   {X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16},
   {X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16},
+  {X86::MOVBE16rr_EVEX, X86::MOVBE16rm_EVEX, 0},
+  {X86::MOVBE32rr_EVEX, X86::MOVBE32rm_EVEX, 0},
+  {X86::MOVBE64rr_EVEX, X86::MOVBE64rm_EVEX, 0},
   {X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE},
   {X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0},
   {X86::MOVDI2SSrr, X86::MOVSSrm_alt, 0},

>From ba05382491c3d800b9879819dc74cd7eed928b85 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Wed, 10 Jan 2024 01:03:46 -0800
Subject: [PATCH 03/13] update predicates

---
 llvm/lib/Target/X86/X86InstrMisc.td | 2 +-
 llvm/test/CodeGen/X86/movbe.ll      | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 88fa3236d8c4046..876ed426cf3b7fb 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -229,7 +229,7 @@ def PUSHA16  : I<0x60, RawFrm, (outs), (ins), "pushaw", []>,
                OpSize16, Requires<[Not64BitMode]>;
 }
 
-let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32] in {
+let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32], Predicates = [NoEGPR] in {
 // This instruction is a consequence of BSWAP32r observing operand size. The
 // encoding is valid, but the behavior is undefined.
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
diff --git a/llvm/test/CodeGen/X86/movbe.ll b/llvm/test/CodeGen/X86/movbe.ll
index 1ed5e876c993b91..325bf34c8825baf 100644
--- a/llvm/test/CodeGen/X86/movbe.ll
+++ b/llvm/test/CodeGen/X86/movbe.ll
@@ -182,8 +182,7 @@ define i64 @test7(i64 %x) nounwind {
 ;
 ; EGPR-LABEL: test7:
 ; EGPR:       # %bb.0:
-; EGPR-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
-; EGPR-NEXT:    bswapq %rax # encoding: [0x48,0x0f,0xc8]
+; EGPR-NEXT:    movbeq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0x61,0xf8]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %bswap = call i64 @llvm.bswap.i64(i64 %x)
   ret i64 %bswap

>From 799d4d1fee675994fe80946f81de2f0b16ee9aa4 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Wed, 10 Jan 2024 22:36:48 -0800
Subject: [PATCH 04/13] resolve comment

---
 llvm/lib/Target/X86/X86InstrMisc.td | 49 +++++++++++++++--------------
 llvm/test/CodeGen/X86/movbe.ll      |  2 +-
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 876ed426cf3b7fb..356259925c4b2db 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -229,7 +229,7 @@ def PUSHA16  : I<0x60, RawFrm, (outs), (ins), "pushaw", []>,
                OpSize16, Requires<[Not64BitMode]>;
 }
 
-let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32], Predicates = [NoEGPR] in {
+let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32], Predicates = [NoNDD] in {
 // This instruction is a consequence of BSWAP32r observing operand size. The
 // encoding is valid, but the behavior is undefined.
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
@@ -1150,30 +1150,31 @@ let Predicates = [HasMOVBE, HasEGPR, In64BitMode] in {
                          [(store (bswap GR64:$src), addr:$dst)]>,
                        EVEX, NoCD8, T_MAP4;
   }
-  let SchedRW = [WriteALU] in {
-  def MOVBE16rr_EVEX : I<0x61, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
-                         "movbe{w}\t{$src, $dst|$dst, $src}", 
-                         [(set GR16:$dst, (bswap GR16:$src))]>,
-                       EVEX, NoCD8, T_MAP4, PD;
-  def MOVBE32rr_EVEX : I<0x61, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
-                         "movbe{l}\t{$src, $dst|$dst, $src}", 
-                         [(set GR32:$dst, (bswap GR32:$src))]>,
-                       EVEX, NoCD8, T_MAP4;
-  def MOVBE64rr_EVEX : RI<0x61, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                          "movbe{q}\t{$src, $dst|$dst, $src}", 
-                          [(set GR64:$dst, (bswap GR64:$src))]>,
-                       EVEX, NoCD8, T_MAP4;
+}
 
-  def MOVBE16rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
-                             "movbe{w}\t{$src, $dst|$dst, $src}", []>,
-                           EVEX, NoCD8, T_MAP4, PD;
-  def MOVBE32rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
-                             "movbe{l}\t{$src, $dst|$dst, $src}", []>,
-                           EVEX, NoCD8, T_MAP4;
-  def MOVBE64rr_EVEX_REV : RI<0x60, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                              "movbe{q}\t{$src, $dst|$dst, $src}", []>,
-                           EVEX, NoCD8, T_MAP4;
-  }
+let SchedRW = [WriteALU], Predicates = [HasMOVBE, HasNDD, In64BitMode] in {
+def MOVBE16rr_EVEX : I<0x61, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+                        "movbe{w}\t{$src, $dst|$dst, $src}",
+                        [(set GR16:$dst, (bswap GR16:$src))]>,
+                      EVEX, NoCD8, T_MAP4, PD;
+def MOVBE32rr_EVEX : I<0x61, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+                        "movbe{l}\t{$src, $dst|$dst, $src}",
+                        [(set GR32:$dst, (bswap GR32:$src))]>,
+                      EVEX, NoCD8, T_MAP4;
+def MOVBE64rr_EVEX : RI<0x61, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+                        "movbe{q}\t{$src, $dst|$dst, $src}",
+                        [(set GR64:$dst, (bswap GR64:$src))]>,
+                      EVEX, NoCD8, T_MAP4;
+
+def MOVBE16rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+                            "movbe{w}\t{$src, $dst|$dst, $src}", []>,
+                          EVEX, NoCD8, T_MAP4, PD;
+def MOVBE32rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+                            "movbe{l}\t{$src, $dst|$dst, $src}", []>,
+                          EVEX, NoCD8, T_MAP4;
+def MOVBE64rr_EVEX_REV : RI<0x60, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                            "movbe{q}\t{$src, $dst|$dst, $src}", []>,
+                          EVEX, NoCD8, T_MAP4;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/movbe.ll b/llvm/test/CodeGen/X86/movbe.ll
index 325bf34c8825baf..52edb19aca151e3 100644
--- a/llvm/test/CodeGen/X86/movbe.ll
+++ b/llvm/test/CodeGen/X86/movbe.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s
 ; RUN: llc -mtriple=x86_64-linux -mcpu=slm < %s | FileCheck %s -check-prefix=SLM
-; RUN: llc -mtriple=x86_64-linux -mcpu=slm -mattr=+egpr --show-mc-encoding < %s | FileCheck %s -check-prefix=EGPR
+; RUN: llc -mtriple=x86_64-linux -mcpu=slm -mattr=+egpr,+ndd --show-mc-encoding < %s | FileCheck %s -check-prefix=EGPR
 
 declare i16 @llvm.bswap.i16(i16) nounwind readnone
 declare i32 @llvm.bswap.i32(i32) nounwind readnone

>From 74f97e839810aa7a216d0211291a6ae549831074 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 16 Jan 2024 01:34:50 -0800
Subject: [PATCH 05/13] resolve comment

---
 llvm/lib/Target/X86/X86InstrMisc.td                |  6 +++---
 llvm/test/MC/Disassembler/X86/apx/movbe.txt        | 12 ------------
 .../MC/Disassembler/X86/apx/reverse-encoding.txt   | 14 ++++++++++++++
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 356259925c4b2db..39a242ff18be584 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1168,13 +1168,13 @@ def MOVBE64rr_EVEX : RI<0x61, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
 
 def MOVBE16rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                             "movbe{w}\t{$src, $dst|$dst, $src}", []>,
-                          EVEX, NoCD8, T_MAP4, PD;
+                          EVEX, NoCD8, T_MAP4, PD, DisassembleOnly;
 def MOVBE32rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                             "movbe{l}\t{$src, $dst|$dst, $src}", []>,
-                          EVEX, NoCD8, T_MAP4;
+                          EVEX, NoCD8, T_MAP4, DisassembleOnly;
 def MOVBE64rr_EVEX_REV : RI<0x60, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                             "movbe{q}\t{$src, $dst|$dst, $src}", []>,
-                          EVEX, NoCD8, T_MAP4;
+                          EVEX, NoCD8, T_MAP4, DisassembleOnly;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/Disassembler/X86/apx/movbe.txt b/llvm/test/MC/Disassembler/X86/apx/movbe.txt
index 716bd169a022217..6570dcd0b55b1e6 100644
--- a/llvm/test/MC/Disassembler/X86/apx/movbe.txt
+++ b/llvm/test/MC/Disassembler/X86/apx/movbe.txt
@@ -72,15 +72,3 @@
 # ATT:   movbeq	291(%r28,%r29,4), %r19
 # INTEL: movbe	r19, qword ptr [r28 + 4*r29 + 291]
 0x62,0x8c,0xf8,0x08,0x60,0x9c,0xac,0x23,0x01,0x00,0x00
-
-# ATT:   movbew	 %r16w, %r17w
-# INTEL: movbe	r17w, r16w
-0x62,0xec,0x7d,0x08,0x60,0xc8
-
-# ATT:   movbel	 %r16d, %r17d
-# INTEL: movbe	r17d, r16d
-0x62,0xec,0x7c,0x08,0x60,0xc8
-
-# ATT:   movbeq	 %r16, %r17
-# INTEL: movbe	r17, r16
-0x62,0xec,0xfc,0x08,0x60,0xc8
diff --git a/llvm/test/MC/Disassembler/X86/apx/reverse-encoding.txt b/llvm/test/MC/Disassembler/X86/apx/reverse-encoding.txt
index e686e5aa28af010..94499299477de75 100644
--- a/llvm/test/MC/Disassembler/X86/apx/reverse-encoding.txt
+++ b/llvm/test/MC/Disassembler/X86/apx/reverse-encoding.txt
@@ -398,3 +398,17 @@
 # ATT:   sbbq	%r17, %r16, %r18
 # INTEL: sbb	r18, r16, r17
 0x62,0xec,0xec,0x10,0x1b,0xc1
+
+## movbe
+
+# ATT:   movbew	 %r16w, %r17w
+# INTEL: movbe	r17w, r16w
+0x62,0xec,0x7d,0x08,0x60,0xc8
+
+# ATT:   movbel	 %r16d, %r17d
+# INTEL: movbe	r17d, r16d
+0x62,0xec,0x7c,0x08,0x60,0xc8
+
+# ATT:   movbeq	 %r16, %r17
+# INTEL: movbe	r17, r16
+0x62,0xec,0xfc,0x08,0x60,0xc8

>From 3c6cb40428ae4cc3ff7aff687f755d75d1cf5125 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Fri, 19 Jan 2024 00:20:09 -0800
Subject: [PATCH 06/13] rewrite with ITy

---
 llvm/lib/Target/X86/X86InstrMisc.td | 108 ++++++++--------------------
 1 file changed, 30 insertions(+), 78 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 39a242ff18be584..998b377b8f74369 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1090,91 +1090,43 @@ def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
 //===----------------------------------------------------------------------===//
 // MOVBE Instructions
 //
+multiclass Movbe<bits<8> o, X86TypeInfo t, string suffix = ""> {
+  let SchedRW = [WriteALULd] in
+  def rm#suffix : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst),
+                      (ins t.MemOperand:$src1), "movbe", unaryop_ndd_args,
+                      [(set t.RegClass:$dst, (bswap (t.LoadNode addr:$src1)))]>;
+  let SchedRW = [WriteStore] in
+  def mr#suffix : ITy<!add(o, 1), MRMDestMem, t, (outs),
+                      (ins t.MemOperand:$dst, t.RegClass:$src1),
+                      "movbe", unaryop_ndd_args,
+                      [(store (bswap t.RegClass:$src1), addr:$dst)]>;
+}
+
 let Predicates = [HasMOVBE, NoEGPR] in {
-  let SchedRW = [WriteALULd] in {
-  def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
-                    "movbe{w}\t{$src, $dst|$dst, $src}",
-                    [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>,
-                    OpSize16, T8;
-  def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                    "movbe{l}\t{$src, $dst|$dst, $src}",
-                    [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>,
-                    OpSize32, T8;
-  def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                     "movbe{q}\t{$src, $dst|$dst, $src}",
-                     [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>,
-                     T8;
-  }
-  let SchedRW = [WriteStore] in {
-  def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
-                    "movbe{w}\t{$src, $dst|$dst, $src}",
-                    [(store (bswap GR16:$src), addr:$dst)]>,
-                    OpSize16, T8;
-  def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                    "movbe{l}\t{$src, $dst|$dst, $src}",
-                    [(store (bswap GR32:$src), addr:$dst)]>,
-                    OpSize32, T8;
-  def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                     "movbe{q}\t{$src, $dst|$dst, $src}",
-                     [(store (bswap GR64:$src), addr:$dst)]>,
-                     T8;
-  }
+  defm MOVBE16 : Movbe<0xF0, Xi16>, OpSize16, T8;
+  defm MOVBE32 : Movbe<0xF0, Xi32>, OpSize32, T8;
+  defm MOVBE64 : Movbe<0xF0, Xi64>, T8;
 }
 
 let Predicates = [HasMOVBE, HasEGPR, In64BitMode] in {
-  let SchedRW = [WriteALULd] in {
-  def MOVBE16rm_EVEX : I<0x60, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
-                         "movbe{w}\t{$src, $dst|$dst, $src}",
-                         [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>,
-                       EVEX, NoCD8, T_MAP4, PD;
-  def MOVBE32rm_EVEX : I<0x60, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                         "movbe{l}\t{$src, $dst|$dst, $src}",
-                         [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>,
-                       EVEX, NoCD8, T_MAP4;
-  def MOVBE64rm_EVEX : RI<0x60, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                         "movbe{q}\t{$src, $dst|$dst, $src}",
-                         [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>,
-                       EVEX, NoCD8, T_MAP4;
-  }
-  let SchedRW = [WriteStore] in {
-  def MOVBE16mr_EVEX : I<0x61, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
-                         "movbe{w}\t{$src, $dst|$dst, $src}",
-                         [(store (bswap GR16:$src), addr:$dst)]>,
-                       EVEX, NoCD8, T_MAP4, PD;
-  def MOVBE32mr_EVEX : I<0x61, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                         "movbe{l}\t{$src, $dst|$dst, $src}",
-                         [(store (bswap GR32:$src), addr:$dst)]>,
-                       EVEX, NoCD8, T_MAP4;
-  def MOVBE64mr_EVEX : RI<0x61, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                         "movbe{q}\t{$src, $dst|$dst, $src}",
-                         [(store (bswap GR64:$src), addr:$dst)]>,
-                       EVEX, NoCD8, T_MAP4;
-  }
+  defm MOVBE16 : Movbe<0x60, Xi16, "_EVEX">, EVEX, T_MAP4, PD;
+  defm MOVBE32 : Movbe<0x60, Xi32, "_EVEX">, EVEX, T_MAP4;
+  defm MOVBE64 : Movbe<0x60, Xi64, "_EVEX">, EVEX, T_MAP4;
 }
 
+multiclass Movberr<X86TypeInfo t> {
+  def rr_EVEX : ITy<0x61, MRMDestReg, t, (outs t.RegClass:$dst),
+                    (ins t.RegClass:$src1), "movbe", unaryop_ndd_args,
+                    [(set t.RegClass:$dst, (bswap t.RegClass:$src1))]>,
+                EVEX, T_MAP4;
+  def rr_EVEX_REV : ITy<0x60, MRMSrcReg, t, (outs t.RegClass:$dst),
+                        (ins t.RegClass:$src1), "movbe", unaryop_ndd_args, []>,
+                    EVEX, T_MAP4, DisassembleOnly;
+}
 let SchedRW = [WriteALU], Predicates = [HasMOVBE, HasNDD, In64BitMode] in {
-def MOVBE16rr_EVEX : I<0x61, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
-                        "movbe{w}\t{$src, $dst|$dst, $src}",
-                        [(set GR16:$dst, (bswap GR16:$src))]>,
-                      EVEX, NoCD8, T_MAP4, PD;
-def MOVBE32rr_EVEX : I<0x61, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
-                        "movbe{l}\t{$src, $dst|$dst, $src}",
-                        [(set GR32:$dst, (bswap GR32:$src))]>,
-                      EVEX, NoCD8, T_MAP4;
-def MOVBE64rr_EVEX : RI<0x61, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                        "movbe{q}\t{$src, $dst|$dst, $src}",
-                        [(set GR64:$dst, (bswap GR64:$src))]>,
-                      EVEX, NoCD8, T_MAP4;
-
-def MOVBE16rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
-                            "movbe{w}\t{$src, $dst|$dst, $src}", []>,
-                          EVEX, NoCD8, T_MAP4, PD, DisassembleOnly;
-def MOVBE32rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
-                            "movbe{l}\t{$src, $dst|$dst, $src}", []>,
-                          EVEX, NoCD8, T_MAP4, DisassembleOnly;
-def MOVBE64rr_EVEX_REV : RI<0x60, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                            "movbe{q}\t{$src, $dst|$dst, $src}", []>,
-                          EVEX, NoCD8, T_MAP4, DisassembleOnly;
+  defm MOVBE16 : Movberr<Xi16>, PD;
+  defm MOVBE32 : Movberr<Xi32>;
+  defm MOVBE64 : Movberr<Xi64>;
 }
 
 //===----------------------------------------------------------------------===//

>From 306a3c67d99b9c8c201ea861e5affc4f98205f23 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Fri, 19 Jan 2024 01:32:50 -0800
Subject: [PATCH 07/13] update test

---
 llvm/test/MC/X86/apx/movbe-att.s | 74 ++++++++++++++++----------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/llvm/test/MC/X86/apx/movbe-att.s b/llvm/test/MC/X86/apx/movbe-att.s
index c9019efa6e5f131..77128bed9f1aded 100644
--- a/llvm/test/MC/X86/apx/movbe-att.s
+++ b/llvm/test/MC/X86/apx/movbe-att.s
@@ -1,76 +1,76 @@
-# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
 # RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
 
 # ERROR-COUNT-18: error:
 # ERROR-NOT: error:
-# CHECK: {evex}	movbe	ax, dx
+# CHECK: {evex}	movbew	%dx, %ax
 # CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x61,0xd0]
-         {evex}	movbe	ax, dx
+         {evex}	movbew	%dx, %ax
 
-# CHECK: {evex}	movbe	word ptr [rax + 4*rbx + 123], dx
+# CHECK: {evex}	movbew	%dx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x61,0x54,0x98,0x7b]
-         {evex}	movbe	word ptr [rax + 4*rbx + 123], dx
+         {evex}	movbew	%dx, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	movbe	edx, ecx
+# CHECK: {evex}	movbel	%ecx, %edx
 # CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x61,0xca]
-         {evex}	movbe	edx, ecx
+         {evex}	movbel	%ecx, %edx
 
-# CHECK: {evex}	movbe	dword ptr [rax + 4*rbx + 123], ecx
+# CHECK: {evex}	movbel	%ecx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x61,0x4c,0x98,0x7b]
-         {evex}	movbe	dword ptr [rax + 4*rbx + 123], ecx
+         {evex}	movbel	%ecx, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	movbe	r15, r9
+# CHECK: {evex}	movbeq	%r9, %r15
 # CHECK: encoding: [0x62,0x54,0xfc,0x08,0x61,0xcf]
-         {evex}	movbe	r15, r9
+         {evex}	movbeq	%r9, %r15
 
-# CHECK: {evex}	movbe	qword ptr [rax + 4*rbx + 123], r9
+# CHECK: {evex}	movbeq	%r9, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x74,0xfc,0x08,0x61,0x4c,0x98,0x7b]
-         {evex}	movbe	qword ptr [rax + 4*rbx + 123], r9
+         {evex}	movbeq	%r9, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	movbe	dx, word ptr [rax + 4*rbx + 123]
+# CHECK: {evex}	movbew	123(%rax,%rbx,4), %dx
 # CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x60,0x54,0x98,0x7b]
-         {evex}	movbe	dx, word ptr [rax + 4*rbx + 123]
+         {evex}	movbew	123(%rax,%rbx,4), %dx
 
-# CHECK: {evex}	movbe	ecx, dword ptr [rax + 4*rbx + 123]
+# CHECK: {evex}	movbel	123(%rax,%rbx,4), %ecx
 # CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x60,0x4c,0x98,0x7b]
-         {evex}	movbe	ecx, dword ptr [rax + 4*rbx + 123]
+         {evex}	movbel	123(%rax,%rbx,4), %ecx
 
-# CHECK: {evex}	movbe	r9, qword ptr [rax + 4*rbx + 123]
+# CHECK: {evex}	movbeq	123(%rax,%rbx,4), %r9
 # CHECK: encoding: [0x62,0x74,0xfc,0x08,0x60,0x4c,0x98,0x7b]
-         {evex}	movbe	r9, qword ptr [rax + 4*rbx + 123]
+         {evex}	movbeq	123(%rax,%rbx,4), %r9
 
-# CHECK: movbe	r21w, r17w
+# CHECK: movbew	%r17w, %r21w
 # CHECK: encoding: [0x62,0xec,0x7d,0x08,0x61,0xcd]
-         movbe	r21w, r17w
+         movbew	%r17w, %r21w
 
-# CHECK: movbe	word ptr [r28 + 4*r29 + 291], r17w
+# CHECK: movbew	%r17w, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8c,0x79,0x08,0x61,0x8c,0xac,0x23,0x01,0x00,0x00]
-         movbe	word ptr [r28 + 4*r29 + 291], r17w
+         movbew	%r17w, 291(%r28,%r29,4)
 
-# CHECK: movbe	r22d, r18d
+# CHECK: movbel	%r18d, %r22d
 # CHECK: encoding: [0x62,0xec,0x7c,0x08,0x61,0xd6]
-         movbe	r22d, r18d
+         movbel	%r18d, %r22d
 
-# CHECK: movbe	dword ptr [r28 + 4*r29 + 291], r18d
+# CHECK: movbel	%r18d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8c,0x78,0x08,0x61,0x94,0xac,0x23,0x01,0x00,0x00]
-         movbe	dword ptr [r28 + 4*r29 + 291], r18d
+         movbel	%r18d, 291(%r28,%r29,4)
 
-# CHECK: movbe	r23, r19
+# CHECK: movbeq	%r19, %r23
 # CHECK: encoding: [0x62,0xec,0xfc,0x08,0x61,0xdf]
-         movbe	r23, r19
+         movbeq	%r19, %r23
 
-# CHECK: movbe	qword ptr [r28 + 4*r29 + 291], r19
+# CHECK: movbeq	%r19, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8c,0xf8,0x08,0x61,0x9c,0xac,0x23,0x01,0x00,0x00]
-         movbe	qword ptr [r28 + 4*r29 + 291], r19
+         movbeq	%r19, 291(%r28,%r29,4)
 
-# CHECK: movbe	r17w, word ptr [r28 + 4*r29 + 291]
+# CHECK: movbew	291(%r28,%r29,4), %r17w
 # CHECK: encoding: [0x62,0x8c,0x79,0x08,0x60,0x8c,0xac,0x23,0x01,0x00,0x00]
-         movbe	r17w, word ptr [r28 + 4*r29 + 291]
+         movbew	291(%r28,%r29,4), %r17w
 
-# CHECK: movbe	r18d, dword ptr [r28 + 4*r29 + 291]
+# CHECK: movbel	291(%r28,%r29,4), %r18d
 # CHECK: encoding: [0x62,0x8c,0x78,0x08,0x60,0x94,0xac,0x23,0x01,0x00,0x00]
-         movbe	r18d, dword ptr [r28 + 4*r29 + 291]
+         movbel	291(%r28,%r29,4), %r18d
 
-# CHECK: movbe	r19, qword ptr [r28 + 4*r29 + 291]
+# CHECK: movbeq	291(%r28,%r29,4), %r19
 # CHECK: encoding: [0x62,0x8c,0xf8,0x08,0x60,0x9c,0xac,0x23,0x01,0x00,0x00]
-         movbe	r19, qword ptr [r28 + 4*r29 + 291]
+         movbeq	291(%r28,%r29,4), %r19

>From a978e80bd6329b2d5cc555340bc46480a25e8eac Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Mon, 22 Jan 2024 02:49:55 -0800
Subject: [PATCH 08/13] resolve comments

---
 llvm/lib/Target/X86/X86InstrMisc.td    | 14 +++++++-------
 llvm/lib/Target/X86/X86InstrRAOINT.td  | 26 +++++++++++++-------------
 llvm/test/TableGen/x86-fold-tables.inc | 12 ++++++------
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 998b377b8f74369..5b6ecf475d59c5f 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1115,13 +1115,13 @@ let Predicates = [HasMOVBE, HasEGPR, In64BitMode] in {
 }
 
 multiclass Movberr<X86TypeInfo t> {
-  def rr_EVEX : ITy<0x61, MRMDestReg, t, (outs t.RegClass:$dst),
-                    (ins t.RegClass:$src1), "movbe", unaryop_ndd_args,
-                    [(set t.RegClass:$dst, (bswap t.RegClass:$src1))]>,
-                EVEX, T_MAP4;
-  def rr_EVEX_REV : ITy<0x60, MRMSrcReg, t, (outs t.RegClass:$dst),
-                        (ins t.RegClass:$src1), "movbe", unaryop_ndd_args, []>,
-                    EVEX, T_MAP4, DisassembleOnly;
+  def rr : ITy<0x61, MRMDestReg, t, (outs t.RegClass:$dst),
+               (ins t.RegClass:$src1), "movbe", unaryop_ndd_args,
+               [(set t.RegClass:$dst, (bswap t.RegClass:$src1))]>,
+           EVEX, T_MAP4;
+  def rr_REV : ITy<0x60, MRMSrcReg, t, (outs t.RegClass:$dst),
+                   (ins t.RegClass:$src1), "movbe", unaryop_ndd_args, []>,
+               EVEX, T_MAP4, DisassembleOnly;
 }
 let SchedRW = [WriteALU], Predicates = [HasMOVBE, HasNDD, In64BitMode] in {
   defm MOVBE16 : Movberr<Xi16>, PD;
diff --git a/llvm/lib/Target/X86/X86InstrRAOINT.td b/llvm/lib/Target/X86/X86InstrRAOINT.td
index c2f202bd616a51d..20fe59a4152a1e1 100644
--- a/llvm/lib/Target/X86/X86InstrRAOINT.td
+++ b/llvm/lib/Target/X86/X86InstrRAOINT.td
@@ -25,18 +25,18 @@ def X86rao_xor  : SDNode<"X86ISD::AXOR", SDTRAOBinaryArith,
 def X86rao_and  : SDNode<"X86ISD::AAND", SDTRAOBinaryArith,
                          [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
-multiclass RAOINT_BASE<string OpcodeStr, string Suffix = ""> {
+multiclass RAOINT_BASE<string m, string suffix = ""> {
   let Predicates = [HasRAOINT] in
-    def 32mr#Suffix : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                        !strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
-                        [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
-                      Sched<[WriteALURMW]>;
+    def 32mr#suffix : I<0xfc, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+                        !strconcat("a", m, "{l}\t", binop_args),
+                        [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR32:$src2)]>,
+                      NoCD8, Sched<[WriteALURMW]>;
 
   let Predicates = [HasRAOINT, In64BitMode] in
-    def 64mr#Suffix : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                        !strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
-                        [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
-                      Sched<[WriteALURMW]>, REX_W;
+    def 64mr#suffix : I<0xfc, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                        !strconcat("a", m, "{q}\t", binop_args),
+                        [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR64:$src2)]>,
+                      NoCD8, Sched<[WriteALURMW]>, REX_W;
 }
 
 let Predicates = [HasRAOINT, NoEGPR] in {
@@ -47,8 +47,8 @@ defm AXOR : RAOINT_BASE<"xor">, T8, XS;
 }
 
 let Predicates = [HasRAOINT, HasEGPR, In64BitMode] in {
-defm AADD : RAOINT_BASE<"add", "_EVEX">, EVEX, NoCD8, T_MAP4;
-defm AAND : RAOINT_BASE<"and", "_EVEX">, EVEX, NoCD8, T_MAP4, PD;
-defm AOR  : RAOINT_BASE<"or", "_EVEX">, EVEX, NoCD8, T_MAP4, XD;
-defm AXOR : RAOINT_BASE<"xor", "_EVEX">, EVEX, NoCD8, T_MAP4, XS;
+defm AADD : RAOINT_BASE<"add", "_EVEX">, EVEX, T_MAP4;
+defm AAND : RAOINT_BASE<"and", "_EVEX">, EVEX, T_MAP4, PD;
+defm AOR  : RAOINT_BASE<"or", "_EVEX">,  EVEX, T_MAP4, XD;
+defm AXOR : RAOINT_BASE<"xor", "_EVEX">, EVEX, T_MAP4, XS;
 }
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index f46b08c168d60e0..029beeff0564438 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -421,9 +421,9 @@ static const X86FoldTableEntry Table0[] = {
   {X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE|TB_NO_REVERSE},
   {X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
   {X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
-  {X86::MOVBE16rr_EVEX, X86::MOVBE16mr_EVEX, TB_FOLDED_STORE},
-  {X86::MOVBE32rr_EVEX, X86::MOVBE32mr_EVEX, TB_FOLDED_STORE},
-  {X86::MOVBE64rr_EVEX, X86::MOVBE64mr_EVEX, TB_FOLDED_STORE},
+  {X86::MOVBE16rr, X86::MOVBE16mr_EVEX, TB_FOLDED_STORE},
+  {X86::MOVBE32rr, X86::MOVBE32mr_EVEX, TB_FOLDED_STORE},
+  {X86::MOVBE64rr, X86::MOVBE64mr_EVEX, TB_FOLDED_STORE},
   {X86::MOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE|TB_NO_REVERSE},
   {X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE|TB_ALIGN_16},
   {X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE},
@@ -775,9 +775,9 @@ static const X86FoldTableEntry Table1[] = {
   {X86::MOV8rr, X86::MOV8rm, 0},
   {X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16},
   {X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16},
-  {X86::MOVBE16rr_EVEX, X86::MOVBE16rm_EVEX, 0},
-  {X86::MOVBE32rr_EVEX, X86::MOVBE32rm_EVEX, 0},
-  {X86::MOVBE64rr_EVEX, X86::MOVBE64rm_EVEX, 0},
+  {X86::MOVBE16rr, X86::MOVBE16rm_EVEX, 0},
+  {X86::MOVBE32rr, X86::MOVBE32rm_EVEX, 0},
+  {X86::MOVBE64rr, X86::MOVBE64rm_EVEX, 0},
   {X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE},
   {X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0},
   {X86::MOVDI2SSrr, X86::MOVSSrm_alt, 0},

>From 2f4b6074dccbbf81ac15dcd162b882946a2b2e58 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 23 Jan 2024 19:11:36 -0800
Subject: [PATCH 09/13] rewrite raoint multiclass

---
 llvm/lib/Target/X86/X86InstrRAOINT.td | 34 ++++++++++++---------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrRAOINT.td b/llvm/lib/Target/X86/X86InstrRAOINT.td
index 20fe59a4152a1e1..6558de82e6af653 100644
--- a/llvm/lib/Target/X86/X86InstrRAOINT.td
+++ b/llvm/lib/Target/X86/X86InstrRAOINT.td
@@ -25,30 +25,26 @@ def X86rao_xor  : SDNode<"X86ISD::AXOR", SDTRAOBinaryArith,
 def X86rao_and  : SDNode<"X86ISD::AAND", SDTRAOBinaryArith,
                          [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
-multiclass RAOINT_BASE<string m, string suffix = ""> {
-  let Predicates = [HasRAOINT] in
-    def 32mr#suffix : I<0xfc, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
-                        !strconcat("a", m, "{l}\t", binop_args),
-                        [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR32:$src2)]>,
-                      NoCD8, Sched<[WriteALURMW]>;
+multiclass RaoInt<string m, string suffix = ""> {
+  let SchedRW = [WriteALURMW, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault] in {
+    def 32mr#suffix : BinOpMR<0xfc, "a"#m, binop_args, Xi32, (outs),
+                        [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR32:$src2)]>;
 
-  let Predicates = [HasRAOINT, In64BitMode] in
-    def 64mr#suffix : I<0xfc, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                        !strconcat("a", m, "{q}\t", binop_args),
-                        [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR64:$src2)]>,
-                      NoCD8, Sched<[WriteALURMW]>, REX_W;
+    def 64mr#suffix : BinOpMR<0xfc, "a"#m, binop_args, Xi64, (outs),
+                        [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR64:$src2)]>, REX_W;
+  }
 }
 
 let Predicates = [HasRAOINT, NoEGPR] in {
-defm AADD : RAOINT_BASE<"add">, T8;
-defm AAND : RAOINT_BASE<"and">, T8, PD;
-defm AOR  : RAOINT_BASE<"or" >, T8, XD;
-defm AXOR : RAOINT_BASE<"xor">, T8, XS;
+defm AADD : RaoInt<"add">, T8;
+defm AAND : RaoInt<"and">, T8, PD;
+defm AOR  : RaoInt<"or" >, T8, XD;
+defm AXOR : RaoInt<"xor">, T8, XS;
 }
 
 let Predicates = [HasRAOINT, HasEGPR, In64BitMode] in {
-defm AADD : RAOINT_BASE<"add", "_EVEX">, EVEX, T_MAP4;
-defm AAND : RAOINT_BASE<"and", "_EVEX">, EVEX, T_MAP4, PD;
-defm AOR  : RAOINT_BASE<"or", "_EVEX">,  EVEX, T_MAP4, XD;
-defm AXOR : RAOINT_BASE<"xor", "_EVEX">, EVEX, T_MAP4, XS;
+defm AADD : RaoInt<"add", "_EVEX">, EVEX, T_MAP4;
+defm AAND : RaoInt<"and", "_EVEX">, EVEX, T_MAP4, PD;
+defm AOR  : RaoInt<"or",  "_EVEX">, EVEX, T_MAP4, XD;
+defm AXOR : RaoInt<"xor", "_EVEX">, EVEX, T_MAP4, XS;
 }

>From 9d0b8cb35ba80138ff8363437ed57fc21e4796f6 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Wed, 24 Jan 2024 00:26:26 -0800
Subject: [PATCH 10/13] support movberr to bswap

---
 llvm/lib/Target/X86/X86InstrMisc.td                 |  2 +-
 llvm/lib/Target/X86/X86InstrPredicates.td           |  1 +
 llvm/test/CodeGen/X86/apx/compress-evex.mir         | 10 ++++++++++
 llvm/test/CodeGen/X86/movbe.ll                      |  2 +-
 llvm/utils/TableGen/X86ManualCompressEVEXTables.def |  2 ++
 5 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 5b6ecf475d59c5f..c736e11e096178b 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -229,7 +229,7 @@ def PUSHA16  : I<0x60, RawFrm, (outs), (ins), "pushaw", []>,
                OpSize16, Requires<[Not64BitMode]>;
 }
 
-let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32], Predicates = [NoNDD] in {
+let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32], Predicates = [NoNDD_Or_NoMOVBE] in {
 // This instruction is a consequence of BSWAP32r observing operand size. The
 // encoding is valid, but the behavior is undefined.
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index cb751639a057532..7dd51ba6c027aec 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -122,6 +122,7 @@ def HasTBM       : Predicate<"Subtarget->hasTBM()">;
 def NoTBM        : Predicate<"!Subtarget->hasTBM()">;
 def HasLWP       : Predicate<"Subtarget->hasLWP()">;
 def HasMOVBE     : Predicate<"Subtarget->hasMOVBE()">;
+def NoNDD_Or_NoMOVBE     : Predicate<"!Subtarget->hasNDD() || !Subtarget->hasMOVBE()">;
 def HasRDRAND    : Predicate<"Subtarget->hasRDRAND()">;
 def HasF16C      : Predicate<"Subtarget->hasF16C()">;
 def HasFSGSBase  : Predicate<"Subtarget->hasFSGSBase()">;
diff --git a/llvm/test/CodeGen/X86/apx/compress-evex.mir b/llvm/test/CodeGen/X86/apx/compress-evex.mir
index 5a3d7ceb10c432e..541a4e44244da27 100644
--- a/llvm/test/CodeGen/X86/apx/compress-evex.mir
+++ b/llvm/test/CodeGen/X86/apx/compress-evex.mir
@@ -71,3 +71,13 @@ body:             |
     renamable $rax = XOR64rr_NF_ND killed renamable $rax, killed renamable $r16
     RET64 $rax
 ...
+---
+name:            bswapr_to_movberr
+body:             |
+  bb.0.entry:
+    liveins: $rdi
+    ; CHECK: bswapq  %rax                            # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xcf]
+    renamable $rax = MOVBE64rr killed renamable $rdi
+    RET64 killed $rax
+
+...
diff --git a/llvm/test/CodeGen/X86/movbe.ll b/llvm/test/CodeGen/X86/movbe.ll
index 52edb19aca151e3..26565e6f2f149fc 100644
--- a/llvm/test/CodeGen/X86/movbe.ll
+++ b/llvm/test/CodeGen/X86/movbe.ll
@@ -182,7 +182,7 @@ define i64 @test7(i64 %x) nounwind {
 ;
 ; EGPR-LABEL: test7:
 ; EGPR:       # %bb.0:
-; EGPR-NEXT:    movbeq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0x61,0xf8]
+; EGPR-NEXT:    bswapq %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xcf]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %bswap = call i64 @llvm.bswap.i64(i64 %x)
   ret i64 %bswap
diff --git a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def
index 58ca10e9e10f8df..77cf65be6842566 100644
--- a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def
+++ b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def
@@ -328,4 +328,6 @@ ENTRY(VBROADCASTSDZ256rm, VBROADCASTSDYrm)
 ENTRY(VBROADCASTSDZ256rr, VBROADCASTSDYrr)
 ENTRY(VPBROADCASTQZ256rm, VPBROADCASTQYrm)
 ENTRY(VPBROADCASTQZ256rr, VPBROADCASTQYrr)
+ENTRY(MOVBE32rr, BSWAP32r)
+ENTRY(MOVBE64rr, BSWAP64r)
 #undef ENTRY

>From a881b3c143ed6cdaeb7ead7d2dc97330cc8edce4 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Wed, 24 Jan 2024 23:04:14 -0800
Subject: [PATCH 11/13] resolve comments

---
 llvm/lib/Target/X86/X86InstrMisc.td           |  8 ++++----
 llvm/lib/Target/X86/X86InstrRAOINT.td         | 20 +++++++++----------
 llvm/test/CodeGen/X86/apx/compress-evex.mir   | 10 ----------
 llvm/test/CodeGen/X86/movbe.ll                |  2 +-
 .../TableGen/X86ManualCompressEVEXTables.def  |  2 --
 5 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index c736e11e096178b..408ccfad8029e2a 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1091,15 +1091,15 @@ def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
 // MOVBE Instructions
 //
 multiclass Movbe<bits<8> o, X86TypeInfo t, string suffix = ""> {
-  let SchedRW = [WriteALULd] in
   def rm#suffix : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst),
                       (ins t.MemOperand:$src1), "movbe", unaryop_ndd_args,
-                      [(set t.RegClass:$dst, (bswap (t.LoadNode addr:$src1)))]>;
-  let SchedRW = [WriteStore] in
+                      [(set t.RegClass:$dst, (bswap (t.LoadNode addr:$src1)))]>,
+                  Sched<[WriteALULd]>;
   def mr#suffix : ITy<!add(o, 1), MRMDestMem, t, (outs),
                       (ins t.MemOperand:$dst, t.RegClass:$src1),
                       "movbe", unaryop_ndd_args,
-                      [(store (bswap t.RegClass:$src1), addr:$dst)]>;
+                      [(store (bswap t.RegClass:$src1), addr:$dst)]>,
+                  Sched<[WriteStore]>;
 }
 
 let Predicates = [HasMOVBE, NoEGPR] in {
diff --git a/llvm/lib/Target/X86/X86InstrRAOINT.td b/llvm/lib/Target/X86/X86InstrRAOINT.td
index 6558de82e6af653..676d90a87bb285c 100644
--- a/llvm/lib/Target/X86/X86InstrRAOINT.td
+++ b/llvm/lib/Target/X86/X86InstrRAOINT.td
@@ -27,24 +27,24 @@ def X86rao_and  : SDNode<"X86ISD::AAND", SDTRAOBinaryArith,
 
 multiclass RaoInt<string m, string suffix = ""> {
   let SchedRW = [WriteALURMW, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault] in {
-    def 32mr#suffix : BinOpMR<0xfc, "a"#m, binop_args, Xi32, (outs),
+    def 32mr#suffix : BinOpMR<0xfc, "a" # m, binop_args, Xi32, (outs),
                         [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR32:$src2)]>;
 
-    def 64mr#suffix : BinOpMR<0xfc, "a"#m, binop_args, Xi64, (outs),
+    def 64mr#suffix : BinOpMR<0xfc, "a" # m, binop_args, Xi64, (outs),
                         [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR64:$src2)]>, REX_W;
   }
 }
 
 let Predicates = [HasRAOINT, NoEGPR] in {
-defm AADD : RaoInt<"add">, T8;
-defm AAND : RaoInt<"and">, T8, PD;
-defm AOR  : RaoInt<"or" >, T8, XD;
-defm AXOR : RaoInt<"xor">, T8, XS;
+  defm AADD : RaoInt<"add">, T8;
+  defm AAND : RaoInt<"and">, T8, PD;
+  defm AOR  : RaoInt<"or" >, T8, XD;
+  defm AXOR : RaoInt<"xor">, T8, XS;
 }
 
 let Predicates = [HasRAOINT, HasEGPR, In64BitMode] in {
-defm AADD : RaoInt<"add", "_EVEX">, EVEX, T_MAP4;
-defm AAND : RaoInt<"and", "_EVEX">, EVEX, T_MAP4, PD;
-defm AOR  : RaoInt<"or",  "_EVEX">, EVEX, T_MAP4, XD;
-defm AXOR : RaoInt<"xor", "_EVEX">, EVEX, T_MAP4, XS;
+  defm AADD : RaoInt<"add", "_EVEX">, EVEX, T_MAP4;
+  defm AAND : RaoInt<"and", "_EVEX">, EVEX, T_MAP4, PD;
+  defm AOR  : RaoInt<"or",  "_EVEX">, EVEX, T_MAP4, XD;
+  defm AXOR : RaoInt<"xor", "_EVEX">, EVEX, T_MAP4, XS;
 }
diff --git a/llvm/test/CodeGen/X86/apx/compress-evex.mir b/llvm/test/CodeGen/X86/apx/compress-evex.mir
index 541a4e44244da27..5a3d7ceb10c432e 100644
--- a/llvm/test/CodeGen/X86/apx/compress-evex.mir
+++ b/llvm/test/CodeGen/X86/apx/compress-evex.mir
@@ -71,13 +71,3 @@ body:             |
     renamable $rax = XOR64rr_NF_ND killed renamable $rax, killed renamable $r16
     RET64 $rax
 ...
----
-name:            bswapr_to_movberr
-body:             |
-  bb.0.entry:
-    liveins: $rdi
-    ; CHECK: bswapq  %rax                            # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xcf]
-    renamable $rax = MOVBE64rr killed renamable $rdi
-    RET64 killed $rax
-
-...
diff --git a/llvm/test/CodeGen/X86/movbe.ll b/llvm/test/CodeGen/X86/movbe.ll
index 26565e6f2f149fc..52edb19aca151e3 100644
--- a/llvm/test/CodeGen/X86/movbe.ll
+++ b/llvm/test/CodeGen/X86/movbe.ll
@@ -182,7 +182,7 @@ define i64 @test7(i64 %x) nounwind {
 ;
 ; EGPR-LABEL: test7:
 ; EGPR:       # %bb.0:
-; EGPR-NEXT:    bswapq %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xcf]
+; EGPR-NEXT:    movbeq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0x61,0xf8]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %bswap = call i64 @llvm.bswap.i64(i64 %x)
   ret i64 %bswap
diff --git a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def
index 77cf65be6842566..58ca10e9e10f8df 100644
--- a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def
+++ b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def
@@ -328,6 +328,4 @@ ENTRY(VBROADCASTSDZ256rm, VBROADCASTSDYrm)
 ENTRY(VBROADCASTSDZ256rr, VBROADCASTSDYrr)
 ENTRY(VPBROADCASTQZ256rm, VPBROADCASTQYrm)
 ENTRY(VPBROADCASTQZ256rr, VPBROADCASTQYrr)
-ENTRY(MOVBE32rr, BSWAP32r)
-ENTRY(MOVBE64rr, BSWAP64r)
 #undef ENTRY

>From 95148bcc5d90827a67c3c190b830251a9d479021 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Thu, 25 Jan 2024 02:15:18 -0800
Subject: [PATCH 12/13] add test for ndd,-movbe

---
 llvm/test/CodeGen/X86/movbe.ll | 45 ++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/X86/movbe.ll b/llvm/test/CodeGen/X86/movbe.ll
index 52edb19aca151e3..2ad50e256504c64 100644
--- a/llvm/test/CodeGen/X86/movbe.ll
+++ b/llvm/test/CodeGen/X86/movbe.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s
 ; RUN: llc -mtriple=x86_64-linux -mcpu=slm < %s | FileCheck %s -check-prefix=SLM
-; RUN: llc -mtriple=x86_64-linux -mcpu=slm -mattr=+egpr,+ndd --show-mc-encoding < %s | FileCheck %s -check-prefix=EGPR
-
+; RUN: llc -mtriple=x86_64-linux -mattr=+egpr,+ndd,+movbe --show-mc-encoding < %s | FileCheck %s -check-prefix=EGPR
+; RUN: llc -mtriple=x86_64-linux -mattr=+egpr,+ndd --show-mc-encoding < %s | FileCheck %s -check-prefix=NOMOVBE
 declare i16 @llvm.bswap.i16(i16) nounwind readnone
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
 declare i64 @llvm.bswap.i64(i64) nounwind readnone
@@ -24,6 +24,12 @@ define void @test1(ptr nocapture %x, i16 %y) nounwind {
 ; SLM-NEXT:    movbew %si, (%rdi)
 ; SLM-NEXT:    retq
 ;
+; NOMOVBE-LABEL: test1:
+; NOMOVBE:       # %bb.0:
+; NOMOVBE-NEXT:    rolw $8, %si, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0xc6,0x08]
+; NOMOVBE-NEXT:    movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
+; NOMOVBE-NEXT:    retq # encoding: [0xc3]
+;
 ; EGPR-LABEL: test1:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movbew %si, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf1,0x37]
@@ -50,6 +56,11 @@ define i16 @test2(ptr %x) nounwind {
 ; SLM-NEXT:    movbew (%rdi), %ax
 ; SLM-NEXT:    retq
 ;
+; NOMOVBE-LABEL: test2:
+; NOMOVBE:       # %bb.0:
+; NOMOVBE-NEXT:    rolw $8, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0x07,0x08]
+; NOMOVBE-NEXT:    retq # encoding: [0xc3]
+;
 ; EGPR-LABEL: test2:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movbew (%rdi), %ax # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf0,0x07]
@@ -76,6 +87,12 @@ define void @test3(ptr nocapture %x, i32 %y) nounwind {
 ; SLM-NEXT:    movbel %esi, (%rdi)
 ; SLM-NEXT:    retq
 ;
+; NOMOVBE-LABEL: test3:
+; NOMOVBE:       # %bb.0:
+; NOMOVBE-NEXT:    bswapl %esi # encoding: [0x0f,0xce]
+; NOMOVBE-NEXT:    movl %esi, (%rdi) # encoding: [0x89,0x37]
+; NOMOVBE-NEXT:    retq # encoding: [0xc3]
+;
 ; EGPR-LABEL: test3:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movbel %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf1,0x37]
@@ -102,6 +119,12 @@ define i32 @test4(ptr %x) nounwind {
 ; SLM-NEXT:    movbel (%rdi), %eax
 ; SLM-NEXT:    retq
 ;
+; NOMOVBE-LABEL: test4:
+; NOMOVBE:       # %bb.0:
+; NOMOVBE-NEXT:    movl (%rdi), %eax # encoding: [0x8b,0x07]
+; NOMOVBE-NEXT:    bswapl %eax # encoding: [0x0f,0xc8]
+; NOMOVBE-NEXT:    retq # encoding: [0xc3]
+;
 ; EGPR-LABEL: test4:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movbel (%rdi), %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf0,0x07]
@@ -128,6 +151,12 @@ define void @test5(ptr %x, i64 %y) nounwind {
 ; SLM-NEXT:    movbeq %rsi, (%rdi)
 ; SLM-NEXT:    retq
 ;
+; NOMOVBE-LABEL: test5:
+; NOMOVBE:       # %bb.0:
+; NOMOVBE-NEXT:    bswapq %rsi # encoding: [0x48,0x0f,0xce]
+; NOMOVBE-NEXT:    movq %rsi, (%rdi) # encoding: [0x48,0x89,0x37]
+; NOMOVBE-NEXT:    retq # encoding: [0xc3]
+;
 ; EGPR-LABEL: test5:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movbeq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf1,0x37]
@@ -154,6 +183,12 @@ define i64 @test6(ptr %x) nounwind {
 ; SLM-NEXT:    movbeq (%rdi), %rax
 ; SLM-NEXT:    retq
 ;
+; NOMOVBE-LABEL: test6:
+; NOMOVBE:       # %bb.0:
+; NOMOVBE-NEXT:    movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
+; NOMOVBE-NEXT:    bswapq %rax # encoding: [0x48,0x0f,0xc8]
+; NOMOVBE-NEXT:    retq # encoding: [0xc3]
+;
 ; EGPR-LABEL: test6:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movbeq (%rdi), %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf0,0x07]
@@ -180,6 +215,12 @@ define i64 @test7(i64 %x) nounwind {
 ; SLM-NEXT:    bswapq %rax
 ; SLM-NEXT:    retq
 ;
+; NOMOVBE-LABEL: test7:
+; NOMOVBE:       # %bb.0:
+; NOMOVBE-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; NOMOVBE-NEXT:    bswapq %rax # encoding: [0x48,0x0f,0xc8]
+; NOMOVBE-NEXT:    retq # encoding: [0xc3]
+;
 ; EGPR-LABEL: test7:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movbeq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0x61,0xf8]

>From 2d2658b008bae74407ca1670e53a7cec03b9b81a Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Thu, 25 Jan 2024 02:16:39 -0800
Subject: [PATCH 13/13] reorder

---
 llvm/test/CodeGen/X86/movbe.ll | 70 +++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/llvm/test/CodeGen/X86/movbe.ll b/llvm/test/CodeGen/X86/movbe.ll
index 2ad50e256504c64..721823344867d0c 100644
--- a/llvm/test/CodeGen/X86/movbe.ll
+++ b/llvm/test/CodeGen/X86/movbe.ll
@@ -24,16 +24,16 @@ define void @test1(ptr nocapture %x, i16 %y) nounwind {
 ; SLM-NEXT:    movbew %si, (%rdi)
 ; SLM-NEXT:    retq
 ;
+; EGPR-LABEL: test1:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbew %si, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf1,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
+;
 ; NOMOVBE-LABEL: test1:
 ; NOMOVBE:       # %bb.0:
 ; NOMOVBE-NEXT:    rolw $8, %si, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0xc6,0x08]
 ; NOMOVBE-NEXT:    movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
 ; NOMOVBE-NEXT:    retq # encoding: [0xc3]
-;
-; EGPR-LABEL: test1:
-; EGPR:       # %bb.0:
-; EGPR-NEXT:    movbew %si, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf1,0x37]
-; EGPR-NEXT:    retq # encoding: [0xc3]
   %bswap = call i16 @llvm.bswap.i16(i16 %y)
   store i16 %bswap, ptr %x, align 2
   ret void
@@ -56,15 +56,15 @@ define i16 @test2(ptr %x) nounwind {
 ; SLM-NEXT:    movbew (%rdi), %ax
 ; SLM-NEXT:    retq
 ;
-; NOMOVBE-LABEL: test2:
-; NOMOVBE:       # %bb.0:
-; NOMOVBE-NEXT:    rolw $8, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0x07,0x08]
-; NOMOVBE-NEXT:    retq # encoding: [0xc3]
-;
 ; EGPR-LABEL: test2:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movbew (%rdi), %ax # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf0,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
+;
+; NOMOVBE-LABEL: test2:
+; NOMOVBE:       # %bb.0:
+; NOMOVBE-NEXT:    rolw $8, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0x07,0x08]
+; NOMOVBE-NEXT:    retq # encoding: [0xc3]
   %load = load i16, ptr %x, align 2
   %bswap = call i16 @llvm.bswap.i16(i16 %load)
   ret i16 %bswap
@@ -87,16 +87,16 @@ define void @test3(ptr nocapture %x, i32 %y) nounwind {
 ; SLM-NEXT:    movbel %esi, (%rdi)
 ; SLM-NEXT:    retq
 ;
+; EGPR-LABEL: test3:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbel %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf1,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
+;
 ; NOMOVBE-LABEL: test3:
 ; NOMOVBE:       # %bb.0:
 ; NOMOVBE-NEXT:    bswapl %esi # encoding: [0x0f,0xce]
 ; NOMOVBE-NEXT:    movl %esi, (%rdi) # encoding: [0x89,0x37]
 ; NOMOVBE-NEXT:    retq # encoding: [0xc3]
-;
-; EGPR-LABEL: test3:
-; EGPR:       # %bb.0:
-; EGPR-NEXT:    movbel %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf1,0x37]
-; EGPR-NEXT:    retq # encoding: [0xc3]
   %bswap = call i32 @llvm.bswap.i32(i32 %y)
   store i32 %bswap, ptr %x, align 4
   ret void
@@ -119,16 +119,16 @@ define i32 @test4(ptr %x) nounwind {
 ; SLM-NEXT:    movbel (%rdi), %eax
 ; SLM-NEXT:    retq
 ;
+; EGPR-LABEL: test4:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbel (%rdi), %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf0,0x07]
+; EGPR-NEXT:    retq # encoding: [0xc3]
+;
 ; NOMOVBE-LABEL: test4:
 ; NOMOVBE:       # %bb.0:
 ; NOMOVBE-NEXT:    movl (%rdi), %eax # encoding: [0x8b,0x07]
 ; NOMOVBE-NEXT:    bswapl %eax # encoding: [0x0f,0xc8]
 ; NOMOVBE-NEXT:    retq # encoding: [0xc3]
-;
-; EGPR-LABEL: test4:
-; EGPR:       # %bb.0:
-; EGPR-NEXT:    movbel (%rdi), %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf0,0x07]
-; EGPR-NEXT:    retq # encoding: [0xc3]
   %load = load i32, ptr %x, align 4
   %bswap = call i32 @llvm.bswap.i32(i32 %load)
   ret i32 %bswap
@@ -151,16 +151,16 @@ define void @test5(ptr %x, i64 %y) nounwind {
 ; SLM-NEXT:    movbeq %rsi, (%rdi)
 ; SLM-NEXT:    retq
 ;
+; EGPR-LABEL: test5:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbeq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf1,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
+;
 ; NOMOVBE-LABEL: test5:
 ; NOMOVBE:       # %bb.0:
 ; NOMOVBE-NEXT:    bswapq %rsi # encoding: [0x48,0x0f,0xce]
 ; NOMOVBE-NEXT:    movq %rsi, (%rdi) # encoding: [0x48,0x89,0x37]
 ; NOMOVBE-NEXT:    retq # encoding: [0xc3]
-;
-; EGPR-LABEL: test5:
-; EGPR:       # %bb.0:
-; EGPR-NEXT:    movbeq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf1,0x37]
-; EGPR-NEXT:    retq # encoding: [0xc3]
   %bswap = call i64 @llvm.bswap.i64(i64 %y)
   store i64 %bswap, ptr %x, align 8
   ret void
@@ -183,16 +183,16 @@ define i64 @test6(ptr %x) nounwind {
 ; SLM-NEXT:    movbeq (%rdi), %rax
 ; SLM-NEXT:    retq
 ;
+; EGPR-LABEL: test6:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbeq (%rdi), %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf0,0x07]
+; EGPR-NEXT:    retq # encoding: [0xc3]
+;
 ; NOMOVBE-LABEL: test6:
 ; NOMOVBE:       # %bb.0:
 ; NOMOVBE-NEXT:    movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
 ; NOMOVBE-NEXT:    bswapq %rax # encoding: [0x48,0x0f,0xc8]
 ; NOMOVBE-NEXT:    retq # encoding: [0xc3]
-;
-; EGPR-LABEL: test6:
-; EGPR:       # %bb.0:
-; EGPR-NEXT:    movbeq (%rdi), %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf0,0x07]
-; EGPR-NEXT:    retq # encoding: [0xc3]
   %load = load i64, ptr %x, align 8
   %bswap = call i64 @llvm.bswap.i64(i64 %load)
   ret i64 %bswap
@@ -215,16 +215,16 @@ define i64 @test7(i64 %x) nounwind {
 ; SLM-NEXT:    bswapq %rax
 ; SLM-NEXT:    retq
 ;
+; EGPR-LABEL: test7:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movbeq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0x61,0xf8]
+; EGPR-NEXT:    retq # encoding: [0xc3]
+;
 ; NOMOVBE-LABEL: test7:
 ; NOMOVBE:       # %bb.0:
 ; NOMOVBE-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
 ; NOMOVBE-NEXT:    bswapq %rax # encoding: [0x48,0x0f,0xc8]
 ; NOMOVBE-NEXT:    retq # encoding: [0xc3]
-;
-; EGPR-LABEL: test7:
-; EGPR:       # %bb.0:
-; EGPR-NEXT:    movbeq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0x61,0xf8]
-; EGPR-NEXT:    retq # encoding: [0xc3]
   %bswap = call i64 @llvm.bswap.i64(i64 %x)
   ret i64 %bswap
 }



More information about the llvm-commits mailing list