[llvm] [X86] Support encoding/decoding and lowering for APX variant SHL/SHR/SAR/ROL/ROR/RCL/RCR/SHLD/SHRD (PR #78853)

Shengchen Kan via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 22 01:04:46 PST 2024


https://github.com/KanRobert updated https://github.com/llvm/llvm-project/pull/78853

>From 08c88cde74a8b6feb0a0a7de72ba62b246edf393 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Thu, 18 Jan 2024 23:01:18 +0800
Subject: [PATCH 1/4] [X86] Support encoding/decoding and lowering for APX
 variant SHL/SHR/SAR/SHLD/SHRD

---
 llvm/lib/Target/X86/X86InstrShiftRotate.td | 409 ++++++++++++++--
 llvm/lib/Target/X86/X86InstrUtils.td       |  50 +-
 llvm/test/CodeGen/X86/apx/rol.ll           | 530 +++++++++++++++++++++
 llvm/test/CodeGen/X86/apx/ror.ll           | 530 +++++++++++++++++++++
 llvm/test/CodeGen/X86/apx/sar.ll           | 434 +++++++++++++++++
 llvm/test/CodeGen/X86/apx/shl.ll           | 434 +++++++++++++++++
 llvm/test/CodeGen/X86/apx/shld.ll          | 228 +++++++++
 llvm/test/CodeGen/X86/apx/shr.ll           | 434 +++++++++++++++++
 llvm/test/CodeGen/X86/apx/shrd.ll          | 240 ++++++++++
 llvm/test/MC/Disassembler/X86/apx/rcl.txt  | 194 ++++++++
 llvm/test/MC/Disassembler/X86/apx/rcr.txt  | 194 ++++++++
 llvm/test/MC/Disassembler/X86/apx/rol.txt  | 386 +++++++++++++++
 llvm/test/MC/Disassembler/X86/apx/ror.txt  | 386 +++++++++++++++
 llvm/test/MC/Disassembler/X86/apx/sar.txt  | 386 +++++++++++++++
 llvm/test/MC/Disassembler/X86/apx/shl.txt  | 386 +++++++++++++++
 llvm/test/MC/Disassembler/X86/apx/shld.txt | 194 ++++++++
 llvm/test/MC/Disassembler/X86/apx/shr.txt  | 386 +++++++++++++++
 llvm/test/MC/Disassembler/X86/apx/shrd.txt | 194 ++++++++
 llvm/test/MC/X86/apx/rcl-att.s             | 146 ++++++
 llvm/test/MC/X86/apx/rcl-intel.s           | 143 ++++++
 llvm/test/MC/X86/apx/rcr-att.s             | 146 ++++++
 llvm/test/MC/X86/apx/rcr-intel.s           | 143 ++++++
 llvm/test/MC/X86/apx/rol-att.s             | 287 +++++++++++
 llvm/test/MC/X86/apx/rol-intel.s           | 284 +++++++++++
 llvm/test/MC/X86/apx/ror-att.s             | 287 +++++++++++
 llvm/test/MC/X86/apx/ror-intel.s           | 284 +++++++++++
 llvm/test/MC/X86/apx/sar-att.s             | 287 +++++++++++
 llvm/test/MC/X86/apx/sar-intel.s           | 284 +++++++++++
 llvm/test/MC/X86/apx/shl-att.s             | 287 +++++++++++
 llvm/test/MC/X86/apx/shl-intel.s           | 284 +++++++++++
 llvm/test/MC/X86/apx/shld-att.s            | 149 ++++++
 llvm/test/MC/X86/apx/shld-intel.s          | 146 ++++++
 llvm/test/MC/X86/apx/shr-att.s             | 287 +++++++++++
 llvm/test/MC/X86/apx/shr-intel.s           | 284 +++++++++++
 llvm/test/MC/X86/apx/shrd-att.s            | 149 ++++++
 llvm/test/MC/X86/apx/shrd-intel.s          | 146 ++++++
 llvm/test/TableGen/x86-fold-tables.inc     | 240 ++++++++++
 37 files changed, 10196 insertions(+), 62 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/apx/rol.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/ror.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/sar.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/shl.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/shld.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/shr.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/shrd.ll
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/rcl.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/rcr.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/rol.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/ror.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/sar.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/shl.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/shld.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/shr.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/shrd.txt
 create mode 100644 llvm/test/MC/X86/apx/rcl-att.s
 create mode 100644 llvm/test/MC/X86/apx/rcl-intel.s
 create mode 100644 llvm/test/MC/X86/apx/rcr-att.s
 create mode 100644 llvm/test/MC/X86/apx/rcr-intel.s
 create mode 100644 llvm/test/MC/X86/apx/rol-att.s
 create mode 100644 llvm/test/MC/X86/apx/rol-intel.s
 create mode 100644 llvm/test/MC/X86/apx/ror-att.s
 create mode 100644 llvm/test/MC/X86/apx/ror-intel.s
 create mode 100644 llvm/test/MC/X86/apx/sar-att.s
 create mode 100644 llvm/test/MC/X86/apx/sar-intel.s
 create mode 100644 llvm/test/MC/X86/apx/shl-att.s
 create mode 100644 llvm/test/MC/X86/apx/shl-intel.s
 create mode 100644 llvm/test/MC/X86/apx/shld-att.s
 create mode 100644 llvm/test/MC/X86/apx/shld-intel.s
 create mode 100644 llvm/test/MC/X86/apx/shr-att.s
 create mode 100644 llvm/test/MC/X86/apx/shr-intel.s
 create mode 100644 llvm/test/MC/X86/apx/shrd-att.s
 create mode 100644 llvm/test/MC/X86/apx/shrd-intel.s

diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 7166e0bc39179c..7e2893f340973a 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -20,22 +20,66 @@ multiclass ShiftRotate<string m, Format RegMRM, Format MemMRM, SDPatternOperator
 
   let Uses = uses in {
     let isConvertibleToThreeAddress = !if(!eq(m, "shl"), 1, 0) in {
-      def 8ri  : BinOpRI8U_R<m, RegMRM, Xi8, node>, Sched<[ri]>, DefEFLAGS;
-      def 16ri : BinOpRI8U_R<m, RegMRM, Xi16, node>, Sched<[ri]>, DefEFLAGS, OpSize16;
-      def 32ri : BinOpRI8U_R<m, RegMRM, Xi32, node>, Sched<[ri]>, DefEFLAGS, OpSize32;
-      def 64ri : BinOpRI8U_R<m, RegMRM, Xi64, node>, Sched<[ri]>, DefEFLAGS;
+      let Predicates = [NoNDD] in {
+        def 8ri  : BinOpRI8U_R<m, RegMRM, Xi8, node>, Sched<[ri]>, DefEFLAGS;
+        def 16ri : BinOpRI8U_R<m, RegMRM, Xi16, node>, Sched<[ri]>, DefEFLAGS, OpSize16;
+        def 32ri : BinOpRI8U_R<m, RegMRM, Xi32, node>, Sched<[ri]>, DefEFLAGS, OpSize32;
+        def 64ri : BinOpRI8U_R<m, RegMRM, Xi64, node>, Sched<[ri]>, DefEFLAGS;
+      }
+      let Predicates = [HasNDD, In64BitMode] in {
+        def 8ri_ND  : BinOpRI8U_R<m, RegMRM, Xi8, node, 1>, Sched<[ri]>, DefEFLAGS;
+        def 16ri_ND : BinOpRI8U_R<m, RegMRM, Xi16, node, 1>, Sched<[ri]>, DefEFLAGS, PD;
+        def 32ri_ND : BinOpRI8U_R<m, RegMRM, Xi32, node, 1>, Sched<[ri]>, DefEFLAGS;
+        def 64ri_ND : BinOpRI8U_R<m, RegMRM, Xi64, node, 1>, Sched<[ri]>, DefEFLAGS;
+      }
+      let Predicates = [In64BitMode] in {
+        def 8ri_EVEX  : BinOpRI8U_R<m, RegMRM, Xi8, null_frag>, Sched<[ri]>, DefEFLAGS, PL;
+        def 16ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi16, null_frag>, Sched<[ri]>, DefEFLAGS, PL, PD;
+        def 32ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi32, null_frag>, Sched<[ri]>, DefEFLAGS, PL;
+        def 64ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi64, null_frag>, Sched<[ri]>, DefEFLAGS, PL;
+      }
     }
 
     def 8mi  : BinOpMI8U_M<m, MemMRM, Xi8, node>, Sched<[mi, WriteRMW]>, DefEFLAGS;
     def 16mi : BinOpMI8U_M<m, MemMRM, Xi16, node>, Sched<[mi, WriteRMW]>, DefEFLAGS, OpSize16;
     def 32mi : BinOpMI8U_M<m, MemMRM, Xi32, node>, Sched<[mi, WriteRMW]>, DefEFLAGS, OpSize32;
     def 64mi : BinOpMI8U_M<m, MemMRM, Xi64, node>, Sched<[mi, WriteRMW]>, DefEFLAGS, Requires<[In64BitMode]>;
+    let Predicates = [HasNDD, In64BitMode] in {
+      def 8mi_ND  : BinOpMI8U_R<m, MemMRM, Xi8, node>, Sched<[mi, ri]>, DefEFLAGS;
+      def 16mi_ND : BinOpMI8U_R<m, MemMRM, Xi16, node>, Sched<[mi, ri]>, DefEFLAGS, PD;
+      def 32mi_ND : BinOpMI8U_R<m, MemMRM, Xi32, node>, Sched<[mi, ri]>, DefEFLAGS;
+      def 64mi_ND : BinOpMI8U_R<m, MemMRM, Xi64, node>, Sched<[mi, ri]>, DefEFLAGS;
+    }
+    let Predicates = [In64BitMode] in {
+      def 8mi_EVEX  : BinOpMI8U_M<m, MemMRM, Xi8, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
+      def 16mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi16, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL, PD;
+      def 32mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi32, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
+      def 64mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi64, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
+    }
 
     let SchedRW = [ri] in {
       def 8r1  : UnaryOpR_RF<0xD1, RegMRM, m, Xi8, null_frag>;
       def 16r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi16, null_frag>, OpSize16;
       def 32r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi32, null_frag>, OpSize32;
       def 64r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi64, null_frag>;
+
+      // FIXME: Assembler can't tell whether it's 8r1_ND or 8rCL when the source register is cl, e.g.
+      //
+      //  shlb %cl, %al
+      //
+      // GNU binutils distinguishes them by adding an explicit $1 to the asm string of 8r1_ND. But we don't yet support
+      // constant immediates in the asm string for X86 in TD. So we add DisassembleOnly for 8r1_ND for the time being.
+      let Predicates = [In64BitMode] in {
+        def 8r1_ND  : UnaryOpR_RF<0xD1, RegMRM, m, Xi8, null_frag, 1>, DisassembleOnly;
+        def 16r1_ND : UnaryOpR_RF<0xD1, RegMRM, m, Xi16, null_frag, 1>, PD;
+        def 32r1_ND : UnaryOpR_RF<0xD1, RegMRM, m, Xi32, null_frag, 1>;
+        def 64r1_ND : UnaryOpR_RF<0xD1, RegMRM, m, Xi64, null_frag, 1>;
+
+        def 8r1_EVEX  : UnaryOpR_RF<0xD1, RegMRM, m, Xi8, null_frag>, PL;
+        def 16r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi16, null_frag>, PL, PD;
+        def 32r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi32, null_frag>, PL;
+        def 64r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi64, null_frag>, PL;
+      }
     }
 
     let SchedRW = [mi, WriteRMW] in {
@@ -43,22 +87,142 @@ multiclass ShiftRotate<string m, Format RegMRM, Format MemMRM, SDPatternOperator
       def 16m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi16, null_frag>, OpSize16;
       def 32m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi32, null_frag>, OpSize32;
       def 64m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi64, null_frag>, Requires<[In64BitMode]>;
+
+      let Predicates = [In64BitMode] in {
+        def 8m1_EVEX  : UnaryOpM_MF<0xD1, MemMRM, m, Xi8, null_frag>, PL;
+        def 16m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi16, null_frag>, PL, PD;
+        def 32m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi32, null_frag>, PL;
+        def 64m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi64, null_frag>, PL;
+      }
+    }
+    let SchedRW = [mi, ri], Predicates = [In64BitMode] in {
+      def 8m1_ND  : UnaryOpM_RF<0xD1, MemMRM, m, Xi8, null_frag>;
+      def 16m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi16, null_frag>, PD;
+      def 32m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi32, null_frag>;
+      def 64m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi64, null_frag>;
     }
   }
 
   let Uses = !listconcat([CL], uses) in {
-    def 8rCL  : BinOpRC_R<m, RegMRM, Xi8, node>, Sched<[rCL]>, DefEFLAGS;
-    def 16rCL : BinOpRC_R<m, RegMRM, Xi16, node>, Sched<[rCL]>, DefEFLAGS, OpSize16;
-    def 32rCL : BinOpRC_R<m, RegMRM, Xi32, node>, Sched<[rCL]>, DefEFLAGS, OpSize32;
-    def 64rCL : BinOpRC_R<m, RegMRM, Xi64, node>, Sched<[rCL]>, DefEFLAGS;
+    let Predicates = [NoNDD] in {
+      def 8rCL  : BinOpRC_R<m, RegMRM, Xi8, node>, Sched<[rCL]>, DefEFLAGS;
+      def 16rCL : BinOpRC_R<m, RegMRM, Xi16, node>, Sched<[rCL]>, DefEFLAGS, OpSize16;
+      def 32rCL : BinOpRC_R<m, RegMRM, Xi32, node>, Sched<[rCL]>, DefEFLAGS, OpSize32;
+      def 64rCL : BinOpRC_R<m, RegMRM, Xi64, node>, Sched<[rCL]>, DefEFLAGS;
+    }
+    let Predicates = [HasNDD, In64BitMode] in {
+      def 8rCL_ND  : BinOpRC_R<m, RegMRM, Xi8, node, 1>, Sched<[rCL]>, DefEFLAGS;
+      def 16rCL_ND : BinOpRC_R<m, RegMRM, Xi16, node, 1>, Sched<[rCL]>, DefEFLAGS, PD;
+      def 32rCL_ND : BinOpRC_R<m, RegMRM, Xi32, node, 1>, Sched<[rCL]>, DefEFLAGS;
+      def 64rCL_ND : BinOpRC_R<m, RegMRM, Xi64, node, 1>, Sched<[rCL]>, DefEFLAGS;
+    }
+    let Predicates = [In64BitMode] in {
+      def 8rCL_EVEX  : BinOpRC_R<m, RegMRM, Xi8, null_frag>, Sched<[rCL]>, DefEFLAGS, PL;
+      def 16rCL_EVEX : BinOpRC_R<m, RegMRM, Xi16, null_frag>, Sched<[rCL]>, DefEFLAGS, PL, PD;
+      def 32rCL_EVEX : BinOpRC_R<m, RegMRM, Xi32, null_frag>, Sched<[rCL]>, DefEFLAGS, PL;
+      def 64rCL_EVEX : BinOpRC_R<m, RegMRM, Xi64, null_frag>, Sched<[rCL]>, DefEFLAGS, PL;
+    }
 
     def 8mCL  : BinOpMC_M<m, MemMRM, Xi8, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS;
     def 16mCL : BinOpMC_M<m, MemMRM, Xi16, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS, OpSize16;
     def 32mCL : BinOpMC_M<m, MemMRM, Xi32, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS, OpSize32;
     def 64mCL : BinOpMC_M<m, MemMRM, Xi64, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS, Requires<[In64BitMode]>;
+
+    let Predicates = [HasNDD, In64BitMode] in {
+      def 8mCL_ND  : BinOpMC_R<m, MemMRM, Xi8, node>, Sched<[mCL, rCL]>, DefEFLAGS;
+      def 16mCL_ND : BinOpMC_R<m, MemMRM, Xi16, node>, Sched<[mCL, rCL]>, DefEFLAGS, PD;
+      def 32mCL_ND : BinOpMC_R<m, MemMRM, Xi32, node>, Sched<[mCL, rCL]>, DefEFLAGS;
+      def 64mCL_ND : BinOpMC_R<m, MemMRM, Xi64, node>, Sched<[mCL, rCL]>, DefEFLAGS;
+    }
+
+    let Predicates = [In64BitMode] in {
+      def 8mCL_EVEX  : BinOpMC_M<m, MemMRM, Xi8, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL;
+      def 16mCL_EVEX : BinOpMC_M<m, MemMRM, Xi16, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL, PD;
+      def 32mCL_EVEX : BinOpMC_M<m, MemMRM, Xi32, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL;
+      def 64mCL_EVEX : BinOpMC_M<m, MemMRM, Xi64, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL;
+    }
   }
 }
 
+multiclass ShiftRotate_NF<string m, Format RegMRM, Format MemMRM, SchedReadWrite rCL,
+                          SchedReadWrite ri, SchedReadWrite mCL, SchedReadWrite mi> {
+  let Predicates = [In64BitMode] in {
+    let isConvertibleToThreeAddress = !if(!eq(m, "shl"), 1, 0) in {
+      def 8ri_NF  : BinOpRI8U_R<m, RegMRM, Xi8, null_frag>, Sched<[ri]>, NF;
+      def 16ri_NF : BinOpRI8U_R<m, RegMRM, Xi16, null_frag>, Sched<[ri]>, NF, PD;
+      def 32ri_NF : BinOpRI8U_R<m, RegMRM, Xi32, null_frag>, Sched<[ri]>, NF;
+      def 64ri_NF : BinOpRI8U_R<m, RegMRM, Xi64, null_frag>, Sched<[ri]>, NF;
+
+      def 8ri_NF_ND  : BinOpRI8U_R<m, RegMRM, Xi8, null_frag, 1>, Sched<[ri]>, EVEX_NF;
+      def 16ri_NF_ND : BinOpRI8U_R<m, RegMRM, Xi16, null_frag, 1>, Sched<[ri]>, EVEX_NF, PD;
+      def 32ri_NF_ND : BinOpRI8U_R<m, RegMRM, Xi32, null_frag, 1>, Sched<[ri]>, EVEX_NF;
+      def 64ri_NF_ND : BinOpRI8U_R<m, RegMRM, Xi64, null_frag, 1>, Sched<[ri]>, EVEX_NF;
+    }
+
+    def 8mi_NF  : BinOpMI8U_M<m, MemMRM, Xi8, null_frag>, Sched<[mi, WriteRMW]>, NF;
+    def 16mi_NF : BinOpMI8U_M<m, MemMRM, Xi16, null_frag>, Sched<[mi, WriteRMW]>, NF, PD;
+    def 32mi_NF : BinOpMI8U_M<m, MemMRM, Xi32, null_frag>, Sched<[mi, WriteRMW]>, NF;
+    def 64mi_NF : BinOpMI8U_M<m, MemMRM, Xi64, null_frag>, Sched<[mi, WriteRMW]>, NF;
+
+    def 8mi_NF_ND  : BinOpMI8U_R<m, MemMRM, Xi8, null_frag>, Sched<[mi, ri]>, EVEX_NF;
+    def 16mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi16, null_frag>, Sched<[mi, ri]>, EVEX_NF, PD;
+    def 32mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi32, null_frag>, Sched<[mi, ri]>, EVEX_NF;
+    def 64mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi64, null_frag>, Sched<[mi, ri]>, EVEX_NF;
+
+    let SchedRW = [ri] in {
+      // FIXME: Assembler can't tell whether it's 8r1_NF_ND or 8rCL_NF when the source register is cl, e.g.
+      //
+      //  {nf} shlb %cl, %al
+      //
+      // GNU binutils distinguishes them by adding an explicit $1 to the asm string of 8r1_NF_ND. But we don't yet support
+      // constant immediates in the asm string for X86 in TD. So we add DisassembleOnly for 8r1_NF_ND for the time being.
+      def 8r1_NF  : UnaryOpR_R<0xD1, RegMRM, m, Xi8, null_frag>, NF;
+      def 16r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi16, null_frag>, NF, PD;
+      def 32r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi32, null_frag>, NF;
+      def 64r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi64, null_frag>, NF;
+
+      def 8r1_NF_ND  : UnaryOpR_R<0xD1, RegMRM, m, Xi8, null_frag, 1>, EVEX_NF, DisassembleOnly;
+      def 16r1_NF_ND : UnaryOpR_R<0xD1, RegMRM, m, Xi16, null_frag, 1>, EVEX_NF, PD;
+      def 32r1_NF_ND : UnaryOpR_R<0xD1, RegMRM, m, Xi32, null_frag, 1>, EVEX_NF;
+      def 64r1_NF_ND : UnaryOpR_R<0xD1, RegMRM, m, Xi64, null_frag, 1>, EVEX_NF;
+    }
+
+    let SchedRW = [mi, WriteRMW] in {
+      def 8m1_NF  : UnaryOpM_M<0xD1, MemMRM, m, Xi8, null_frag>, NF;
+      def 16m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi16, null_frag>, NF, PD;
+      def 32m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi32, null_frag>, NF;
+      def 64m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi64, null_frag>, NF;
+    }
+    let SchedRW = [mi, ri] in {
+      def 8m1_NF_ND  : UnaryOpM_R<0xD1, MemMRM, m, Xi8, null_frag>, EVEX_NF;
+      def 16m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi16, null_frag>, EVEX_NF, PD;
+      def 32m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi32, null_frag>, EVEX_NF;
+      def 64m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi64, null_frag>, EVEX_NF;
+    }
+
+    let Uses = [CL] in {
+      def 8rCL_NF  : BinOpRC_R<m, RegMRM, Xi8, null_frag>, Sched<[rCL]>, NF;
+      def 16rCL_NF : BinOpRC_R<m, RegMRM, Xi16, null_frag>, Sched<[rCL]>, NF, PD;
+      def 32rCL_NF : BinOpRC_R<m, RegMRM, Xi32, null_frag>, Sched<[rCL]>, NF;
+      def 64rCL_NF : BinOpRC_R<m, RegMRM, Xi64, null_frag>, Sched<[rCL]>, NF;
+
+      def 8rCL_NF_ND  : BinOpRC_R<m, RegMRM, Xi8, null_frag, 1>, Sched<[rCL]>, EVEX_NF;
+      def 16rCL_NF_ND : BinOpRC_R<m, RegMRM, Xi16, null_frag, 1>, Sched<[rCL]>, EVEX_NF, PD;
+      def 32rCL_NF_ND : BinOpRC_R<m, RegMRM, Xi32, null_frag, 1>, Sched<[rCL]>, EVEX_NF;
+      def 64rCL_NF_ND : BinOpRC_R<m, RegMRM, Xi64, null_frag, 1>, Sched<[rCL]>, EVEX_NF;
+
+      def 8mCL_NF  : BinOpMC_M<m, MemMRM, Xi8, null_frag>, Sched<[mCL, WriteRMW]>, NF;
+      def 16mCL_NF : BinOpMC_M<m, MemMRM, Xi16, null_frag>, Sched<[mCL, WriteRMW]>, NF, PD;
+      def 32mCL_NF : BinOpMC_M<m, MemMRM, Xi32, null_frag>, Sched<[mCL, WriteRMW]>, NF;
+      def 64mCL_NF : BinOpMC_M<m, MemMRM, Xi64, null_frag>, Sched<[mCL, WriteRMW]>, NF;
+
+      def 8mCL_NF_ND  : BinOpMC_R<m, MemMRM, Xi8, null_frag>, Sched<[mCL, rCL]>, EVEX_NF;
+      def 16mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi16, null_frag>, Sched<[mCL, rCL]>, EVEX_NF, PD;
+      def 32mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi32, null_frag>, Sched<[mCL, rCL]>, EVEX_NF;
+      def 64mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi64, null_frag>, Sched<[mCL, rCL]>, EVEX_NF;
+    }
+  }
+}
 defm SHL: ShiftRotate<"shl", MRM4r, MRM4m, shl, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
 defm SHR: ShiftRotate<"shr", MRM5r, MRM5m, srl, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
 defm SAR: ShiftRotate<"sar", MRM7r, MRM7m, sra, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
@@ -68,15 +232,34 @@ defm ROR: ShiftRotate<"ror", MRM1r, MRM1m, rotr, WriteRotateCL, WriteRotate, Wri
 defm RCL: ShiftRotate<"rcl", MRM2r, MRM2m, null_frag, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd, [EFLAGS]>;
 defm RCR: ShiftRotate<"rcr", MRM3r, MRM3m, null_frag, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd, [EFLAGS]>;
 
+defm SHL: ShiftRotate_NF<"shl", MRM4r, MRM4m, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
+defm SHR: ShiftRotate_NF<"shr", MRM5r, MRM5m, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
+defm SAR: ShiftRotate_NF<"sar", MRM7r, MRM7m, WriteShiftCL, WriteShift, WriteShiftCLLd, WriteShiftLd>;
+
+defm ROL: ShiftRotate_NF<"rol", MRM0r, MRM0m, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd>;
+defm ROR: ShiftRotate_NF<"ror", MRM1r, MRM1m, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd>;
+
 // Use the opposite rotate if allows us to use the rotate by 1 instruction.
-def : Pat<(rotl GR8:$src1,  (i8 7)),  (ROR8r1  GR8:$src1)>;
-def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>;
-def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1 GR32:$src1)>;
-def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1 GR64:$src1)>;
-def : Pat<(rotr GR8:$src1,  (i8 7)),  (ROL8r1  GR8:$src1)>;
-def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1 GR16:$src1)>;
-def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1 GR32:$src1)>;
-def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1 GR64:$src1)>;
+let Predicates = [NoNDD] in {
+  def : Pat<(rotl GR8:$src1,  (i8 7)),  (ROR8r1  GR8:$src1)>;
+  def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>;
+  def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1 GR32:$src1)>;
+  def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1 GR64:$src1)>;
+  def : Pat<(rotr GR8:$src1,  (i8 7)),  (ROL8r1  GR8:$src1)>;
+  def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1 GR16:$src1)>;
+  def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1 GR32:$src1)>;
+  def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1 GR64:$src1)>;
+}
+let Predicates = [HasNDD] in {
+  def : Pat<(rotl GR8:$src1,  (i8 7)),  (ROR8r1_ND  GR8:$src1)>;
+  def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1_ND GR16:$src1)>;
+  def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1_ND GR32:$src1)>;
+  def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1_ND GR64:$src1)>;
+  def : Pat<(rotr GR8:$src1,  (i8 7)),  (ROL8r1_ND  GR8:$src1)>;
+  def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1_ND GR16:$src1)>;
+  def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1_ND GR32:$src1)>;
+  def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1_ND GR64:$src1)>;
+}
 
 def : Pat<(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst),
           (ROR8m1 addr:$dst)>;
@@ -96,34 +279,74 @@ def : Pat<(store (rotr (loadi32 addr:$dst), (i8 31)), addr:$dst),
 def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst),
           (ROL64m1 addr:$dst)>, Requires<[In64BitMode]>;
 
+let Predicates = [HasNDD] in {
+def : Pat<(rotl (loadi8 addr:$src), (i8 7)),
+          (ROR8m1_ND addr:$src)>;
+def : Pat<(rotl (loadi16 addr:$src), (i8 15)),
+          (ROR16m1_ND addr:$src)>;
+def : Pat<(rotl (loadi32 addr:$src), (i8 31)),
+          (ROR32m1_ND addr:$src)>;
+def : Pat<(rotl (loadi64 addr:$src), (i8 63)),
+          (ROR64m1_ND addr:$src)>;
+
+def : Pat<(rotr (loadi8 addr:$src), (i8 7)),
+          (ROL8m1_ND addr:$src)>;
+def : Pat<(rotr (loadi16 addr:$src), (i8 15)),
+          (ROL16m1_ND addr:$src)>;
+def : Pat<(rotr (loadi32 addr:$src), (i8 31)),
+          (ROL32m1_ND addr:$src)>;
+def : Pat<(rotr (loadi64 addr:$src), (i8 63)),
+          (ROL64m1_ND addr:$src)>;
+}
 
 // Patterns for rotate with relocImm for the immediate field.
-def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
-          (ROL8ri GR8:$src1, relocImm:$src2)>;
-def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
-          (ROL16ri GR16:$src1, relocImm:$src2)>;
-def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
-          (ROL32ri GR32:$src1, relocImm:$src2)>;
-def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
-          (ROL64ri GR64:$src1, relocImm:$src2)>;
-
-def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
-          (ROR8ri GR8:$src1, relocImm:$src2)>;
-def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
-          (ROR16ri GR16:$src1, relocImm:$src2)>;
-def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
-          (ROR32ri GR32:$src1, relocImm:$src2)>;
-def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
-          (ROR64ri GR64:$src1, relocImm:$src2)>;
+let Predicates = [NoNDD] in {
+  def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
+            (ROL8ri GR8:$src1, relocImm:$src2)>;
+  def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
+            (ROL16ri GR16:$src1, relocImm:$src2)>;
+  def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
+            (ROL32ri GR32:$src1, relocImm:$src2)>;
+  def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
+            (ROL64ri GR64:$src1, relocImm:$src2)>;
+
+  def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
+            (ROR8ri GR8:$src1, relocImm:$src2)>;
+  def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
+            (ROR16ri GR16:$src1, relocImm:$src2)>;
+  def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
+            (ROR32ri GR32:$src1, relocImm:$src2)>;
+  def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
+            (ROR64ri GR64:$src1, relocImm:$src2)>;
+}
+let Predicates = [HasNDD] in {
+  def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
+            (ROL8ri_ND GR8:$src1, relocImm:$src2)>;
+  def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
+            (ROL16ri_ND GR16:$src1, relocImm:$src2)>;
+  def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
+            (ROL32ri_ND GR32:$src1, relocImm:$src2)>;
+  def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
+            (ROL64ri_ND GR64:$src1, relocImm:$src2)>;
+
+  def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
+            (ROR8ri_ND GR8:$src1, relocImm:$src2)>;
+  def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
+            (ROR16ri_ND GR16:$src1, relocImm:$src2)>;
+  def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
+            (ROR32ri_ND GR32:$src1, relocImm:$src2)>;
+  def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
+            (ROR64ri_ND GR64:$src1, relocImm:$src2)>;
+}
 
 //===----------------------------------------------------------------------===//
 // Double precision shift instructions (generalizations of rotate)
 //===----------------------------------------------------------------------===//
 
-class ShlrdOpRRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+class ShlrdOpRRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
   : ITy<o, MRMDestReg, t, (outs t.RegClass:$dst),
-        (ins t.RegClass:$src1, t.RegClass:$src2, u8imm:$src3), m, triop_args,
-        []>, NDD<0, TB> {
+        (ins t.RegClass:$src1, t.RegClass:$src2, u8imm:$src3), m, !if(!eq(ndd, 0), triop_args, triop_ndd_args),
+        []>, NDD<ndd> {
   let isCommutable = 1;
   let ImmT = Imm8;
   let SchedRW = [WriteSHDrri];
@@ -132,8 +355,8 @@ class ShlrdOpRRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
                     [(set t.RegClass:$dst, (node t.RegClass:$src2, t.RegClass:$src1, (i8 imm:$src3)))]);
 }
 
-class ShlrdOpRRC_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
-  : BinOpRR<o, m, triop_cl_args, t, (outs t.RegClass:$dst), []>, NDD<0, TB> {
+class ShlrdOpRRC_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
+  : BinOpRR<o, m, !if(!eq(ndd, 0), triop_cl_args, triop_cl_ndd_args), t, (outs t.RegClass:$dst), []>, NDD<ndd> {
   let Uses = [CL];
   let SchedRW = [WriteSHDrrcl];
   let Pattern = !if(!eq(m, "shld"),
@@ -163,15 +386,71 @@ class ShlrdOpMRC_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
                     [(store (node t.RegClass:$src2, (t.LoadNode addr:$src1), CL), addr:$src1)]);
 }
 
-multiclass Shlrd<bits<8> o1, bits<8> o2, string m, SDPatternOperator node, SDPatternOperator t_node> {
+class ShlrdOpMRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : ITy<o, MRMDestMem, t, (outs t.RegClass:$dst), (ins t.MemOperand:$src1, t.RegClass:$src2, u8imm:$src3),
+        m, triop_ndd_args, []>, NDD<1> {
+  let ImmT = Imm8;
+  let SchedRW = [WriteSHDmri];
+  let mayLoad = 1;
+  let Pattern = !if(!eq(m, "shld"),
+                    [(set t.RegClass:$dst, (node (t.LoadNode addr:$src1), t.RegClass:$src2, (i8 imm:$src3)))],
+                    [(set t.RegClass:$dst, (node t.RegClass:$src2, (t.LoadNode addr:$src1), (i8 imm:$src3)))]);
+}
+
+class ShlrdOpMRC_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : BinOpMR<o, m, triop_cl_ndd_args, t, (outs t.RegClass:$dst), []>, NDD<1> {
+  let Uses = [CL];
+  let SchedRW = [WriteSHDmrcl];
+  let Pattern = !if(!eq(m, "shld"),
+                    [(set t.RegClass:$dst, (node (t.LoadNode addr:$src1), t.RegClass:$src2, CL))],
+                    [(set t.RegClass:$dst, (node t.RegClass:$src2, (t.LoadNode addr:$src1), CL))]);
+}
+
+multiclass Shlrd<bits<8> o1, bits<8> o2, bits<8> o3, string m, SDPatternOperator node, SDPatternOperator t_node> {
+  let Predicates = [NoNDD] in {
+    def 16rri8 : ShlrdOpRRI8U_R<o1, m, Xi16, t_node>, TB, DefEFLAGS, OpSize16;
+    def 32rri8 : ShlrdOpRRI8U_R<o1, m, Xi32, node>, TB, DefEFLAGS, OpSize32;
+    def 64rri8 : ShlrdOpRRI8U_R<o1, m, Xi64, node>, TB, DefEFLAGS;
+
+    def 16rrCL : ShlrdOpRRC_R<o2, m, Xi16, t_node>, TB, DefEFLAGS, OpSize16;
+    def 32rrCL : ShlrdOpRRC_R<o2, m, Xi32, node>, TB, DefEFLAGS, OpSize32;
+    def 64rrCL : ShlrdOpRRC_R<o2, m, Xi64, node>, TB, DefEFLAGS;
+  }
+  let Predicates = [HasNDD, In64BitMode] in {
+    def 16rri8_ND : ShlrdOpRRI8U_R<o3, m, Xi16, t_node, 1>, DefEFLAGS, PD;
+    def 32rri8_ND : ShlrdOpRRI8U_R<o3, m, Xi32, node, 1>, DefEFLAGS;
+    def 64rri8_ND : ShlrdOpRRI8U_R<o3, m, Xi64, node, 1>, DefEFLAGS;
+
+    def 16rrCL_ND : ShlrdOpRRC_R<o2, m, Xi16, t_node, 1>, DefEFLAGS, PD;
+    def 32rrCL_ND : ShlrdOpRRC_R<o2, m, Xi32, node, 1>, DefEFLAGS;
+    def 64rrCL_ND : ShlrdOpRRC_R<o2, m, Xi64, node, 1>, DefEFLAGS;
+  }
 
-  def 16rri8 : ShlrdOpRRI8U_R<o1, m, Xi16, t_node>, DefEFLAGS, OpSize16;
-  def 32rri8 : ShlrdOpRRI8U_R<o1, m, Xi32, node>, DefEFLAGS, OpSize32;
-  def 64rri8 : ShlrdOpRRI8U_R<o1, m, Xi64, node>, DefEFLAGS;
+  let Predicates = [In64BitMode] in {
+    def 16rri8_NF : ShlrdOpRRI8U_R<o3, m, Xi16, null_frag>, NF, PD;
+    def 32rri8_NF : ShlrdOpRRI8U_R<o3, m, Xi32, null_frag>, NF;
+    def 64rri8_NF : ShlrdOpRRI8U_R<o3, m, Xi64, null_frag>, NF;
 
-  def 16rrCL : ShlrdOpRRC_R<o2, m, Xi16, t_node>, DefEFLAGS, OpSize16;
-  def 32rrCL : ShlrdOpRRC_R<o2, m, Xi32, node>, DefEFLAGS, OpSize32;
-  def 64rrCL : ShlrdOpRRC_R<o2, m, Xi64, node>, DefEFLAGS;
+    def 16rrCL_NF : ShlrdOpRRC_R<o2, m, Xi16, null_frag>, NF, PD;
+    def 32rrCL_NF : ShlrdOpRRC_R<o2, m, Xi32, null_frag>, NF;
+    def 64rrCL_NF : ShlrdOpRRC_R<o2, m, Xi64, null_frag>, NF;
+
+    def 16rri8_NF_ND : ShlrdOpRRI8U_R<o3, m, Xi16, null_frag, 1>, EVEX_NF, PD;
+    def 32rri8_NF_ND : ShlrdOpRRI8U_R<o3, m, Xi32, null_frag, 1>, EVEX_NF;
+    def 64rri8_NF_ND : ShlrdOpRRI8U_R<o3, m, Xi64, null_frag, 1>, EVEX_NF;
+
+    def 16rrCL_NF_ND : ShlrdOpRRC_R<o2, m, Xi16, null_frag, 1>, EVEX_NF, PD;
+    def 32rrCL_NF_ND : ShlrdOpRRC_R<o2, m, Xi32, null_frag, 1>, EVEX_NF;
+    def 64rrCL_NF_ND : ShlrdOpRRC_R<o2, m, Xi64, null_frag, 1>, EVEX_NF;
+
+    def 16rri8_EVEX : ShlrdOpRRI8U_R<o3, m, Xi16, null_frag>, DefEFLAGS, PL, PD;
+    def 32rri8_EVEX : ShlrdOpRRI8U_R<o3, m, Xi32, null_frag>, DefEFLAGS, PL;
+    def 64rri8_EVEX : ShlrdOpRRI8U_R<o3, m, Xi64, null_frag>, DefEFLAGS, PL;
+
+    def 16rrCL_EVEX : ShlrdOpRRC_R<o2, m, Xi16, null_frag>, DefEFLAGS, PL, PD;
+    def 32rrCL_EVEX : ShlrdOpRRC_R<o2, m, Xi32, null_frag>, DefEFLAGS, PL;
+    def 64rrCL_EVEX : ShlrdOpRRC_R<o2, m, Xi64, null_frag>, DefEFLAGS, PL;
+  }
 
   def 16mri8 : ShlrdOpMRI8U_M<o1, m, Xi16, t_node>, DefEFLAGS, OpSize16;
   def 32mri8 : ShlrdOpMRI8U_M<o1, m, Xi32, node>, DefEFLAGS, OpSize32;
@@ -180,10 +459,46 @@ multiclass Shlrd<bits<8> o1, bits<8> o2, string m, SDPatternOperator node, SDPat
   def 16mrCL : ShlrdOpMRC_M<o2, m, Xi16, t_node>, DefEFLAGS, OpSize16;
   def 32mrCL : ShlrdOpMRC_M<o2, m, Xi32, node>, DefEFLAGS, OpSize32;
   def 64mrCL : ShlrdOpMRC_M<o2, m, Xi64, node>, DefEFLAGS;
+
+  let Predicates = [HasNDD, In64BitMode] in {
+    def 16mri8_ND : ShlrdOpMRI8U_R<o3, m, Xi16, t_node>, DefEFLAGS, PD;
+    def 32mri8_ND : ShlrdOpMRI8U_R<o3, m, Xi32, node>, DefEFLAGS;
+    def 64mri8_ND : ShlrdOpMRI8U_R<o3, m, Xi64, node>, DefEFLAGS;
+
+    def 16mrCL_ND : ShlrdOpMRC_R<o2, m, Xi16, t_node>, DefEFLAGS, PD;
+    def 32mrCL_ND : ShlrdOpMRC_R<o2, m, Xi32, node>, DefEFLAGS;
+    def 64mrCL_ND : ShlrdOpMRC_R<o2, m, Xi64, node>, DefEFLAGS;
+  }
+
+  let Predicates = [In64BitMode] in {
+    def 16mri8_NF : ShlrdOpMRI8U_M<o3, m, Xi16, null_frag>, NF, PD;
+    def 32mri8_NF : ShlrdOpMRI8U_M<o3, m, Xi32, null_frag>, NF;
+    def 64mri8_NF : ShlrdOpMRI8U_M<o3, m, Xi64, null_frag>, NF;
+
+    def 16mrCL_NF : ShlrdOpMRC_M<o2, m, Xi16, null_frag>, NF, PD;
+    def 32mrCL_NF : ShlrdOpMRC_M<o2, m, Xi32, null_frag>, NF;
+    def 64mrCL_NF : ShlrdOpMRC_M<o2, m, Xi64, null_frag>, NF;
+
+    def 16mri8_NF_ND : ShlrdOpMRI8U_R<o3, m, Xi16, null_frag>, EVEX_NF, PD;
+    def 32mri8_NF_ND : ShlrdOpMRI8U_R<o3, m, Xi32, null_frag>, EVEX_NF;
+    def 64mri8_NF_ND : ShlrdOpMRI8U_R<o3, m, Xi64, null_frag>, EVEX_NF;
+
+    def 16mrCL_NF_ND : ShlrdOpMRC_R<o2, m, Xi16, null_frag>, EVEX_NF, PD;
+    def 32mrCL_NF_ND : ShlrdOpMRC_R<o2, m, Xi32, null_frag>, EVEX_NF;
+    def 64mrCL_NF_ND : ShlrdOpMRC_R<o2, m, Xi64, null_frag>, EVEX_NF;
+
+    def 16mri8_EVEX : ShlrdOpMRI8U_M<o3, m, Xi16, null_frag>, DefEFLAGS, PL, PD;
+    def 32mri8_EVEX : ShlrdOpMRI8U_M<o3, m, Xi32, null_frag>, DefEFLAGS, PL;
+    def 64mri8_EVEX : ShlrdOpMRI8U_M<o3, m, Xi64, null_frag>, DefEFLAGS, PL;
+
+    def 16mrCL_EVEX : ShlrdOpMRC_M<o2, m, Xi16, null_frag>, DefEFLAGS, PL, PD;
+    def 32mrCL_EVEX : ShlrdOpMRC_M<o2, m, Xi32, null_frag>, DefEFLAGS, PL;
+    def 64mrCL_EVEX : ShlrdOpMRC_M<o2, m, Xi64, null_frag>, DefEFLAGS, PL;
+  }
 }
 
-defm SHLD : Shlrd<0xA4, 0xA5, "shld", fshl, X86fshl>;
-defm SHRD : Shlrd<0xAC, 0xAD, "shrd", fshr, X86fshr>;
+defm SHLD : Shlrd<0xA4, 0xA5, 0x24, "shld", fshl, X86fshl>;
+defm SHRD : Shlrd<0xAC, 0xAD, 0x2C, "shrd", fshr, X86fshr>;
 
 // Sandy Bridge and newer Intel processors support faster rotates using
 // SHLD to avoid a partial flag update on the normal rotate instructions.
diff --git a/llvm/lib/Target/X86/X86InstrUtils.td b/llvm/lib/Target/X86/X86InstrUtils.td
index 27aeff1cd3ae2c..93827bf0817c37 100644
--- a/llvm/lib/Target/X86/X86InstrUtils.td
+++ b/llvm/lib/Target/X86/X86InstrUtils.td
@@ -100,17 +100,20 @@ defvar unaryop_ndd_args = "{$src1, $dst|$dst, $src1}";
 defvar binop_args = "{$src2, $src1|$src1, $src2}";
 defvar binop_ndd_args = "{$src2, $src1, $dst|$dst, $src1, $src2}";
 defvar binop_cl_args = "{%cl, $src1|$src1, cl}";
+defvar binop_cl_ndd_args = "{%cl, $src1, $dst|$dst, $src1, cl}";
 defvar triop_args = "{$src3, $src2, $src1|$src1, $src2, $src3}";
+defvar triop_ndd_args = "{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}";
 defvar triop_cl_args = "{%cl, $src2, $src1|$src1, $src2, cl}";
+defvar triop_cl_ndd_args = "{%cl, $src2, $src1, $dst|$dst, $src1, $src2, cl}";
 defvar tie_dst_src1 = "$src1 = $dst";
 
 // NDD - Helper for new data destination instructions
-class NDD<bit ndd, Map map = OB> {
+class NDD<bit ndd> {
   string Constraints = !if(!eq(ndd, 0), tie_dst_src1, "");
   Encoding OpEnc = !if(!eq(ndd, 0), EncNormal, EncEVEX);
   bit hasEVEX_B = ndd;
   bit hasVEX_4V = ndd;
-  Map OpMap = !if(!eq(ndd, 0), map, T_MAP4);
+  Map OpMap = !if(!eq(ndd, 0), OB, T_MAP4);
 }
 // NF - Helper for NF (no flags update) instructions
 class NF: T_MAP4, EVEX, EVEX_NF;
@@ -1067,9 +1070,10 @@ class BinOpRI_R<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0>
   : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst),
             []>, NDD<ndd>;
 // BinOpRI8U_R - Instructions that read "reg, u8imm" and write "reg".
-class BinOpRI8U_R<string m, Format f, X86TypeInfo t, SDPatternOperator node>
+class BinOpRI8U_R<string m, Format f, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
   : ITy<0xC1, f, t, (outs t.RegClass:$dst), (ins t.RegClass:$src1, u8imm:$src2), m,
-        binop_args, [(set t.RegClass:$dst, (node t.RegClass:$src1, (i8 imm:$src2)))]>, NDD<0> {
+        !if(!eq(ndd, 0), binop_args, binop_ndd_args),
+        [(set t.RegClass:$dst, (node t.RegClass:$src1, (i8 imm:$src2)))]>, NDD<ndd> {
   let ImmT = Imm8;
 }
 // BinOpRI_RF - Instructions that read "reg, imm" and write "reg", EFLAGS.
@@ -1232,20 +1236,22 @@ class BinOpMI8<string m, string args, X86TypeInfo t, Format f, dag out>
   let ImmT = Imm8;
   let mayLoad = 1;
 }
+// BinOpMI8U - Base class (opcode 0xC1) for instructions that read "[mem], u8imm"; args, outs and pattern come from the subclass.
+class BinOpMI8U<string m, string args, X86TypeInfo t, Format f, dag out, list<dag> p>
+  : ITy<0xC1, f, t, out, (ins t.MemOperand:$src1, u8imm:$src2), m, args, p> {
+  let ImmT = Imm8;
+  let mayLoad = 1;
+}
 // BinOpMI8_F - Instructions that read "[mem], imm8" and write EFLAGS only.
 class BinOpMI8_F<string m, X86TypeInfo t, Format f>
   : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALU.Folded]>, DefEFLAGS;
 // BinOpMI8_R - Instructions that read "[mem], imm8" and write "reg".
 class BinOpMI8_R<string m, X86TypeInfo t, Format f>
   : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>;
-// BinOpMI8U_M - Instructions that read "[mem], u8imm" and write "[mem]".
-class BinOpMI8U_M<string m, Format f, X86TypeInfo t, SDPatternOperator node>
-  : ITy<0xC1, f, t, (outs), (ins t.MemOperand:$src1, u8imm:$src2), m,
-        binop_args, [(store (node (t.LoadNode addr:$src1), (i8 imm:$src2)), addr:$src1)]> {
-  let ImmT = Imm8;
-  let mayLoad = 1;
-  let mayStore = 1;
-}
+// BinOpMI8U_R - NDD (NDD<1>) instructions that read "[mem], u8imm" and write "reg"; the pattern loads $src1 and applies `node`.
+class BinOpMI8U_R<string m, Format f, X86TypeInfo t, SDPatternOperator node>
+  : BinOpMI8U<m, binop_ndd_args, t, f, (outs t.RegClass:$dst),
+              [(set t.RegClass:$dst, (node (t.LoadNode addr:$src1), (i8 imm:$src2)))]>, NDD<1>;
 // BinOpMI8_RF - Instructions that read "[mem], imm8" and write "reg"/EFLAGS.
 class BinOpMI8_RF<string m, X86TypeInfo t, Format f>
   : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>;
@@ -1254,6 +1260,12 @@ class BinOpMI8_M<string m, X86TypeInfo t, Format f>
   : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALURMW]> {
   let mayStore = 1;
 }
+// BinOpMI8U_M - Instructions that read "[mem], u8imm" and write the result back to the same "[mem]" ($src1).
+class BinOpMI8U_M<string m, Format f, X86TypeInfo t, SDPatternOperator node>
+  : BinOpMI8U<m, binop_args, t, f, (outs),
+              [(store (node (t.LoadNode addr:$src1), (i8 imm:$src2)), addr:$src1)]> {
+  let mayStore = 1;
+}
 // BinOpMI8_MF - Instructions that read "[mem], imm8" and write "[mem]", EFLAGS.
 class BinOpMI8_MF<string m, X86TypeInfo t, Format f>
   : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALURMW]>, DefEFLAGS {
@@ -1296,9 +1308,10 @@ class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg,
   let SchedRW = [WriteADC];
 }
 // BinOpRC_R - Instructions that read "reg, cl" and write reg.
-class BinOpRC_R<string m, Format f, X86TypeInfo t, SDPatternOperator node>
-  : ITy<0xD3, f, t, (outs t.RegClass:$dst), (ins t.RegClass:$src1), m, binop_cl_args,
-        [(set t.RegClass:$dst, (node t.RegClass:$src1, CL))]>, NDD<0> {
+class BinOpRC_R<string m, Format f, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
+  : ITy<0xD3, f, t, (outs t.RegClass:$dst), (ins t.RegClass:$src1), m,
+        !if(!eq(ndd, 0), binop_cl_args, binop_cl_ndd_args),
+        [(set t.RegClass:$dst, (node t.RegClass:$src1, CL))]>, NDD<ndd> {
   let Uses = [CL];
 }
 // BinOpMC_M - Instructions that read "[mem], cl" and write [mem].
@@ -1309,6 +1322,13 @@ class BinOpMC_M<string m, Format f, X86TypeInfo t, SDPatternOperator node>
   let mayLoad = 1;
   let mayStore = 1;
 }
+// BinOpMC_R - NDD (NDD<1>) instructions that read "[mem], cl" (CL is an implicit use) and write reg.
+class BinOpMC_R<string m, Format f, X86TypeInfo t, SDPatternOperator node>
+  : ITy<0xD3, f, t, (outs t.RegClass:$dst), (ins t.MemOperand:$src1), m, binop_cl_ndd_args,
+        [(set t.RegClass:$dst, (node (t.LoadNode addr:$src1), CL))]>, NDD<1> {
+  let Uses = [CL];
+  let mayLoad = 1;
+}
 
 // UnaryOpR - Instructions that read "reg".
 class UnaryOpR<bits<8> o, Format f, string m, string args, X86TypeInfo t,
diff --git a/llvm/test/CodeGen/X86/apx/rol.ll b/llvm/test/CodeGen/X86/apx/rol.ll
new file mode 100644
index 00000000000000..f41c17ffbf6736
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/rol.ll
@@ -0,0 +1,530 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s
+
+define i8 @rol8m1(ptr %ptr) {
+; CHECK-LABEL: rol8m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolb $1, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0x07,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = shl i8 %a, 1
+  %1 = lshr i8 %a, 7
+  %rol = or i8 %0, %1
+  ret i8 %rol
+}
+
+define i16 @rol16m1(ptr %ptr) {
+; CHECK-LABEL: rol16m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolw $1, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0x07,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %0 = shl i16 %a, 1
+  %1 = lshr i16 %a, 15
+  %rol = or i16 %0, %1
+  ret i16 %rol
+}
+
+define i32 @rol32m1(ptr %ptr) {
+; CHECK-LABEL: rol32m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    roll $1, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0x07,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %0 = shl i32 %a, 1
+  %1 = lshr i32 %a, 31
+  %rol = or i32 %0, %1
+  ret i32 %rol
+}
+
+define i64 @rol64m1(ptr %ptr) {
+; CHECK-LABEL: rol64m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolq $1, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0x07,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %0 = shl i64 %a, 1
+  %1 = lshr i64 %a, 63
+  %rol = or i64 %0, %1
+  ret i64 %rol
+}
+
+define i8 @rol8mcl(ptr %ptr, i8 %cl) {
+; CHECK-LABEL: rol8mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rolb %cl, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = shl i8 %a, %cl
+  %1 = sub i8 8, %cl
+  %2 = lshr i8 %a, %1
+  %rol = or i8 %0, %2
+  ret i8 %rol
+}
+
+define i16 @rol16mcl(ptr %ptr, i16 %cl) {
+; CHECK-LABEL: rol16mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rolw %cl, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xd3,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %0 = shl i16 %a, %cl
+  %1 = sub i16 16, %cl
+  %2 = lshr i16 %a, %1
+  %rol = or i16 %0, %2
+  ret i16 %rol
+}
+
+define i32 @rol32mcl(ptr %ptr, i32 %cl) {
+; CHECK-LABEL: rol32mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    roll %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %0 = shl i32 %a, %cl
+  %1 = sub i32 32, %cl
+  %2 = lshr i32 %a, %1
+  %rol = or i32 %0, %2
+  ret i32 %rol
+}
+
+define i64 @rol64mcl(ptr %ptr, i64 %cl) {
+; CHECK-LABEL: rol64mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    rolq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %0 = shl i64 %a, %cl
+  %1 = sub i64 64, %cl
+  %2 = lshr i64 %a, %1
+  %rol = or i64 %0, %2
+  ret i64 %rol
+}
+
+define i8 @rol8mi(ptr %ptr) {
+; CHECK-LABEL: rol8mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolb $3, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0x07,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = shl i8 %a, 3
+  %1 = lshr i8 %a, 5
+  %rol = or i8 %0, %1
+  ret i8 %rol
+}
+
+define i16 @rol16mi(ptr %ptr) {
+; CHECK-LABEL: rol16mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolw $3, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0x07,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %0 = shl i16 %a, 3
+  %1 = lshr i16 %a, 13
+  %rol = or i16 %0, %1
+  ret i16 %rol
+}
+
+define i32 @rol32mi(ptr %ptr) {
+; CHECK-LABEL: rol32mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    roll $3, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0x07,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %0 = shl i32 %a, 3
+  %1 = lshr i32 %a, 29
+  %rol = or i32 %0, %1
+  ret i32 %rol
+}
+
+define i64 @rol64mi(ptr %ptr) {
+; CHECK-LABEL: rol64mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolq $3, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0x07,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %0 = shl i64 %a, 3
+  %1 = lshr i64 %a, 61
+  %rol = or i64 %0, %1
+  ret i64 %rol
+}
+
+define i8 @rol8r1(i8 noundef %a) {
+; CHECK-LABEL: rol8r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolb $1, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0xc7,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i8 %a, 1
+  %1 = lshr i8 %a, 7
+  %rol = or i8 %0, %1
+  ret i8 %rol
+}
+
+define i16 @rol16r1(i16 noundef %a) {
+; CHECK-LABEL: rol16r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolw $1, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0xc7,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i16 %a, 1
+  %1 = lshr i16 %a, 15
+  %rol = or i16 %0, %1
+  ret i16 %rol
+}
+
+define i32 @rol32r1(i32 noundef %a) {
+; CHECK-LABEL: rol32r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    roll $1, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xc7,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i32 %a, 1
+  %1 = lshr i32 %a, 31
+  %rol = or i32 %0, %1
+  ret i32 %rol
+}
+
+define i64 @rol64r1(i64 noundef %a) {
+; CHECK-LABEL: rol64r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolq $1, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xc7,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i64 %a, 1
+  %1 = lshr i64 %a, 63
+  %rol = or i64 %0, %1
+  ret i64 %rol
+}
+
+define i8 @rol8rcl(i8 noundef %a, i8 %cl) {
+; CHECK-LABEL: rol8rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rolb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xc7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i8 %a, %cl
+  %1 = sub i8 8, %cl
+  %2 = lshr i8 %a, %1
+  %rol = or i8 %0, %2
+  ret i8 %rol
+}
+
+define i16 @rol16rcl(i16 noundef %a, i16 %cl) {
+; CHECK-LABEL: rol16rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rolw %cl, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xd3,0xc7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i16 %a, %cl
+  %1 = sub i16 16, %cl
+  %2 = lshr i16 %a, %1
+  %rol = or i16 %0, %2
+  ret i16 %rol
+}
+
+define i32 @rol32rcl(i32 noundef %a, i32 %cl) {
+; CHECK-LABEL: rol32rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    roll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xc7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i32 %a, %cl
+  %1 = sub i32 32, %cl
+  %2 = lshr i32 %a, %1
+  %rol = or i32 %0, %2
+  ret i32 %rol
+}
+
+define i64 @rol64rcl(i64 noundef %a, i64 %cl) {
+; CHECK-LABEL: rol64rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    rolq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xc7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i64 %a, %cl
+  %1 = sub i64 64, %cl
+  %2 = lshr i64 %a, %1
+  %rol = or i64 %0, %2
+  ret i64 %rol
+}
+
+define i8 @rol8ri(i8 noundef %a) {
+; CHECK-LABEL: rol8ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolb $3, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0xc7,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i8 %a, 3
+  %1 = lshr i8 %a, 5
+  %rol = or i8 %0, %1
+  ret i8 %rol
+}
+
+define i16 @rol16ri(i16 noundef %a) {
+; CHECK-LABEL: rol16ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolw $3, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0xc7,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i16 %a, 3
+  %1 = lshr i16 %a, 13
+  %rol = or i16 %0, %1
+  ret i16 %rol
+}
+
+define i32 @rol32ri(i32 noundef %a) {
+; CHECK-LABEL: rol32ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    roll $3, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xc7,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i32 %a, 3
+  %1 = lshr i32 %a, 29
+  %rol = or i32 %0, %1
+  ret i32 %rol
+}
+
+define i64 @rol64ri(i64 noundef %a) {
+; CHECK-LABEL: rol64ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolq $3, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xc7,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = shl i64 %a, 3
+  %1 = lshr i64 %a, 61
+  %rol = or i64 %0, %1
+  ret i64 %rol
+}
+
+define void @rol8m1_legacy(ptr %ptr) {
+; CHECK-LABEL: rol8m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolb (%rdi) # encoding: [0xd0,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = shl i8 %a, 1
+  %1 = lshr i8 %a, 7
+  %rol = or i8 %0, %1
+  store i8 %rol, ptr %ptr
+  ret void
+}
+
+; The rotated value is stored back to the address it was loaded from, so the
+; non-NDD in-place memory form is expected rather than an NDD rotate.
+; The shift amounts must sum to the bit width (1 + 15 == 16); otherwise the
+; shl/lshr/or sequence is not a rotate and lowers to separate shifts.
+define void @rol16m1_legacy(ptr %ptr) {
+; CHECK-LABEL: rol16m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolw (%rdi) # encoding: [0x66,0xd1,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %0 = shl i16 %a, 1
+  %1 = lshr i16 %a, 15
+  %rol = or i16 %0, %1
+  store i16 %rol, ptr %ptr
+  ret void
+}
+
+; The rotated value is stored back to the address it was loaded from, so the
+; non-NDD in-place memory form is expected rather than an NDD rotate.
+; The shift amounts must sum to the bit width (1 + 31 == 32); otherwise the
+; shl/lshr/or sequence is not a rotate and lowers to separate shifts.
+define void @rol32m1_legacy(ptr %ptr) {
+; CHECK-LABEL: rol32m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    roll (%rdi) # encoding: [0xd1,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %0 = shl i32 %a, 1
+  %1 = lshr i32 %a, 31
+  %rol = or i32 %0, %1
+  store i32 %rol, ptr %ptr
+  ret void
+}
+
+; The rotated value is stored back to the address it was loaded from, so the
+; non-NDD in-place memory form is expected rather than an NDD rotate.
+; The shift amounts must sum to the bit width (1 + 63 == 64); otherwise the
+; shl/lshr/or sequence is not a rotate and lowers to separate shifts.
+define void @rol64m1_legacy(ptr %ptr) {
+; CHECK-LABEL: rol64m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolq (%rdi) # encoding: [0x48,0xd1,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %0 = shl i64 %a, 1
+  %1 = lshr i64 %a, 63
+  %rol = or i64 %0, %1
+  store i64 %rol, ptr %ptr
+  ret void
+}
+
+define void @rol8mcl_legacy(ptr %ptr, i8 %cl) {
+; CHECK-LABEL: rol8mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rolb %cl, (%rdi) # encoding: [0xd2,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = shl i8 %a, %cl
+  %1 = sub i8 8, %cl
+  %2 = lshr i8 %a, %1
+  %rol = or i8 %0, %2
+  store i8 %rol, ptr %ptr
+  ret void
+}
+
+define void @rol16mcl_legacy(ptr %ptr, i16 %cl) {
+; CHECK-LABEL: rol16mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rolw %cl, (%rdi) # encoding: [0x66,0xd3,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %0 = shl i16 %a, %cl
+  %1 = sub i16 16, %cl
+  %2 = lshr i16 %a, %1
+  %rol = or i16 %0, %2
+  store i16 %rol, ptr %ptr
+  ret void
+}
+
+define void @rol32mcl_legacy(ptr %ptr, i32 %cl) {
+; CHECK-LABEL: rol32mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    roll %cl, (%rdi) # encoding: [0xd3,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %0 = shl i32 %a, %cl
+  %1 = sub i32 32, %cl
+  %2 = lshr i32 %a, %1
+  %rol = or i32 %0, %2
+  store i32 %rol, ptr %ptr
+  ret void
+}
+
+define void @rol64mcl_legacy(ptr %ptr, i64 %cl) {
+; CHECK-LABEL: rol64mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    rolq %cl, (%rdi) # encoding: [0x48,0xd3,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %0 = shl i64 %a, %cl
+  %1 = sub i64 64, %cl
+  %2 = lshr i64 %a, %1
+  %rol = or i64 %0, %2
+  store i64 %rol, ptr %ptr
+  ret void
+}
+
+define void @rol8mi_legacy(ptr %ptr) {
+; CHECK-LABEL: rol8mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolb $3, (%rdi) # encoding: [0xc0,0x07,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = shl i8 %a, 3
+  %1 = lshr i8 %a, 5
+  %rol = or i8 %0, %1
+  store i8 %rol, ptr %ptr
+  ret void
+}
+
+; The rotated value is stored back to the address it was loaded from, so the
+; non-NDD in-place memory form is expected rather than an NDD rotate.
+; The shift amounts must sum to the bit width (3 + 13 == 16); otherwise the
+; shl/lshr/or sequence is not a rotate and lowers to separate shifts.
+define void @rol16mi_legacy(ptr %ptr) {
+; CHECK-LABEL: rol16mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolw $3, (%rdi) # encoding: [0x66,0xc1,0x07,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %0 = shl i16 %a, 3
+  %1 = lshr i16 %a, 13
+  %rol = or i16 %0, %1
+  store i16 %rol, ptr %ptr
+  ret void
+}
+
+; The rotated value is stored back to the address it was loaded from, so the
+; non-NDD in-place memory form is expected rather than an NDD rotate.
+; The shift amounts must sum to the bit width (3 + 29 == 32); otherwise the
+; shl/lshr/or sequence is not a rotate and lowers to separate shifts.
+define void @rol32mi_legacy(ptr %ptr) {
+; CHECK-LABEL: rol32mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    roll $3, (%rdi) # encoding: [0xc1,0x07,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %0 = shl i32 %a, 3
+  %1 = lshr i32 %a, 29
+  %rol = or i32 %0, %1
+  store i32 %rol, ptr %ptr
+  ret void
+}
+
+; The rotated value is stored back to the address it was loaded from, so the
+; non-NDD in-place memory form is expected rather than an NDD rotate.
+; The shift amounts must sum to the bit width (3 + 61 == 64); otherwise the
+; shl/lshr/or sequence is not a rotate and lowers to separate shifts.
+define void @rol64mi_legacy(ptr %ptr) {
+; CHECK-LABEL: rol64mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolq $3, (%rdi) # encoding: [0x48,0xc1,0x07,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %0 = shl i64 %a, 3
+  %1 = lshr i64 %a, 61
+  %rol = or i64 %0, %1
+  store i64 %rol, ptr %ptr
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/apx/ror.ll b/llvm/test/CodeGen/X86/apx/ror.ll
new file mode 100644
index 00000000000000..8a974c11a60e67
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/ror.ll
@@ -0,0 +1,530 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s
+
+define i8 @ror8m1(ptr %ptr) {
+; CHECK-LABEL: ror8m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorb (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd0,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = lshr i8 %a, 1
+  %1 = shl i8 %a, 7
+  %ror = or i8 %0, %1
+  ret i8 %ror
+}
+
+define i16 @ror16m1(ptr %ptr) {
+; CHECK-LABEL: ror16m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorw (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xd1,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %0 = lshr i16 %a, 1
+  %1 = shl i16 %a, 15
+  %ror = or i16 %0, %1
+  ret i16 %ror
+}
+
+define i32 @ror32m1(ptr %ptr) {
+; CHECK-LABEL: ror32m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorl (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd1,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %0 = lshr i32 %a, 1
+  %1 = shl i32 %a, 31
+  %ror = or i32 %0, %1
+  ret i32 %ror
+}
+
+define i64 @ror64m1(ptr %ptr) {
+; CHECK-LABEL: ror64m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorq (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd1,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %0 = lshr i64 %a, 1
+  %1 = shl i64 %a, 63
+  %ror = or i64 %0, %1
+  ret i64 %ror
+}
+
+define i8 @ror8mcl(ptr %ptr, i8 %cl) {
+; CHECK-LABEL: ror8mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rorb %cl, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = lshr i8 %a, %cl
+  %1 = sub i8 8, %cl
+  %2 = shl i8 %a, %1
+  %ror = or i8 %0, %2
+  ret i8 %ror
+}
+
+define i16 @ror16mcl(ptr %ptr, i16 %cl) {
+; CHECK-LABEL: ror16mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rorw %cl, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xd3,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %0 = lshr i16 %a, %cl
+  %1 = sub i16 16, %cl
+  %2 = shl i16 %a, %1
+  %ror = or i16 %0, %2
+  ret i16 %ror
+}
+
+define i32 @ror32mcl(ptr %ptr, i32 %cl) {
+; CHECK-LABEL: ror32mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rorl %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %0 = lshr i32 %a, %cl
+  %1 = sub i32 32, %cl
+  %2 = shl i32 %a, %1
+  %ror = or i32 %0, %2
+  ret i32 %ror
+}
+
+define i64 @ror64mcl(ptr %ptr, i64 %cl) {
+; CHECK-LABEL: ror64mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    rorq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %0 = lshr i64 %a, %cl
+  %1 = sub i64 64, %cl
+  %2 = shl i64 %a, %1
+  %ror = or i64 %0, %2
+  ret i64 %ror
+}
+
+define i8 @ror8mi(ptr %ptr) {
+; CHECK-LABEL: ror8mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolb $5, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0x07,0x05]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = lshr i8 %a, 3
+  %1 = shl i8 %a, 5
+  %ror = or i8 %0, %1
+  ret i8 %ror
+}
+
+define i16 @ror16mi(ptr %ptr) {
+; CHECK-LABEL: ror16mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolw $13, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0x07,0x0d]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %0 = lshr i16 %a, 3
+  %1 = shl i16 %a, 13
+  %ror = or i16 %0, %1
+  ret i16 %ror
+}
+
+define i32 @ror32mi(ptr %ptr) {
+; CHECK-LABEL: ror32mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    roll $29, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0x07,0x1d]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %0 = lshr i32 %a, 3
+  %1 = shl i32 %a, 29
+  %ror = or i32 %0, %1
+  ret i32 %ror
+}
+
+define i64 @ror64mi(ptr %ptr) {
+; CHECK-LABEL: ror64mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolq $61, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0x07,0x3d]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %0 = lshr i64 %a, 3
+  %1 = shl i64 %a, 61
+  %ror = or i64 %0, %1
+  ret i64 %ror
+}
+
+define i8 @ror8r1(i8 noundef %a) {
+; CHECK-LABEL: ror8r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorb %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd0,0xcf]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i8 %a, 1
+  %1 = shl i8 %a, 7
+  %ror = or i8 %0, %1
+  ret i8 %ror
+}
+
+define i16 @ror16r1(i16 noundef %a) {
+; CHECK-LABEL: ror16r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorw %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xd1,0xcf]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i16 %a, 1
+  %1 = shl i16 %a, 15
+  %ror = or i16 %0, %1
+  ret i16 %ror
+}
+
+define i32 @ror32r1(i32 noundef %a) {
+; CHECK-LABEL: ror32r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorl %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd1,0xcf]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i32 %a, 1
+  %1 = shl i32 %a, 31
+  %ror = or i32 %0, %1
+  ret i32 %ror
+}
+
+define i64 @ror64r1(i64 noundef %a) {
+; CHECK-LABEL: ror64r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd1,0xcf]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i64 %a, 1
+  %1 = shl i64 %a, 63
+  %ror = or i64 %0, %1
+  ret i64 %ror
+}
+
+define i8 @ror8rcl(i8 noundef %a, i8 %cl) {
+; CHECK-LABEL: ror8rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rorb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xcf]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i8 %a, %cl
+  %1 = sub i8 8, %cl
+  %2 = shl i8 %a, %1
+  %ror = or i8 %0, %2
+  ret i8 %ror
+}
+
+define i16 @ror16rcl(i16 noundef %a, i16 %cl) {
+; CHECK-LABEL: ror16rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rorw %cl, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xd3,0xcf]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i16 %a, %cl
+  %1 = sub i16 16, %cl
+  %2 = shl i16 %a, %1
+  %ror = or i16 %0, %2
+  ret i16 %ror
+}
+
+define i32 @ror32rcl(i32 noundef %a, i32 %cl) {
+; CHECK-LABEL: ror32rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rorl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xcf]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i32 %a, %cl
+  %1 = sub i32 32, %cl
+  %2 = shl i32 %a, %1
+  %ror = or i32 %0, %2
+  ret i32 %ror
+}
+
+define i64 @ror64rcl(i64 noundef %a, i64 %cl) {
+; CHECK-LABEL: ror64rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    rorq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xcf]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i64 %a, %cl
+  %1 = sub i64 64, %cl
+  %2 = shl i64 %a, %1
+  %ror = or i64 %0, %2
+  ret i64 %ror
+}
+
+define i8 @ror8ri(i8 noundef %a) {
+; CHECK-LABEL: ror8ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolb $5, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0xc7,0x05]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i8 %a, 3
+  %1 = shl i8 %a, 5
+  %ror = or i8 %0, %1
+  ret i8 %ror
+}
+
+define i16 @ror16ri(i16 noundef %a) {
+; CHECK-LABEL: ror16ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolw $13, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0xc7,0x0d]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i16 %a, 3
+  %1 = shl i16 %a, 13
+  %ror = or i16 %0, %1
+  ret i16 %ror
+}
+
+define i32 @ror32ri(i32 noundef %a) {
+; CHECK-LABEL: ror32ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    roll $29, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xc7,0x1d]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i32 %a, 3
+  %1 = shl i32 %a, 29
+  %ror = or i32 %0, %1
+  ret i32 %ror
+}
+
+define i64 @ror64ri(i64 noundef %a) {
+; CHECK-LABEL: ror64ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolq $61, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xc7,0x3d]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = lshr i64 %a, 3
+  %1 = shl i64 %a, 61
+  %ror = or i64 %0, %1
+  ret i64 %ror
+}
+
+define void @ror8m1_legacy(ptr %ptr) {
+; CHECK-LABEL: ror8m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorb (%rdi) # encoding: [0xd0,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = lshr i8 %a, 1
+  %1 = shl i8 %a, 7
+  %ror = or i8 %0, %1
+  store i8 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror16m1_legacy(ptr %ptr) {
+; CHECK-LABEL: ror16m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorw (%rdi) # encoding: [0x66,0xd1,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  ; i16 rotate right by 1 on a value that is loaded from and stored back to
+  ; the same address, written as (a >> 1) | (a << 15). The two shift amounts
+  ; must add up to the bit width (16) for this to be a rotate; a shl amount
+  ; of 7 is only correct for i8 and would test plain shifts plus an or.
+  %a = load i16, ptr %ptr
+  %0 = lshr i16 %a, 1
+  %1 = shl i16 %a, 15
+  %ror = or i16 %0, %1
+  store i16 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror32m1_legacy(ptr %ptr) {
+; CHECK-LABEL: ror32m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorl (%rdi) # encoding: [0xd1,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  ; i32 rotate right by 1 on a value that is loaded from and stored back to
+  ; the same address, written as (a >> 1) | (a << 31). The two shift amounts
+  ; must add up to the bit width (32) for this to be a rotate; a shl amount
+  ; of 7 is only correct for i8 and would test plain shifts plus an or.
+  %a = load i32, ptr %ptr
+  %0 = lshr i32 %a, 1
+  %1 = shl i32 %a, 31
+  %ror = or i32 %0, %1
+  store i32 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror64m1_legacy(ptr %ptr) {
+; CHECK-LABEL: ror64m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rorq (%rdi) # encoding: [0x48,0xd1,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  ; i64 rotate right by 1 on a value that is loaded from and stored back to
+  ; the same address, written as (a >> 1) | (a << 63). The two shift amounts
+  ; must add up to the bit width (64) for this to be a rotate; a shl amount
+  ; of 7 is only correct for i8 and would test plain shifts plus an or.
+  %a = load i64, ptr %ptr
+  %0 = lshr i64 %a, 1
+  %1 = shl i64 %a, 63
+  %ror = or i64 %0, %1
+  store i64 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror8mcl_legacy(ptr %ptr, i8 %cl) {
+; CHECK-LABEL: ror8mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rorb %cl, (%rdi) # encoding: [0xd2,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %0 = lshr i8 %a, %cl
+  %1 = sub i8 8, %cl
+  %2 = shl i8 %a, %1
+  %ror = or i8 %0, %2
+  store i8 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror16mcl_legacy(ptr %ptr, i16 %cl) {
+; CHECK-LABEL: ror16mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rorw %cl, (%rdi) # encoding: [0x66,0xd3,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %0 = lshr i16 %a, %cl
+  %1 = sub i16 16, %cl
+  %2 = shl i16 %a, %1
+  %ror = or i16 %0, %2
+  store i16 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror32mcl_legacy(ptr %ptr, i32 %cl) {
+; CHECK-LABEL: ror32mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rorl %cl, (%rdi) # encoding: [0xd3,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %0 = lshr i32 %a, %cl
+  %1 = sub i32 32, %cl
+  %2 = shl i32 %a, %1
+  %ror = or i32 %0, %2
+  store i32 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror64mcl_legacy(ptr %ptr, i64 %cl) {
+; CHECK-LABEL: ror64mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    rorq %cl, (%rdi) # encoding: [0x48,0xd3,0x0f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %0 = lshr i64 %a, %cl
+  %1 = sub i64 64, %cl
+  %2 = shl i64 %a, %1
+  %ror = or i64 %0, %2
+  store i64 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror8mi_legacy(ptr %ptr) {
+; CHECK-LABEL: ror8mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolb $5, (%rdi) # encoding: [0xc0,0x07,0x05]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i8 rotate right by 3 stored back to the loaded address; expected as one in-memory rolb by 8 - 3 = 5
+  %a = load i8, ptr %ptr
+  %0 = lshr i8 %a, 3
+  %1 = shl i8 %a, 5 ; complementary amount: 3 + 5 == 8
+  %ror = or i8 %0, %1
+  store i8 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror16mi_legacy(ptr %ptr) {
+; CHECK-LABEL: ror16mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolw $13, (%rdi) # encoding: [0x66,0xc1,0x07,0x0d]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  ; i16 rotate right by 3 on a value that is loaded from and stored back to
+  ; the same address, written as (a >> 3) | (a << 13). The two shift amounts
+  ; must add up to the bit width (16) for this to be a rotate; a shl amount
+  ; of 5 is only correct for i8 and would test plain shifts plus an or.
+  %a = load i16, ptr %ptr
+  %0 = lshr i16 %a, 3
+  %1 = shl i16 %a, 13
+  %ror = or i16 %0, %1
+  store i16 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror32mi_legacy(ptr %ptr) {
+; CHECK-LABEL: ror32mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    roll $29, (%rdi) # encoding: [0xc1,0x07,0x1d]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  ; i32 rotate right by 3 on a value that is loaded from and stored back to
+  ; the same address, written as (a >> 3) | (a << 29). The two shift amounts
+  ; must add up to the bit width (32) for this to be a rotate; a shl amount
+  ; of 5 is only correct for i8 and would test plain shifts plus an or.
+  %a = load i32, ptr %ptr
+  %0 = lshr i32 %a, 3
+  %1 = shl i32 %a, 29
+  %ror = or i32 %0, %1
+  store i32 %ror, ptr %ptr
+  ret void
+}
+
+define void @ror64mi_legacy(ptr %ptr) {
+; CHECK-LABEL: ror64mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rolq $61, (%rdi) # encoding: [0x48,0xc1,0x07,0x3d]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  ; i64 rotate right by 3 on a value that is loaded from and stored back to
+  ; the same address, written as (a >> 3) | (a << 61). The two shift amounts
+  ; must add up to the bit width (64) for this to be a rotate; a shl amount
+  ; of 5 is only correct for i8 and would test plain shifts plus an or.
+  %a = load i64, ptr %ptr
+  %0 = lshr i64 %a, 3
+  %1 = shl i64 %a, 61
+  %ror = or i64 %0, %1
+  store i64 %ror, ptr %ptr
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/apx/sar.ll b/llvm/test/CodeGen/X86/apx/sar.ll
new file mode 100644
index 00000000000000..901d80f67a9c98
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/sar.ll
@@ -0,0 +1,434 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s
+
+define i8 @sar8m1(ptr %ptr) {
+; CHECK-LABEL: sar8m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarb $1, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0x3f,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %sar = ashr i8 %a, 1
+  ret i8 %sar
+}
+
+define i16 @sar16m1(ptr %ptr) {
+; CHECK-LABEL: sar16m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movswl (%rdi), %eax # encoding: [0x0f,0xbf,0x07]
+; CHECK-NEXT:    shrl %eax # EVEX TO LEGACY Compression encoding: [0xd1,0xe8]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 ashr by 1 of a loaded value; expected code works in 32 bits (movswl load) and returns only ax
+  %a = load i16, ptr %ptr
+  %sar = ashr i16 %a, 1 ; shrl on eax is enough: bit 15 of the result comes from the sign-extended bit 16
+  ret i16 %sar
+}
+
+define i32 @sar32m1(ptr %ptr) {
+; CHECK-LABEL: sar32m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarl $1, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0x3f,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %sar = ashr i32 %a, 1
+  ret i32 %sar
+}
+
+define i64 @sar64m1(ptr %ptr) {
+; CHECK-LABEL: sar64m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarq $1, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0x3f,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %sar = ashr i64 %a, 1
+  ret i64 %sar
+}
+
+define i8 @sar8mcl(ptr %ptr, i8 %cl) {
+; CHECK-LABEL: sar8mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    sarb %cl, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %sar = ashr i8 %a, %cl
+  ret i8 %sar
+}
+
+define i16 @sar16mcl(ptr %ptr, i16 %cl) {
+; CHECK-LABEL: sar16mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    movswl (%rdi), %eax # encoding: [0x0f,0xbf,0x07]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    sarl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xf8]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 ashr by a variable amount; expected code sign-extends the load to eax, shifts with sarl, returns ax
+  %a = load i16, ptr %ptr
+  %sar = ashr i16 %a, %cl ; the amount (2nd argument, esi) is first copied to ecx so that cl holds it
+  ret i16 %sar
+}
+
+define i32 @sar32mcl(ptr %ptr, i32 %cl) {
+; CHECK-LABEL: sar32mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    sarl %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %sar = ashr i32 %a, %cl
+  ret i32 %sar
+}
+
+define i64 @sar64mcl(ptr %ptr, i64 %cl) {
+; CHECK-LABEL: sar64mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    sarq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %sar = ashr i64 %a, %cl
+  ret i64 %sar
+}
+
+define i8 @sar8mi(ptr %ptr) {
+; CHECK-LABEL: sar8mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarb $4, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0x3f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %sar = ashr i8 %a, 4
+  ret i8 %sar
+}
+
+define i16 @sar16mi(ptr %ptr) {
+; CHECK-LABEL: sar16mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movswl (%rdi), %eax # encoding: [0x0f,0xbf,0x07]
+; CHECK-NEXT:    shrl $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe8,0x04]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 ashr by 4 of a loaded value; expected code works in 32 bits (movswl load) and returns only ax
+  %a = load i16, ptr %ptr
+  %sar = ashr i16 %a, 4 ; shrl on eax is enough: bits 15..12 of the result come from sign-extended bits 19..16
+  ret i16 %sar
+}
+
+define i32 @sar32mi(ptr %ptr) {
+; CHECK-LABEL: sar32mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarl $4, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0x3f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %sar = ashr i32 %a, 4
+  ret i32 %sar
+}
+
+define i64 @sar64mi(ptr %ptr) {
+; CHECK-LABEL: sar64mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarq $4, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0x3f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %sar = ashr i64 %a, 4
+  ret i64 %sar
+}
+
+define i8 @sar8r1(i8 noundef %a) {
+; CHECK-LABEL: sar8r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarb $1, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0xff,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %sar = ashr i8 %a, 1
+  ret i8 %sar
+}
+
+define i16 @sar16r1(i16 noundef %a) {
+; CHECK-LABEL: sar16r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movswl %di, %eax # encoding: [0x0f,0xbf,0xc7]
+; CHECK-NEXT:    shrl %eax # EVEX TO LEGACY Compression encoding: [0xd1,0xe8]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 ashr by 1 of a register argument; expected code sign-extends di into eax and returns only ax
+  %sar = ashr i16 %a, 1 ; shrl on eax is enough: bit 15 of the result comes from the sign-extended bit 16
+  ret i16 %sar
+}
+
+define i32 @sar32r1(i32 noundef %a) {
+; CHECK-LABEL: sar32r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarl $1, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xff,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %sar = ashr i32 %a, 1
+  ret i32 %sar
+}
+
+define i64 @sar64r1(i64 noundef %a) {
+; CHECK-LABEL: sar64r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarq $1, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xff,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %sar = ashr i64 %a, 1
+  ret i64 %sar
+}
+
+define i8 @sar8rcl(i8 noundef %a, i8 %cl) {
+; CHECK-LABEL: sar8rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    sarb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xff]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %sar = ashr i8 %a, %cl
+  ret i8 %sar
+}
+
+define i16 @sar16rcl(i16 noundef %a, i16 %cl) {
+; CHECK-LABEL: sar16rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    movswl %di, %eax # encoding: [0x0f,0xbf,0xc7]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    sarl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xf8]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 ashr by a variable amount; expected code sign-extends di into eax, shifts with sarl, returns ax
+  %sar = ashr i16 %a, %cl ; the amount (2nd argument, esi) is first copied to ecx so that cl holds it
+  ret i16 %sar
+}
+
+define i32 @sar32rcl(i32 noundef %a, i32 %cl) {
+; CHECK-LABEL: sar32rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    sarl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xff]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %sar = ashr i32 %a, %cl
+  ret i32 %sar
+}
+
+define i64 @sar64rcl(i64 noundef %a, i64 %cl) {
+; CHECK-LABEL: sar64rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    sarq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xff]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %sar = ashr i64 %a, %cl
+  ret i64 %sar
+}
+
+define i8 @sar8ri(i8 noundef %a) {
+; CHECK-LABEL: sar8ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarb $4, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0xff,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %sar = ashr i8 %a, 4
+  ret i8 %sar
+}
+
+define i16 @sar16ri(i16 noundef %a) {
+; CHECK-LABEL: sar16ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movswl %di, %eax # encoding: [0x0f,0xbf,0xc7]
+; CHECK-NEXT:    shrl $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe8,0x04]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 ashr by 4 of a register argument; expected code sign-extends di into eax and returns only ax
+  %sar = ashr i16 %a, 4 ; shrl on eax is enough: bits 15..12 of the result come from sign-extended bits 19..16
+  ret i16 %sar
+}
+
+define i32 @sar32ri(i32 noundef %a) {
+; CHECK-LABEL: sar32ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarl $4, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xff,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %sar = ashr i32 %a, 4
+  ret i32 %sar
+}
+
+define i64 @sar64ri(i64 noundef %a) {
+; CHECK-LABEL: sar64ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarq $4, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xff,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %sar = ashr i64 %a, 4
+  ret i64 %sar
+}
+
+define void @sar8m1_legacy(ptr %ptr) {
+; CHECK-LABEL: sar8m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarb (%rdi) # encoding: [0xd0,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %sar = ashr i8 %a, 1
+  store i8 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar16m1_legacy(ptr %ptr) {
+; CHECK-LABEL: sar16m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarw (%rdi) # encoding: [0x66,0xd1,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %sar = ashr i16 %a, 1
+  store i16 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar32m1_legacy(ptr %ptr) {
+; CHECK-LABEL: sar32m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarl (%rdi) # encoding: [0xd1,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %sar = ashr i32 %a, 1
+  store i32 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar64m1_legacy(ptr %ptr) {
+; CHECK-LABEL: sar64m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarq (%rdi) # encoding: [0x48,0xd1,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %sar = ashr i64 %a, 1
+  store i64 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar8mcl_legacy(ptr %ptr, i8 %cl) {
+; CHECK-LABEL: sar8mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    sarb %cl, (%rdi) # encoding: [0xd2,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %sar = ashr i8 %a, %cl
+  store i8 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar16mcl_legacy(ptr %ptr, i16 %cl) {
+; CHECK-LABEL: sar16mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    sarw %cl, (%rdi) # encoding: [0x66,0xd3,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %sar = ashr i16 %a, %cl
+  store i16 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar32mcl_legacy(ptr %ptr, i32 %cl) {
+; CHECK-LABEL: sar32mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    sarl %cl, (%rdi) # encoding: [0xd3,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %sar = ashr i32 %a, %cl
+  store i32 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar64mcl_legacy(ptr %ptr, i64 %cl) {
+; CHECK-LABEL: sar64mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    sarq %cl, (%rdi) # encoding: [0x48,0xd3,0x3f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %sar = ashr i64 %a, %cl
+  store i64 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar8mi_legacy(ptr %ptr) {
+; CHECK-LABEL: sar8mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarb $4, (%rdi) # encoding: [0xc0,0x3f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %sar = ashr i8 %a, 4
+  store i8 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar16mi_legacy(ptr %ptr) {
+; CHECK-LABEL: sar16mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarw $4, (%rdi) # encoding: [0x66,0xc1,0x3f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %sar = ashr i16 %a, 4
+  store i16 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar32mi_legacy(ptr %ptr) {
+; CHECK-LABEL: sar32mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarl $4, (%rdi) # encoding: [0xc1,0x3f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %sar = ashr i32 %a, 4
+  store i32 %sar, ptr %ptr
+  ret void
+}
+
+define void @sar64mi_legacy(ptr %ptr) {
+; CHECK-LABEL: sar64mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sarq $4, (%rdi) # encoding: [0x48,0xc1,0x3f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %sar = ashr i64 %a, 4
+  store i64 %sar, ptr %ptr
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/apx/shl.ll b/llvm/test/CodeGen/X86/apx/shl.ll
new file mode 100644
index 00000000000000..2ba418b8e3f547
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/shl.ll
@@ -0,0 +1,434 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s
+
+define i8 @shl8ri(i8 noundef %a) {
+; CHECK-LABEL: shl8ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shlb $4, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0xe7,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shl = shl i8 %a, 4
+  ret i8 %shl
+}
+
+define i16 @shl16ri(i16 noundef %a) {
+; CHECK-LABEL: shl16ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shll $4, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xe7,0x04]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 shl by 4; expected code shifts the whole 32-bit register (same shll as shl32ri) and returns ax
+  %shl = shl i16 %a, 4
+  ret i16 %shl
+}
+
+define i32 @shl32ri(i32 noundef %a) {
+; CHECK-LABEL: shl32ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shll $4, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xe7,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shl = shl i32 %a, 4
+  ret i32 %shl
+}
+
+define i64 @shl64ri(i64 noundef %a) {
+; CHECK-LABEL: shl64ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shlq $4, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xe7,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shl = shl i64 %a, 4
+  ret i64 %shl
+}
+
+define i8 @shl8m1(ptr %ptr) {
+; CHECK-LABEL: shl8m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzbl (%rdi), %eax # encoding: [0x0f,0xb6,0x07]
+; CHECK-NEXT:    addb %al, %al # EVEX TO LEGACY Compression encoding: [0x00,0xc0]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i8 shl by 1 of a loaded value; expected as a separate load followed by adding the register to itself
+  %a = load i8, ptr %ptr
+  %shl = shl i8 %a, 1 ; x << 1 == x + x, hence addb rather than a shlb with a memory source
+  ret i8 %shl
+}
+
+define i16 @shl16m1(ptr %ptr) {
+; CHECK-LABEL: shl16m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
+; CHECK-NEXT:    addl %eax, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc0]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 shl by 1 of a loaded value; expected as a zero-extending load, a 32-bit self-add, and ax returned
+  %a = load i16, ptr %ptr
+  %shl = shl i16 %a, 1 ; x << 1 == x + x, hence addl rather than a shift
+  ret i16 %shl
+}
+
+define i32 @shl32m1(ptr %ptr) {
+; CHECK-LABEL: shl32m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rdi), %eax # encoding: [0x8b,0x07]
+; CHECK-NEXT:    addl %eax, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc0]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i32 shl by 1 of a loaded value; expected as a separate load followed by adding the register to itself
+  %a = load i32, ptr %ptr
+  %shl = shl i32 %a, 1 ; x << 1 == x + x, hence addl rather than a shll with a memory source
+  ret i32 %shl
+}
+
+define i64 @shl64m1(ptr %ptr) {
+; CHECK-LABEL: shl64m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
+; CHECK-NEXT:    addq %rax, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc0]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i64 shl by 1 of a loaded value; expected as a separate load followed by adding the register to itself
+  %a = load i64, ptr %ptr
+  %shl = shl i64 %a, 1 ; x << 1 == x + x, hence addq rather than a shlq with a memory source
+  ret i64 %shl
+}
+
+define i8 @shl8mcl(ptr %ptr, i8 %cl) {
+; CHECK-LABEL: shl8mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shlb %cl, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shl = shl i8 %a, %cl
+  ret i8 %shl
+}
+
+define i16 @shl16mcl(ptr %ptr, i16 %cl) {
+; CHECK-LABEL: shl16mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shll %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe0]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 shl by a variable amount; expected code zero-extends the load to eax, shifts with shll, returns ax
+  %a = load i16, ptr %ptr
+  %shl = shl i16 %a, %cl ; the amount (2nd argument, esi) is first copied to ecx so that cl holds it
+  ret i16 %shl
+}
+
+define i32 @shl32mcl(ptr %ptr, i32 %cl) {
+; CHECK-LABEL: shl32mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shll %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shl = shl i32 %a, %cl
+  ret i32 %shl
+}
+
+define i64 @shl64mcl(ptr %ptr, i64 %cl) {
+; CHECK-LABEL: shl64mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shlq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shl = shl i64 %a, %cl
+  ret i64 %shl
+}
+
+define i8 @shl8mi(ptr %ptr) {
+; CHECK-LABEL: shl8mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shlb $4, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0x27,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shl = shl i8 %a, 4
+  ret i8 %shl
+}
+
+define i16 @shl16mi(ptr %ptr) {
+; CHECK-LABEL: shl16mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
+; CHECK-NEXT:    shll $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe0,0x04]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 shl by 4 of a loaded value; expected code zero-extends the load to eax, shifts there, returns ax
+  %a = load i16, ptr %ptr
+  %shl = shl i16 %a, 4
+  ret i16 %shl
+}
+
+define i32 @shl32mi(ptr %ptr) {
+; CHECK-LABEL: shl32mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shll $4, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0x27,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shl = shl i32 %a, 4
+  ret i32 %shl
+}
+
+define i64 @shl64mi(ptr %ptr) {
+; CHECK-LABEL: shl64mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shlq $4, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0x27,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shl = shl i64 %a, 4
+  ret i64 %shl
+}
+
+define i8 @shl8r1(i8 noundef %a) {
+; CHECK-LABEL: shl8r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addb %dil, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x00,0xff]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i8 shl by 1; expected as a three-operand add of dil to itself with the result in al
+  %shl = shl i8 %a, 1 ; x << 1 == x + x
+  ret i8 %shl
+}
+
+define i16 @shl16r1(i16 noundef %a) {
+; CHECK-LABEL: shl16r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addl %edi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xff]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 shl by 1; expected as a 32-bit three-operand add of edi to itself, with only ax returned
+  %shl = shl i16 %a, 1 ; x << 1 == x + x
+  ret i16 %shl
+}
+
+define i32 @shl32r1(i32 noundef %a) {
+; CHECK-LABEL: shl32r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addl %edi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xff]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i32 shl by 1; expected as a three-operand add of edi to itself with the result in eax
+  %shl = shl i32 %a, 1 ; x << 1 == x + x
+  ret i32 %shl
+}
+
+define i64 @shl64r1(i64 noundef %a) {
+; CHECK-LABEL: shl64r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addq %rdi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xff]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i64 shl by 1; expected as a three-operand add of rdi to itself with the result in rax
+  %shl = shl i64 %a, 1 ; x << 1 == x + x
+  ret i64 %shl
+}
+
+define i8 @shl8rcl(i8 noundef %a, i8 %cl) {
+; CHECK-LABEL: shl8rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shlb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xe7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shl = shl i8 %a, %cl
+  ret i8 %shl
+}
+
+define i16 @shl16rcl(i16 noundef %a, i16 %cl) {
+; CHECK-LABEL: shl16rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xe7]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry: ; i16 shl by a variable amount; expected code uses the same 32-bit shll as shl32rcl and returns ax
+  %shl = shl i16 %a, %cl ; the amount (2nd argument, esi) is first copied to ecx so that cl holds it
+  ret i16 %shl
+}
+
+define i32 @shl32rcl(i32 noundef %a, i32 %cl) {
+; CHECK-LABEL: shl32rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xe7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shl = shl i32 %a, %cl
+  ret i32 %shl
+}
+
+define i64 @shl64rcl(i64 noundef %a, i64 %cl) {
+; CHECK-LABEL: shl64rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shlq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xe7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shl = shl i64 %a, %cl
+  ret i64 %shl
+}
+
+define void @shl8m1_legacy(ptr %ptr) {
+; CHECK-LABEL: shl8m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shlb (%rdi) # encoding: [0xd0,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shl = shl i8 %a, 1
+  store i8 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl16m1_legacy(ptr %ptr) {
+; CHECK-LABEL: shl16m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shlw (%rdi) # encoding: [0x66,0xd1,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %shl = shl i16 %a, 1
+  store i16 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl32m1_legacy(ptr %ptr) {
+; CHECK-LABEL: shl32m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shll (%rdi) # encoding: [0xd1,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shl = shl i32 %a, 1
+  store i32 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl64m1_legacy(ptr %ptr) {
+; CHECK-LABEL: shl64m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shlq (%rdi) # encoding: [0x48,0xd1,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shl = shl i64 %a, 1
+  store i64 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl8mi_legacy(ptr %ptr) {
+; CHECK-LABEL: shl8mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shlb $4, (%rdi) # encoding: [0xc0,0x27,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shl = shl i8 %a, 4
+  store i8 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl16mi_legacy(ptr %ptr) {
+; CHECK-LABEL: shl16mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shlw $4, (%rdi) # encoding: [0x66,0xc1,0x27,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %shl = shl i16 %a, 4
+  store i16 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl32mi_legacy(ptr %ptr) {
+; CHECK-LABEL: shl32mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shll $4, (%rdi) # encoding: [0xc1,0x27,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shl = shl i32 %a, 4
+  store i32 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl64mi_legacy(ptr %ptr) {
+; CHECK-LABEL: shl64mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shlq $4, (%rdi) # encoding: [0x48,0xc1,0x27,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shl = shl i64 %a, 4
+  store i64 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl8mcl_legacy(ptr %ptr, i8 %cl) {
+; CHECK-LABEL: shl8mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shlb %cl, (%rdi) # encoding: [0xd2,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shl = shl i8 %a, %cl
+  store i8 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl16mcl_legacy(ptr %ptr, i16 %cl) {
+; CHECK-LABEL: shl16mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shlw %cl, (%rdi) # encoding: [0x66,0xd3,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %shl = shl i16 %a, %cl
+  store i16 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl32mcl_legacy(ptr %ptr, i32 %cl) {
+; CHECK-LABEL: shl32mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shll %cl, (%rdi) # encoding: [0xd3,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shl = shl i32 %a, %cl
+  store i32 %shl, ptr %ptr
+  ret void
+}
+
+define void @shl64mcl_legacy(ptr %ptr, i64 %cl) {
+; CHECK-LABEL: shl64mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shlq %cl, (%rdi) # encoding: [0x48,0xd3,0x27]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shl = shl i64 %a, %cl
+  store i64 %shl, ptr %ptr
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/apx/shld.ll b/llvm/test/CodeGen/X86/apx/shld.ll
new file mode 100644
index 00000000000000..a8a26ccdd87a13
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/shld.ll
@@ -0,0 +1,228 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs | FileCheck %s
+
+declare i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %cl)
+declare i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %cl)
+declare i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %cl)
+
+define i16 @shld16rrcl(i16 noundef %a, i16 noundef %b, i8 %cl) {
+; CHECK-LABEL: shld16rrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andb $15, %dl, %cl
+; CHECK-NEXT:    shldw %cl, %si, %di, %ax
+; CHECK-NEXT:    retq
+entry:
+    %clin = sext i8 %cl to i16
+    %shld = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %clin)
+    ret i16 %shld
+}
+
+define i32 @shld32rrcl(i32 noundef %a, i32 noundef %b, i8 %cl) {
+; CHECK-LABEL: shld32rrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shldl %cl, %esi, %edi, %eax
+; CHECK-NEXT:    retq
+entry:
+    %clin = sext i8 %cl to i32
+    %shld = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %clin)
+    ret i32 %shld
+}
+
+define i64 @shld64rrcl(i64 noundef %a, i64 noundef %b, i8 %cl) {
+; CHECK-LABEL: shld64rrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shldq %cl, %rsi, %rdi, %rax
+; CHECK-NEXT:    retq
+entry:
+    %clin = sext i8 %cl to i64
+    %shld = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %clin)
+    ret i64 %shld
+}
+
+define i16 @shld16rri8(i16 noundef %a, i16 noundef %b) {
+; CHECK-LABEL: shld16rri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shldw $12, %si, %di, %ax
+; CHECK-NEXT:    retq
+entry:
+    %shld = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 12)
+    ret i16 %shld
+}
+
+define i32 @shld32rri8(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: shld32rri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shldl $12, %esi, %edi, %eax
+; CHECK-NEXT:    retq
+entry:
+    %shld = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 12)
+    ret i32 %shld
+}
+
+define i64 @shld64rri8(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: shld64rri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shldq $12, %rsi, %rdi, %rax
+; CHECK-NEXT:    retq
+entry:
+    %shld = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 12)
+    ret i64 %shld
+}
+
+define i16 @shld16mrcl(ptr %ptr, i16 noundef %b, i8 %cl) {
+; CHECK-LABEL: shld16mrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andb $15, %dl, %cl
+; CHECK-NEXT:    shldw %cl, %si, (%rdi), %ax
+; CHECK-NEXT:    retq
+entry:
+    %a = load i16, ptr %ptr
+    %clin = sext i8 %cl to i16
+    %shld = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %clin)
+    ret i16 %shld
+}
+
+define i32 @shld32mrcl(ptr %ptr, i32 noundef %b, i8 %cl) {
+; CHECK-LABEL: shld32mrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shldl %cl, %esi, (%rdi), %eax
+; CHECK-NEXT:    retq
+entry:
+    %a = load i32, ptr %ptr
+    %clin = sext i8 %cl to i32
+    %shld = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %clin)
+    ret i32 %shld
+}
+
+define i64 @shld64mrcl(ptr %ptr, i64 noundef %b, i8 %cl) {
+; CHECK-LABEL: shld64mrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shldq %cl, %rsi, (%rdi), %rax
+; CHECK-NEXT:    retq
+entry:
+    %a = load i64, ptr %ptr
+    %clin = sext i8 %cl to i64
+    %shld = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %clin)
+    ret i64 %shld
+}
+
+define i16 @shld16mri8(ptr %ptr, i16 noundef %b) {
+; CHECK-LABEL: shld16mri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shldw $12, %si, (%rdi), %ax
+; CHECK-NEXT:    retq
+entry:
+    %a = load i16, ptr %ptr
+    %shld = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 12)
+    ret i16 %shld
+}
+
+define i32 @shld32mri8(ptr %ptr, i32 noundef %b) {
+; CHECK-LABEL: shld32mri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shldl $12, %esi, (%rdi), %eax
+; CHECK-NEXT:    retq
+entry:
+    %a = load i32, ptr %ptr
+    %shld = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 12)
+    ret i32 %shld
+}
+
+define i64 @shld64mri8(ptr %ptr, i64 noundef %b) {
+; CHECK-LABEL: shld64mri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shldq $12, %rsi, (%rdi), %rax
+; CHECK-NEXT:    retq
+entry:
+    %a = load i64, ptr %ptr
+    %shld = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 12)
+    ret i64 %shld
+}
+
+define void @shld16mrcl_legacy(ptr %ptr, i16 noundef %b, i8 %cl) {
+; CHECK-LABEL: shld16mrcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andb $15, %dl, %cl
+; CHECK-NEXT:    shldw %cl, %si, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    %a = load i16, ptr %ptr
+    %clin = sext i8 %cl to i16
+    %shld = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %clin)
+    store i16 %shld, ptr %ptr
+    ret void
+}
+
+define void @shld32mrcl_legacy(ptr %ptr, i32 noundef %b, i8 %cl) {
+; CHECK-LABEL: shld32mrcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shldl %cl, %esi, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    %a = load i32, ptr %ptr
+    %clin = sext i8 %cl to i32
+    %shld = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %clin)
+    store i32 %shld, ptr %ptr
+    ret void
+}
+
+define void @shld64mrcl_legacy(ptr %ptr, i64 noundef %b, i8 %cl) {
+; CHECK-LABEL: shld64mrcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shldq %cl, %rsi, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    %a = load i64, ptr %ptr
+    %clin = sext i8 %cl to i64
+    %shld = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %clin)
+    store i64 %shld, ptr %ptr
+    ret void
+}
+
+define void @shld16mri8_legacy(ptr %ptr, i16 noundef %b) {
+; CHECK-LABEL: shld16mri8_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shldw $12, %si, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    %a = load i16, ptr %ptr
+    %shld = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 12)
+    store i16 %shld, ptr %ptr
+    ret void
+}
+
+define void @shld32mri8_legacy(ptr %ptr, i32 noundef %b) {
+; CHECK-LABEL: shld32mri8_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shldl $12, %esi, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    %a = load i32, ptr %ptr
+    %shld = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 12)
+    store i32 %shld, ptr %ptr
+    ret void
+}
+
+define void @shld64mri8_legacy(ptr %ptr, i64 noundef %b) {
+; CHECK-LABEL: shld64mri8_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shldq $12, %rsi, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    %a = load i64, ptr %ptr
+    %shld = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 12)
+    store i64 %shld, ptr %ptr
+    ret void
+}
diff --git a/llvm/test/CodeGen/X86/apx/shr.ll b/llvm/test/CodeGen/X86/apx/shr.ll
new file mode 100644
index 00000000000000..cd33fbb86f6524
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/shr.ll
@@ -0,0 +1,434 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s
+
+define i8 @shr8m1(ptr %ptr) {
+; CHECK-LABEL: shr8m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrb $1, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0x2f,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shr = lshr i8 %a, 1
+  ret i8 %shr
+}
+
+define i16 @shr16m1(ptr %ptr) {
+; CHECK-LABEL: shr16m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
+; CHECK-NEXT:    shrl %eax # EVEX TO LEGACY Compression encoding: [0xd1,0xe8]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %shr = lshr i16 %a, 1
+  ret i16 %shr
+}
+
+define i32 @shr32m1(ptr %ptr) {
+; CHECK-LABEL: shr32m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrl $1, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0x2f,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shr = lshr i32 %a, 1
+  ret i32 %shr
+}
+
+define i64 @shr64m1(ptr %ptr) {
+; CHECK-LABEL: shr64m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrq $1, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0x2f,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shr = lshr i64 %a, 1
+  ret i64 %shr
+}
+
+define i8 @shr8mcl(ptr %ptr, i8 %cl) {
+; CHECK-LABEL: shr8mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrb %cl, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shr = lshr i8 %a, %cl
+  ret i8 %shr
+}
+
+define i16 @shr16mcl(ptr %ptr, i16 %cl) {
+; CHECK-LABEL: shr16mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe8]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %shr = lshr i16 %a, %cl
+  ret i16 %shr
+}
+
+define i32 @shr32mcl(ptr %ptr, i32 %cl) {
+; CHECK-LABEL: shr32mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrl %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shr = lshr i32 %a, %cl
+  ret i32 %shr
+}
+
+define i64 @shr64mcl(ptr %ptr, i64 %cl) {
+; CHECK-LABEL: shr64mcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shrq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shr = lshr i64 %a, %cl
+  ret i64 %shr
+}
+
+define i8 @shr8mi(ptr %ptr) {
+; CHECK-LABEL: shr8mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrb $4, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0x2f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shr = lshr i8 %a, 4
+  ret i8 %shr
+}
+
+define i16 @shr16mi(ptr %ptr) {
+; CHECK-LABEL: shr16mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
+; CHECK-NEXT:    shrl $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe8,0x04]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %shr = lshr i16 %a, 4
+  ret i16 %shr
+}
+
+define i32 @shr32mi(ptr %ptr) {
+; CHECK-LABEL: shr32mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrl $4, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0x2f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shr = lshr i32 %a, 4
+  ret i32 %shr
+}
+
+define i64 @shr64mi(ptr %ptr) {
+; CHECK-LABEL: shr64mi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrq $4, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0x2f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shr = lshr i64 %a, 4
+  ret i64 %shr
+}
+
+define i8 @shr8r1(i8 noundef %a) {
+; CHECK-LABEL: shr8r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrb $1, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0xef,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i8 %a, 1
+  ret i8 %shr
+}
+
+define i16 @shr16r1(i16 noundef %a) {
+; CHECK-LABEL: shr16r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzwl %di, %eax # encoding: [0x0f,0xb7,0xc7]
+; CHECK-NEXT:    shrl %eax # EVEX TO LEGACY Compression encoding: [0xd1,0xe8]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i16 %a, 1
+  ret i16 %shr
+}
+
+define i32 @shr32r1(i32 noundef %a) {
+; CHECK-LABEL: shr32r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrl $1, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xef,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i32 %a, 1
+  ret i32 %shr
+}
+
+define i64 @shr64r1(i64 noundef %a) {
+; CHECK-LABEL: shr64r1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrq $1, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x01]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i64 %a, 1
+  ret i64 %shr
+}
+
+define i8 @shr8rcl(i8 noundef %a, i8 %cl) {
+; CHECK-LABEL: shr8rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xef]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i8 %a, %cl
+  ret i8 %shr
+}
+
+define i16 @shr16rcl(i16 noundef %a, i16 %cl) {
+; CHECK-LABEL: shr16rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    movzwl %di, %eax # encoding: [0x0f,0xb7,0xc7]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe8]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i16 %a, %cl
+  ret i16 %shr
+}
+
+define i32 @shr32rcl(i32 noundef %a, i32 %cl) {
+; CHECK-LABEL: shr32rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i32 %a, %cl
+  ret i32 %shr
+}
+
+define i64 @shr64rcl(i64 noundef %a, i64 %cl) {
+; CHECK-LABEL: shr64rcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i64 %a, %cl
+  ret i64 %shr
+}
+
+define i8 @shr8ri(i8 noundef %a) {
+; CHECK-LABEL: shr8ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrb $4, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0xef,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i8 %a, 4
+  ret i8 %shr
+}
+
+define i16 @shr16ri(i16 noundef %a) {
+; CHECK-LABEL: shr16ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzwl %di, %eax # encoding: [0x0f,0xb7,0xc7]
+; CHECK-NEXT:    shrl $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe8,0x04]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i16 %a, 4
+  ret i16 %shr
+}
+
+define i32 @shr32ri(i32 noundef %a) {
+; CHECK-LABEL: shr32ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrl $4, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xef,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i32 %a, 4
+  ret i32 %shr
+}
+
+define i64 @shr64ri(i64 noundef %a) {
+; CHECK-LABEL: shr64ri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrq $4, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %shr = lshr i64 %a, 4
+  ret i64 %shr
+}
+
+define void @shr8m1_legacy(ptr %ptr) {
+; CHECK-LABEL: shr8m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrb (%rdi) # encoding: [0xd0,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shr = lshr i8 %a, 1
+  store i8 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr16m1_legacy(ptr %ptr) {
+; CHECK-LABEL: shr16m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrw (%rdi) # encoding: [0x66,0xd1,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %shr = lshr i16 %a, 1
+  store i16 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr32m1_legacy(ptr %ptr) {
+; CHECK-LABEL: shr32m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrl (%rdi) # encoding: [0xd1,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shr = lshr i32 %a, 1
+  store i32 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr64m1_legacy(ptr %ptr) {
+; CHECK-LABEL: shr64m1_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrq (%rdi) # encoding: [0x48,0xd1,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shr = lshr i64 %a, 1
+  store i64 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr8mi_legacy(ptr %ptr) {
+; CHECK-LABEL: shr8mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrb $4, (%rdi) # encoding: [0xc0,0x2f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shr = lshr i8 %a, 4
+  store i8 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr16mi_legacy(ptr %ptr) {
+; CHECK-LABEL: shr16mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrw $4, (%rdi) # encoding: [0x66,0xc1,0x2f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %shr = lshr i16 %a, 4
+  store i16 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr32mi_legacy(ptr %ptr) {
+; CHECK-LABEL: shr32mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrl $4, (%rdi) # encoding: [0xc1,0x2f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shr = lshr i32 %a, 4
+  store i32 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr64mi_legacy(ptr %ptr) {
+; CHECK-LABEL: shr64mi_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrq $4, (%rdi) # encoding: [0x48,0xc1,0x2f,0x04]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shr = lshr i64 %a, 4
+  store i64 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr8mcl_legacy(ptr %ptr, i8 %cl) {
+; CHECK-LABEL: shr8mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrb %cl, (%rdi) # encoding: [0xd2,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i8, ptr %ptr
+  %shr = lshr i8 %a, %cl
+  store i8 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr16mcl_legacy(ptr %ptr, i16 %cl) {
+; CHECK-LABEL: shr16mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrw %cl, (%rdi) # encoding: [0x66,0xd3,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i16, ptr %ptr
+  %shr = lshr i16 %a, %cl
+  store i16 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr32mcl_legacy(ptr %ptr, i32 %cl) {
+; CHECK-LABEL: shr32mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrl %cl, (%rdi) # encoding: [0xd3,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i32, ptr %ptr
+  %shr = lshr i32 %a, %cl
+  store i32 %shr, ptr %ptr
+  ret void
+}
+
+define void @shr64mcl_legacy(ptr %ptr, i64 %cl) {
+; CHECK-LABEL: shr64mcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shrq %cl, (%rdi) # encoding: [0x48,0xd3,0x2f]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %a = load i64, ptr %ptr
+  %shr = lshr i64 %a, %cl
+  store i64 %shr, ptr %ptr
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/apx/shrd.ll b/llvm/test/CodeGen/X86/apx/shrd.ll
new file mode 100644
index 00000000000000..254a56a4099d78
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/shrd.ll
@@ -0,0 +1,240 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs | FileCheck %s
+
+declare i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %cl)
+declare i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %cl)
+declare i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %cl)
+
+define i16 @shrd16rrcl(i16 noundef %a, i16 noundef %b, i8 %cl) {
+; CHECK-LABEL: shrd16rrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andb $15, %dl, %cl
+; CHECK-NEXT:    shrdw %cl, %di, %si, %ax
+; CHECK-NEXT:    retq
+entry:
+    %clin = sext i8 %cl to i16 ; widened only to match the intrinsic's i16 count operand
+    %shrd = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %clin) ; low half of (%a:%b) >> count: SHRD src=%a (%di), dst=%b (%si)
+    ret i16 %shrd
+}
+
+define i32 @shrd32rrcl(i32 noundef %a, i32 noundef %b, i8 %cl) {
+; CHECK-LABEL: shrd32rrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrdl %cl, %edi, %esi, %eax
+; CHECK-NEXT:    retq
+entry:
+    %clin = sext i8 %cl to i32 ; widened only to match the intrinsic's i32 count operand
+    %shrd = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %clin) ; low half of (%a:%b) >> count: SHRD src=%a (%edi), dst=%b (%esi)
+    ret i32 %shrd
+}
+
+define i64 @shrd64rrcl(i64 noundef %a, i64 noundef %b, i8 %cl) {
+; CHECK-LABEL: shrd64rrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrdq %cl, %rdi, %rsi, %rax
+; CHECK-NEXT:    retq
+entry:
+    %clin = sext i8 %cl to i64 ; widened only to match the intrinsic's i64 count operand
+    %shrd = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %clin) ; low half of (%a:%b) >> count: SHRD src=%a (%rdi), dst=%b (%rsi)
+    ret i64 %shrd
+}
+
+define i16 @shrd16rri8(i16 noundef %a, i16 noundef %b) {
+; CHECK-LABEL: shrd16rri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrdw $12, %di, %si, %ax
+; CHECK-NEXT:    retq
+entry:
+    %shrd = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 12) ; low half of (%a:%b) >> 12: SHRD src=%a (%di), dst=%b (%si)
+    ret i16 %shrd
+}
+
+define i32 @shrd32rri8(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: shrd32rri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrdl $12, %edi, %esi, %eax
+; CHECK-NEXT:    retq
+entry:
+    %shrd = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 12) ; low half of (%a:%b) >> 12: SHRD src=%a (%edi), dst=%b (%esi)
+    ret i32 %shrd
+}
+
+define i64 @shrd64rri8(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: shrd64rri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrdq $12, %rdi, %rsi, %rax
+; CHECK-NEXT:    retq
+entry:
+    %shrd = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 12) ; low half of (%a:%b) >> 12: SHRD src=%a (%rdi), dst=%b (%rsi)
+    ret i64 %shrd
+}
+
+define i16 @shrd16mrcl(ptr %ptr, i16 noundef %b, i8 %cl) {
+; CHECK-LABEL: shrd16mrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andb $15, %dl, %cl
+; CHECK-NEXT:    shrdw %cl, %si, (%rdi), %ax
+; CHECK-NEXT:    retq
+entry:
+    ; The load is the second fshr operand (SHRD's destination) so it can fold into the memory form.
+    %a = load i16, ptr %ptr
+    %clin = sext i8 %cl to i16
+    %shrd = call i16 @llvm.fshr.i16(i16 %b, i16 %a, i16 %clin)
+    ret i16 %shrd
+}
+
+define i32 @shrd32mrcl(ptr %ptr, i32 noundef %b, i8 %cl) {
+; CHECK-LABEL: shrd32mrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrdl %cl, %esi, (%rdi), %eax
+; CHECK-NEXT:    retq
+entry:
+    ; The load is the second fshr operand (SHRD's destination) so it can fold into the memory form.
+    %a = load i32, ptr %ptr
+    %clin = sext i8 %cl to i32
+    %shrd = call i32 @llvm.fshr.i32(i32 %b, i32 %a, i32 %clin)
+    ret i32 %shrd
+}
+
+define i64 @shrd64mrcl(ptr %ptr, i64 noundef %b, i8 %cl) {
+; CHECK-LABEL: shrd64mrcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrdq %cl, %rsi, (%rdi), %rax
+; CHECK-NEXT:    retq
+entry:
+    ; The load is the second fshr operand (SHRD's destination) so it can fold into the memory form.
+    %a = load i64, ptr %ptr
+    %clin = sext i8 %cl to i64
+    %shrd = call i64 @llvm.fshr.i64(i64 %b, i64 %a, i64 %clin)
+    ret i64 %shrd
+}
+
+define i16 @shrd16mri8(ptr %ptr, i16 noundef %b) {
+; CHECK-LABEL: shrd16mri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrdw $12, %si, (%rdi), %ax
+; CHECK-NEXT:    retq
+entry:
+    %a = load i16, ptr %ptr ; becomes SHRD's memory destination, i.e. the low half of the pair
+    %shrd = call i16 @llvm.fshr.i16(i16 %b, i16 %a, i16 12) ; (%b:%a) >> 12 is exactly what "shrdw $12, %si, (%rdi)" computes
+    ret i16 %shrd
+}
+
+define i32 @shrd32mri8(ptr %ptr, i32 noundef %b) {
+; CHECK-LABEL: shrd32mri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrdl $12, %esi, (%rdi), %eax
+; CHECK-NEXT:    retq
+entry:
+    %a = load i32, ptr %ptr ; becomes SHRD's memory destination, i.e. the low half of the pair
+    %shrd = call i32 @llvm.fshr.i32(i32 %b, i32 %a, i32 12) ; (%b:%a) >> 12 is exactly what "shrdl $12, %esi, (%rdi)" computes
+    ret i32 %shrd
+}
+
+define i64 @shrd64mri8(ptr %ptr, i64 noundef %b) {
+; CHECK-LABEL: shrd64mri8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrdq $12, %rsi, (%rdi), %rax
+; CHECK-NEXT:    retq
+entry:
+    %a = load i64, ptr %ptr ; becomes SHRD's memory destination, i.e. the low half of the pair
+    %shrd = call i64 @llvm.fshr.i64(i64 %b, i64 %a, i64 12) ; (%b:%a) >> 12 is exactly what "shrdq $12, %rsi, (%rdi)" computes
+    ret i64 %shrd
+}
+
+define void @shrd16mrcl_legacy(ptr %ptr, i16 noundef %b, i8 %cl) {
+; CHECK-LABEL: shrd16mrcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andb $15, %dl, %cl
+; CHECK-NEXT:    shrdw %cl, %si, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    ; The load is the second fshr operand (SHRD's destination) and the result is stored back
+    ; to the same address, so the in-place (non-NDD) read-modify-write SHRD should be selected.
+    %a = load i16, ptr %ptr
+    %clin = sext i8 %cl to i16
+    %shrd = call i16 @llvm.fshr.i16(i16 %b, i16 %a, i16 %clin)
+    store i16 %shrd, ptr %ptr
+    ret void
+}
+
+define void @shrd32mrcl_legacy(ptr %ptr, i32 noundef %b, i8 %cl) {
+; CHECK-LABEL: shrd32mrcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrdl %cl, %esi, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    ; The load is the second fshr operand (SHRD's destination) and the result is stored back
+    ; to the same address, so the in-place (non-NDD) read-modify-write SHRD should be selected.
+    %a = load i32, ptr %ptr
+    %clin = sext i8 %cl to i32
+    %shrd = call i32 @llvm.fshr.i32(i32 %b, i32 %a, i32 %clin)
+    store i32 %shrd, ptr %ptr
+    ret void
+}
+
+define void @shrd64mrcl_legacy(ptr %ptr, i64 noundef %b, i8 %cl) {
+; CHECK-LABEL: shrd64mrcl_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrdq %cl, %rsi, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    ; The load is the second fshr operand (SHRD's destination) and the result is stored back
+    ; to the same address, so the in-place (non-NDD) read-modify-write SHRD should be selected.
+    %a = load i64, ptr %ptr
+    %clin = sext i8 %cl to i64
+    %shrd = call i64 @llvm.fshr.i64(i64 %b, i64 %a, i64 %clin)
+    store i64 %shrd, ptr %ptr
+    ret void
+}
+
+define void @shrd16mri8_legacy(ptr %ptr, i16 noundef %b) {
+; CHECK-LABEL: shrd16mri8_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrdw $12, %si, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    ; (%b:%a) >> 12 stored back to %ptr is the in-place "shrdw $12, %si, (%rdi)"; %a must be the second operand.
+    %a = load i16, ptr %ptr
+    %shrd = call i16 @llvm.fshr.i16(i16 %b, i16 %a, i16 12)
+    store i16 %shrd, ptr %ptr
+    ret void
+}
+
+define void @shrd32mri8_legacy(ptr %ptr, i32 noundef %b) {
+; CHECK-LABEL: shrd32mri8_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrdl $12, %esi, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    ; (%b:%a) >> 12 stored back to %ptr is the in-place "shrdl $12, %esi, (%rdi)"; %a must be the second operand.
+    %a = load i32, ptr %ptr
+    %shrd = call i32 @llvm.fshr.i32(i32 %b, i32 %a, i32 12)
+    store i32 %shrd, ptr %ptr
+    ret void
+}
+
+define void @shrd64mri8_legacy(ptr %ptr, i64 noundef %b) {
+; CHECK-LABEL: shrd64mri8_legacy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    shrdq $12, %rsi, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+    ; (%b:%a) >> 12 stored back to %ptr is the in-place "shrdq $12, %rsi, (%rdi)"; %a must be the second operand.
+    %a = load i64, ptr %ptr
+    %shrd = call i64 @llvm.fshr.i64(i64 %b, i64 %a, i64 12)
+    store i64 %shrd, ptr %ptr
+    ret void
+}
diff --git a/llvm/test/MC/Disassembler/X86/apx/rcl.txt b/llvm/test/MC/Disassembler/X86/apx/rcl.txt
new file mode 100644
index 00000000000000..3dc9cb466d38fd
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/rcl.txt
@@ -0,0 +1,194 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT:   {evex}	rclb	$123, %bl
+# INTEL: {evex}	rcl	bl, 123
+0x62,0xf4,0x7c,0x08,0xc0,0xd3,0x7b
+
+# ATT:   rclb	$123, %bl, %bl
+# INTEL: rcl	bl, bl, 123
+0x62,0xf4,0x64,0x18,0xc0,0xd3,0x7b
+
+# ATT:   {evex}	rclw	$123, %dx
+# INTEL: {evex}	rcl	dx, 123
+0x62,0xf4,0x7d,0x08,0xc1,0xd2,0x7b
+
+# ATT:   rclw	$123, %dx, %dx
+# INTEL: rcl	dx, dx, 123
+0x62,0xf4,0x6d,0x18,0xc1,0xd2,0x7b
+
+# ATT:   {evex}	rcll	$123, %ecx
+# INTEL: {evex}	rcl	ecx, 123
+0x62,0xf4,0x7c,0x08,0xc1,0xd1,0x7b
+
+# ATT:   rcll	$123, %ecx, %ecx
+# INTEL: rcl	ecx, ecx, 123
+0x62,0xf4,0x74,0x18,0xc1,0xd1,0x7b
+
+# ATT:   {evex}	rclq	$123, %r9
+# INTEL: {evex}	rcl	r9, 123
+0x62,0xd4,0xfc,0x08,0xc1,0xd1,0x7b
+
+# ATT:   rclq	$123, %r9, %r9
+# INTEL: rcl	r9, r9, 123
+0x62,0xd4,0xb4,0x18,0xc1,0xd1,0x7b
+
+# ATT:   {evex}	rclb	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rcl	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc0,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rclb	$123, 291(%r8,%rax,4), %bl
+# INTEL: rcl	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x18,0xc0,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rclw	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rcl	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x08,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rclw	$123, 291(%r8,%rax,4), %dx
+# INTEL: rcl	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x18,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rcll	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rcl	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rcll	$123, 291(%r8,%rax,4), %ecx
+# INTEL: rcl	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x18,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rclq	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rcl	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x08,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rclq	$123, 291(%r8,%rax,4), %r9
+# INTEL: rcl	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x18,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rclb	%bl
+# INTEL: {evex}	rcl	bl
+0x62,0xf4,0x7c,0x08,0xd0,0xd3
+
+# ATT:   rclb	%bl, %bl
+# INTEL: rcl	bl, bl
+0x62,0xf4,0x64,0x18,0xd0,0xd3
+
+# ATT:   {evex}	rclb	%cl, %bl
+# INTEL: {evex}	rcl	bl, cl
+0x62,0xf4,0x7c,0x08,0xd2,0xd3
+
+# ATT:   rclb	%cl, %bl, %bl
+# INTEL: rcl	bl, bl, cl
+0x62,0xf4,0x64,0x18,0xd2,0xd3
+
+# ATT:   {evex}	rclw	%cl, %dx
+# INTEL: {evex}	rcl	dx, cl
+0x62,0xf4,0x7d,0x08,0xd3,0xd2
+
+# ATT:   rclw	%cl, %dx, %dx
+# INTEL: rcl	dx, dx, cl
+0x62,0xf4,0x6d,0x18,0xd3,0xd2
+
+# ATT:   {evex}	rcll	%cl, %ecx
+# INTEL: {evex}	rcl	ecx, cl
+0x62,0xf4,0x7c,0x08,0xd3,0xd1
+
+# ATT:   rcll	%cl, %ecx, %ecx
+# INTEL: rcl	ecx, ecx, cl
+0x62,0xf4,0x74,0x18,0xd3,0xd1
+
+# ATT:   {evex}	rclq	%cl, %r9
+# INTEL: {evex}	rcl	r9, cl
+0x62,0xd4,0xfc,0x08,0xd3,0xd1
+
+# ATT:   rclq	%cl, %r9, %r9
+# INTEL: rcl	r9, r9, cl
+0x62,0xd4,0xb4,0x18,0xd3,0xd1
+
+# ATT:   {evex}	rclb	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rcl	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd2,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rclb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: rcl	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x18,0xd2,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rclw	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rcl	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x08,0xd3,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rclw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: rcl	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x18,0xd3,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rcll	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rcl	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd3,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rcll	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: rcl	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x18,0xd3,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rclq	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rcl	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x08,0xd3,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rclq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: rcl	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x18,0xd3,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rclw	%dx
+# INTEL: {evex}	rcl	dx
+0x62,0xf4,0x7d,0x08,0xd1,0xd2
+
+# ATT:   rclw	%dx, %dx
+# INTEL: rcl	dx, dx
+0x62,0xf4,0x6d,0x18,0xd1,0xd2
+
+# ATT:   {evex}	rcll	%ecx
+# INTEL: {evex}	rcl	ecx
+0x62,0xf4,0x7c,0x08,0xd1,0xd1
+
+# ATT:   rcll	%ecx, %ecx
+# INTEL: rcl	ecx, ecx
+0x62,0xf4,0x74,0x18,0xd1,0xd1
+
+# ATT:   {evex}	rclq	%r9
+# INTEL: {evex}	rcl	r9
+0x62,0xd4,0xfc,0x08,0xd1,0xd1
+
+# ATT:   rclq	%r9, %r9
+# INTEL: rcl	r9, r9
+0x62,0xd4,0xb4,0x18,0xd1,0xd1
+
+# ATT:   {evex}	rclb	291(%r8,%rax,4)
+# INTEL: {evex}	rcl	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd0,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rclb	291(%r8,%rax,4), %bl
+# INTEL: rcl	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x18,0xd0,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rclw	291(%r8,%rax,4)
+# INTEL: {evex}	rcl	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x08,0xd1,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rclw	291(%r8,%rax,4), %dx
+# INTEL: rcl	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x18,0xd1,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rcll	291(%r8,%rax,4)
+# INTEL: {evex}	rcl	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd1,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rcll	291(%r8,%rax,4), %ecx
+# INTEL: rcl	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x18,0xd1,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rclq	291(%r8,%rax,4)
+# INTEL: {evex}	rcl	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x08,0xd1,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rclq	291(%r8,%rax,4), %r9
+# INTEL: rcl	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x18,0xd1,0x94,0x80,0x23,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/X86/apx/rcr.txt b/llvm/test/MC/Disassembler/X86/apx/rcr.txt
new file mode 100644
index 00000000000000..42b9849a9e16ff
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/rcr.txt
@@ -0,0 +1,194 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT:   {evex}	rcrb	$123, %bl
+# INTEL: {evex}	rcr	bl, 123
+0x62,0xf4,0x7c,0x08,0xc0,0xdb,0x7b
+
+# ATT:   rcrb	$123, %bl, %bl
+# INTEL: rcr	bl, bl, 123
+0x62,0xf4,0x64,0x18,0xc0,0xdb,0x7b
+
+# ATT:   {evex}	rcrw	$123, %dx
+# INTEL: {evex}	rcr	dx, 123
+0x62,0xf4,0x7d,0x08,0xc1,0xda,0x7b
+
+# ATT:   rcrw	$123, %dx, %dx
+# INTEL: rcr	dx, dx, 123
+0x62,0xf4,0x6d,0x18,0xc1,0xda,0x7b
+
+# ATT:   {evex}	rcrl	$123, %ecx
+# INTEL: {evex}	rcr	ecx, 123
+0x62,0xf4,0x7c,0x08,0xc1,0xd9,0x7b
+
+# ATT:   rcrl	$123, %ecx, %ecx
+# INTEL: rcr	ecx, ecx, 123
+0x62,0xf4,0x74,0x18,0xc1,0xd9,0x7b
+
+# ATT:   {evex}	rcrq	$123, %r9
+# INTEL: {evex}	rcr	r9, 123
+0x62,0xd4,0xfc,0x08,0xc1,0xd9,0x7b
+
+# ATT:   rcrq	$123, %r9, %r9
+# INTEL: rcr	r9, r9, 123
+0x62,0xd4,0xb4,0x18,0xc1,0xd9,0x7b
+
+# ATT:   {evex}	rcrb	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rcr	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc0,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rcrb	$123, 291(%r8,%rax,4), %bl
+# INTEL: rcr	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x18,0xc0,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rcrw	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rcr	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x08,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rcrw	$123, 291(%r8,%rax,4), %dx
+# INTEL: rcr	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x18,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rcrl	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rcr	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rcrl	$123, 291(%r8,%rax,4), %ecx
+# INTEL: rcr	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x18,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rcrq	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rcr	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x08,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rcrq	$123, 291(%r8,%rax,4), %r9
+# INTEL: rcr	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x18,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rcrb	%bl
+# INTEL: {evex}	rcr	bl
+0x62,0xf4,0x7c,0x08,0xd0,0xdb
+
+# ATT:   rcrb	%bl, %bl
+# INTEL: rcr	bl, bl
+0x62,0xf4,0x64,0x18,0xd0,0xdb
+
+# ATT:   {evex}	rcrb	%cl, %bl
+# INTEL: {evex}	rcr	bl, cl
+0x62,0xf4,0x7c,0x08,0xd2,0xdb
+
+# ATT:   rcrb	%cl, %bl, %bl
+# INTEL: rcr	bl, bl, cl
+0x62,0xf4,0x64,0x18,0xd2,0xdb
+
+# ATT:   {evex}	rcrw	%cl, %dx
+# INTEL: {evex}	rcr	dx, cl
+0x62,0xf4,0x7d,0x08,0xd3,0xda
+
+# ATT:   rcrw	%cl, %dx, %dx
+# INTEL: rcr	dx, dx, cl
+0x62,0xf4,0x6d,0x18,0xd3,0xda
+
+# ATT:   {evex}	rcrl	%cl, %ecx
+# INTEL: {evex}	rcr	ecx, cl
+0x62,0xf4,0x7c,0x08,0xd3,0xd9
+
+# ATT:   rcrl	%cl, %ecx, %ecx
+# INTEL: rcr	ecx, ecx, cl
+0x62,0xf4,0x74,0x18,0xd3,0xd9
+
+# ATT:   {evex}	rcrq	%cl, %r9
+# INTEL: {evex}	rcr	r9, cl
+0x62,0xd4,0xfc,0x08,0xd3,0xd9
+
+# ATT:   rcrq	%cl, %r9, %r9
+# INTEL: rcr	r9, r9, cl
+0x62,0xd4,0xb4,0x18,0xd3,0xd9
+
+# ATT:   {evex}	rcrb	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rcr	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd2,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rcrb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: rcr	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x18,0xd2,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rcrw	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rcr	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x08,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rcrw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: rcr	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x18,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rcrl	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rcr	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rcrl	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: rcr	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x18,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rcrq	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rcr	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x08,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rcrq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: rcr	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x18,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rcrw	%dx
+# INTEL: {evex}	rcr	dx
+0x62,0xf4,0x7d,0x08,0xd1,0xda
+
+# ATT:   rcrw	%dx, %dx
+# INTEL: rcr	dx, dx
+0x62,0xf4,0x6d,0x18,0xd1,0xda
+
+# ATT:   {evex}	rcrl	%ecx
+# INTEL: {evex}	rcr	ecx
+0x62,0xf4,0x7c,0x08,0xd1,0xd9
+
+# ATT:   rcrl	%ecx, %ecx
+# INTEL: rcr	ecx, ecx
+0x62,0xf4,0x74,0x18,0xd1,0xd9
+
+# ATT:   {evex}	rcrq	%r9
+# INTEL: {evex}	rcr	r9
+0x62,0xd4,0xfc,0x08,0xd1,0xd9
+
+# ATT:   rcrq	%r9, %r9
+# INTEL: rcr	r9, r9
+0x62,0xd4,0xb4,0x18,0xd1,0xd9
+
+# ATT:   {evex}	rcrb	291(%r8,%rax,4)
+# INTEL: {evex}	rcr	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd0,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rcrb	291(%r8,%rax,4), %bl
+# INTEL: rcr	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x18,0xd0,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rcrw	291(%r8,%rax,4)
+# INTEL: {evex}	rcr	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x08,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rcrw	291(%r8,%rax,4), %dx
+# INTEL: rcr	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x18,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rcrl	291(%r8,%rax,4)
+# INTEL: {evex}	rcr	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rcrl	291(%r8,%rax,4), %ecx
+# INTEL: rcr	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x18,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rcrq	291(%r8,%rax,4)
+# INTEL: {evex}	rcr	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x08,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rcrq	291(%r8,%rax,4), %r9
+# INTEL: rcr	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x18,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/X86/apx/rol.txt b/llvm/test/MC/Disassembler/X86/apx/rol.txt
new file mode 100644
index 00000000000000..bb713d1c885667
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/rol.txt
@@ -0,0 +1,386 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT:   {evex}	rolb	$123, %bl
+# INTEL: {evex}	rol	bl, 123
+0x62,0xf4,0x7c,0x08,0xc0,0xc3,0x7b
+
+# ATT:   {nf}	rolb	$123, %bl
+# INTEL: {nf}	rol	bl, 123
+0x62,0xf4,0x7c,0x0c,0xc0,0xc3,0x7b
+
+# ATT:   rolb	$123, %bl, %bl
+# INTEL: rol	bl, bl, 123
+0x62,0xf4,0x64,0x18,0xc0,0xc3,0x7b
+
+# ATT:   {nf}	rolb	$123, %bl, %bl
+# INTEL: {nf}	rol	bl, bl, 123
+0x62,0xf4,0x64,0x1c,0xc0,0xc3,0x7b
+
+# ATT:   {evex}	rolw	$123, %dx
+# INTEL: {evex}	rol	dx, 123
+0x62,0xf4,0x7d,0x08,0xc1,0xc2,0x7b
+
+# ATT:   {nf}	rolw	$123, %dx
+# INTEL: {nf}	rol	dx, 123
+0x62,0xf4,0x7d,0x0c,0xc1,0xc2,0x7b
+
+# ATT:   rolw	$123, %dx, %dx
+# INTEL: rol	dx, dx, 123
+0x62,0xf4,0x6d,0x18,0xc1,0xc2,0x7b
+
+# ATT:   {nf}	rolw	$123, %dx, %dx
+# INTEL: {nf}	rol	dx, dx, 123
+0x62,0xf4,0x6d,0x1c,0xc1,0xc2,0x7b
+
+# ATT:   {evex}	roll	$123, %ecx
+# INTEL: {evex}	rol	ecx, 123
+0x62,0xf4,0x7c,0x08,0xc1,0xc1,0x7b
+
+# ATT:   {nf}	roll	$123, %ecx
+# INTEL: {nf}	rol	ecx, 123
+0x62,0xf4,0x7c,0x0c,0xc1,0xc1,0x7b
+
+# ATT:   roll	$123, %ecx, %ecx
+# INTEL: rol	ecx, ecx, 123
+0x62,0xf4,0x74,0x18,0xc1,0xc1,0x7b
+
+# ATT:   {nf}	roll	$123, %ecx, %ecx
+# INTEL: {nf}	rol	ecx, ecx, 123
+0x62,0xf4,0x74,0x1c,0xc1,0xc1,0x7b
+
+# ATT:   {evex}	rolq	$123, %r9
+# INTEL: {evex}	rol	r9, 123
+0x62,0xd4,0xfc,0x08,0xc1,0xc1,0x7b
+
+# ATT:   {nf}	rolq	$123, %r9
+# INTEL: {nf}	rol	r9, 123
+0x62,0xd4,0xfc,0x0c,0xc1,0xc1,0x7b
+
+# ATT:   rolq	$123, %r9, %r9
+# INTEL: rol	r9, r9, 123
+0x62,0xd4,0xb4,0x18,0xc1,0xc1,0x7b
+
+# ATT:   {nf}	rolq	$123, %r9, %r9
+# INTEL: {nf}	rol	r9, r9, 123
+0x62,0xd4,0xb4,0x1c,0xc1,0xc1,0x7b
+
+# ATT:   {evex}	rolb	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rol	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rolb	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	rol	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x0c,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rolb	$123, 291(%r8,%rax,4), %bl
+# INTEL: rol	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x18,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rolb	$123, 291(%r8,%rax,4), %bl
+# INTEL: {nf}	rol	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x1c,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rolw	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rol	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x08,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rolw	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	rol	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x0c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rolw	$123, 291(%r8,%rax,4), %dx
+# INTEL: rol	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x18,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rolw	$123, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	rol	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x1c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	roll	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rol	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	roll	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	rol	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x0c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   roll	$123, 291(%r8,%rax,4), %ecx
+# INTEL: rol	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x18,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	roll	$123, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	rol	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x1c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rolq	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	rol	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x08,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rolq	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	rol	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x0c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rolq	$123, 291(%r8,%rax,4), %r9
+# INTEL: rol	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x18,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rolq	$123, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	rol	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x1c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rolb	%bl
+# INTEL: {evex}	rol	bl
+0x62,0xf4,0x7c,0x08,0xd0,0xc3
+
+# ATT:   {nf}	rolb	%bl
+# INTEL: {nf}	rol	bl
+0x62,0xf4,0x7c,0x0c,0xd0,0xc3
+
+# ATT:   rolb	%bl, %bl
+# INTEL: rol	bl, bl
+0x62,0xf4,0x64,0x18,0xd0,0xc3
+
+# ATT:   {nf}	rolb	%bl, %bl
+# INTEL: {nf}	rol	bl, bl
+0x62,0xf4,0x64,0x1c,0xd0,0xc3
+
+# ATT:   {evex}	rolb	%cl, %bl
+# INTEL: {evex}	rol	bl, cl
+0x62,0xf4,0x7c,0x08,0xd2,0xc3
+
+# ATT:   {nf}	rolb	%cl, %bl
+# INTEL: {nf}	rol	bl, cl
+0x62,0xf4,0x7c,0x0c,0xd2,0xc3
+
+# ATT:   rolb	%cl, %bl, %bl
+# INTEL: rol	bl, bl, cl
+0x62,0xf4,0x64,0x18,0xd2,0xc3
+
+# ATT:   {nf}	rolb	%cl, %bl, %bl
+# INTEL: {nf}	rol	bl, bl, cl
+0x62,0xf4,0x64,0x1c,0xd2,0xc3
+
+# ATT:   {evex}	rolw	%cl, %dx
+# INTEL: {evex}	rol	dx, cl
+0x62,0xf4,0x7d,0x08,0xd3,0xc2
+
+# ATT:   {nf}	rolw	%cl, %dx
+# INTEL: {nf}	rol	dx, cl
+0x62,0xf4,0x7d,0x0c,0xd3,0xc2
+
+# ATT:   rolw	%cl, %dx, %dx
+# INTEL: rol	dx, dx, cl
+0x62,0xf4,0x6d,0x18,0xd3,0xc2
+
+# ATT:   {nf}	rolw	%cl, %dx, %dx
+# INTEL: {nf}	rol	dx, dx, cl
+0x62,0xf4,0x6d,0x1c,0xd3,0xc2
+
+# ATT:   {evex}	roll	%cl, %ecx
+# INTEL: {evex}	rol	ecx, cl
+0x62,0xf4,0x7c,0x08,0xd3,0xc1
+
+# ATT:   {nf}	roll	%cl, %ecx
+# INTEL: {nf}	rol	ecx, cl
+0x62,0xf4,0x7c,0x0c,0xd3,0xc1
+
+# ATT:   roll	%cl, %ecx, %ecx
+# INTEL: rol	ecx, ecx, cl
+0x62,0xf4,0x74,0x18,0xd3,0xc1
+
+# ATT:   {nf}	roll	%cl, %ecx, %ecx
+# INTEL: {nf}	rol	ecx, ecx, cl
+0x62,0xf4,0x74,0x1c,0xd3,0xc1
+
+# ATT:   {evex}	rolq	%cl, %r9
+# INTEL: {evex}	rol	r9, cl
+0x62,0xd4,0xfc,0x08,0xd3,0xc1
+
+# ATT:   {nf}	rolq	%cl, %r9
+# INTEL: {nf}	rol	r9, cl
+0x62,0xd4,0xfc,0x0c,0xd3,0xc1
+
+# ATT:   rolq	%cl, %r9, %r9
+# INTEL: rol	r9, r9, cl
+0x62,0xd4,0xb4,0x18,0xd3,0xc1
+
+# ATT:   {nf}	rolq	%cl, %r9, %r9
+# INTEL: {nf}	rol	r9, r9, cl
+0x62,0xd4,0xb4,0x1c,0xd3,0xc1
+
+# ATT:   {evex}	rolb	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rol	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd2,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolb	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	rol	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x0c,0xd2,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rolb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: rol	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x18,0xd2,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: {nf}	rol	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x1c,0xd2,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rolw	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rol	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x08,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolw	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	rol	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x0c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rolw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: rol	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x18,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	rol	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x1c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	roll	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rol	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	roll	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	rol	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x0c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   roll	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: rol	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x18,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	roll	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	rol	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x1c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rolq	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	rol	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x08,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolq	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	rol	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x0c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rolq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: rol	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x18,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	rol	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x1c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rolw	%dx
+# INTEL: {evex}	rol	dx
+0x62,0xf4,0x7d,0x08,0xd1,0xc2
+
+# ATT:   {nf}	rolw	%dx
+# INTEL: {nf}	rol	dx
+0x62,0xf4,0x7d,0x0c,0xd1,0xc2
+
+# ATT:   rolw	%dx, %dx
+# INTEL: rol	dx, dx
+0x62,0xf4,0x6d,0x18,0xd1,0xc2
+
+# ATT:   {nf}	rolw	%dx, %dx
+# INTEL: {nf}	rol	dx, dx
+0x62,0xf4,0x6d,0x1c,0xd1,0xc2
+
+# ATT:   {evex}	roll	%ecx
+# INTEL: {evex}	rol	ecx
+0x62,0xf4,0x7c,0x08,0xd1,0xc1
+
+# ATT:   {nf}	roll	%ecx
+# INTEL: {nf}	rol	ecx
+0x62,0xf4,0x7c,0x0c,0xd1,0xc1
+
+# ATT:   roll	%ecx, %ecx
+# INTEL: rol	ecx, ecx
+0x62,0xf4,0x74,0x18,0xd1,0xc1
+
+# ATT:   {nf}	roll	%ecx, %ecx
+# INTEL: {nf}	rol	ecx, ecx
+0x62,0xf4,0x74,0x1c,0xd1,0xc1
+
+# ATT:   {evex}	rolq	%r9
+# INTEL: {evex}	rol	r9
+0x62,0xd4,0xfc,0x08,0xd1,0xc1
+
+# ATT:   {nf}	rolq	%r9
+# INTEL: {nf}	rol	r9
+0x62,0xd4,0xfc,0x0c,0xd1,0xc1
+
+# ATT:   rolq	%r9, %r9
+# INTEL: rol	r9, r9
+0x62,0xd4,0xb4,0x18,0xd1,0xc1
+
+# ATT:   {nf}	rolq	%r9, %r9
+# INTEL: {nf}	rol	r9, r9
+0x62,0xd4,0xb4,0x1c,0xd1,0xc1
+
+# ATT:   {evex}	rolb	291(%r8,%rax,4)
+# INTEL: {evex}	rol	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd0,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolb	291(%r8,%rax,4)
+# INTEL: {nf}	rol	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x0c,0xd0,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rolb	291(%r8,%rax,4), %bl
+# INTEL: rol	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x18,0xd0,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolb	291(%r8,%rax,4), %bl
+# INTEL: {nf}	rol	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x1c,0xd0,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rolw	291(%r8,%rax,4)
+# INTEL: {evex}	rol	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x08,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolw	291(%r8,%rax,4)
+# INTEL: {nf}	rol	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x0c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rolw	291(%r8,%rax,4), %dx
+# INTEL: rol	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x18,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolw	291(%r8,%rax,4), %dx
+# INTEL: {nf}	rol	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x1c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	roll	291(%r8,%rax,4)
+# INTEL: {evex}	rol	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	roll	291(%r8,%rax,4)
+# INTEL: {nf}	rol	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x0c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   roll	291(%r8,%rax,4), %ecx
+# INTEL: rol	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x18,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	roll	291(%r8,%rax,4), %ecx
+# INTEL: {nf}	rol	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x1c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rolq	291(%r8,%rax,4)
+# INTEL: {evex}	rol	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x08,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolq	291(%r8,%rax,4)
+# INTEL: {nf}	rol	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x0c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rolq	291(%r8,%rax,4), %r9
+# INTEL: rol	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x18,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rolq	291(%r8,%rax,4), %r9
+# INTEL: {nf}	rol	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x1c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/X86/apx/ror.txt b/llvm/test/MC/Disassembler/X86/apx/ror.txt
new file mode 100644
index 00000000000000..484a3e143fdac4
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/ror.txt
@@ -0,0 +1,386 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT:   {evex}	rorb	$123, %bl
+# INTEL: {evex}	ror	bl, 123
+0x62,0xf4,0x7c,0x08,0xc0,0xcb,0x7b
+
+# ATT:   {nf}	rorb	$123, %bl
+# INTEL: {nf}	ror	bl, 123
+0x62,0xf4,0x7c,0x0c,0xc0,0xcb,0x7b
+
+# ATT:   rorb	$123, %bl, %bl
+# INTEL: ror	bl, bl, 123
+0x62,0xf4,0x64,0x18,0xc0,0xcb,0x7b
+
+# ATT:   {nf}	rorb	$123, %bl, %bl
+# INTEL: {nf}	ror	bl, bl, 123
+0x62,0xf4,0x64,0x1c,0xc0,0xcb,0x7b
+
+# ATT:   {evex}	rorw	$123, %dx
+# INTEL: {evex}	ror	dx, 123
+0x62,0xf4,0x7d,0x08,0xc1,0xca,0x7b
+
+# ATT:   {nf}	rorw	$123, %dx
+# INTEL: {nf}	ror	dx, 123
+0x62,0xf4,0x7d,0x0c,0xc1,0xca,0x7b
+
+# ATT:   rorw	$123, %dx, %dx
+# INTEL: ror	dx, dx, 123
+0x62,0xf4,0x6d,0x18,0xc1,0xca,0x7b
+
+# ATT:   {nf}	rorw	$123, %dx, %dx
+# INTEL: {nf}	ror	dx, dx, 123
+0x62,0xf4,0x6d,0x1c,0xc1,0xca,0x7b
+
+# ATT:   {evex}	rorl	$123, %ecx
+# INTEL: {evex}	ror	ecx, 123
+0x62,0xf4,0x7c,0x08,0xc1,0xc9,0x7b
+
+# ATT:   {nf}	rorl	$123, %ecx
+# INTEL: {nf}	ror	ecx, 123
+0x62,0xf4,0x7c,0x0c,0xc1,0xc9,0x7b
+
+# ATT:   rorl	$123, %ecx, %ecx
+# INTEL: ror	ecx, ecx, 123
+0x62,0xf4,0x74,0x18,0xc1,0xc9,0x7b
+
+# ATT:   {nf}	rorl	$123, %ecx, %ecx
+# INTEL: {nf}	ror	ecx, ecx, 123
+0x62,0xf4,0x74,0x1c,0xc1,0xc9,0x7b
+
+# ATT:   {evex}	rorq	$123, %r9
+# INTEL: {evex}	ror	r9, 123
+0x62,0xd4,0xfc,0x08,0xc1,0xc9,0x7b
+
+# ATT:   {nf}	rorq	$123, %r9
+# INTEL: {nf}	ror	r9, 123
+0x62,0xd4,0xfc,0x0c,0xc1,0xc9,0x7b
+
+# ATT:   rorq	$123, %r9, %r9
+# INTEL: ror	r9, r9, 123
+0x62,0xd4,0xb4,0x18,0xc1,0xc9,0x7b
+
+# ATT:   {nf}	rorq	$123, %r9, %r9
+# INTEL: {nf}	ror	r9, r9, 123
+0x62,0xd4,0xb4,0x1c,0xc1,0xc9,0x7b
+
+# ATT:   {evex}	rorb	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	ror	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rorb	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	ror	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x0c,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rorb	$123, 291(%r8,%rax,4), %bl
+# INTEL: ror	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x18,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rorb	$123, 291(%r8,%rax,4), %bl
+# INTEL: {nf}	ror	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x1c,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rorw	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	ror	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x08,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rorw	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	ror	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x0c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rorw	$123, 291(%r8,%rax,4), %dx
+# INTEL: ror	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x18,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rorw	$123, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	ror	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x1c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rorl	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	ror	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rorl	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	ror	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x0c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rorl	$123, 291(%r8,%rax,4), %ecx
+# INTEL: ror	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x18,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rorl	$123, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	ror	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x1c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rorq	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	ror	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x08,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rorq	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	ror	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x0c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   rorq	$123, 291(%r8,%rax,4), %r9
+# INTEL: ror	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x18,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	rorq	$123, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	ror	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x1c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	rorb	%bl
+# INTEL: {evex}	ror	bl
+0x62,0xf4,0x7c,0x08,0xd0,0xcb
+
+# ATT:   {nf}	rorb	%bl
+# INTEL: {nf}	ror	bl
+0x62,0xf4,0x7c,0x0c,0xd0,0xcb
+
+# ATT:   rorb	%bl, %bl
+# INTEL: ror	bl, bl
+0x62,0xf4,0x64,0x18,0xd0,0xcb
+
+# ATT:   {nf}	rorb	%bl, %bl
+# INTEL: {nf}	ror	bl, bl
+0x62,0xf4,0x64,0x1c,0xd0,0xcb
+
+# ATT:   {evex}	rorb	%cl, %bl
+# INTEL: {evex}	ror	bl, cl
+0x62,0xf4,0x7c,0x08,0xd2,0xcb
+
+# ATT:   {nf}	rorb	%cl, %bl
+# INTEL: {nf}	ror	bl, cl
+0x62,0xf4,0x7c,0x0c,0xd2,0xcb
+
+# ATT:   rorb	%cl, %bl, %bl
+# INTEL: ror	bl, bl, cl
+0x62,0xf4,0x64,0x18,0xd2,0xcb
+
+# ATT:   {nf}	rorb	%cl, %bl, %bl
+# INTEL: {nf}	ror	bl, bl, cl
+0x62,0xf4,0x64,0x1c,0xd2,0xcb
+
+# ATT:   {evex}	rorw	%cl, %dx
+# INTEL: {evex}	ror	dx, cl
+0x62,0xf4,0x7d,0x08,0xd3,0xca
+
+# ATT:   {nf}	rorw	%cl, %dx
+# INTEL: {nf}	ror	dx, cl
+0x62,0xf4,0x7d,0x0c,0xd3,0xca
+
+# ATT:   rorw	%cl, %dx, %dx
+# INTEL: ror	dx, dx, cl
+0x62,0xf4,0x6d,0x18,0xd3,0xca
+
+# ATT:   {nf}	rorw	%cl, %dx, %dx
+# INTEL: {nf}	ror	dx, dx, cl
+0x62,0xf4,0x6d,0x1c,0xd3,0xca
+
+# ATT:   {evex}	rorl	%cl, %ecx
+# INTEL: {evex}	ror	ecx, cl
+0x62,0xf4,0x7c,0x08,0xd3,0xc9
+
+# ATT:   {nf}	rorl	%cl, %ecx
+# INTEL: {nf}	ror	ecx, cl
+0x62,0xf4,0x7c,0x0c,0xd3,0xc9
+
+# ATT:   rorl	%cl, %ecx, %ecx
+# INTEL: ror	ecx, ecx, cl
+0x62,0xf4,0x74,0x18,0xd3,0xc9
+
+# ATT:   {nf}	rorl	%cl, %ecx, %ecx
+# INTEL: {nf}	ror	ecx, ecx, cl
+0x62,0xf4,0x74,0x1c,0xd3,0xc9
+
+# ATT:   {evex}	rorq	%cl, %r9
+# INTEL: {evex}	ror	r9, cl
+0x62,0xd4,0xfc,0x08,0xd3,0xc9
+
+# ATT:   {nf}	rorq	%cl, %r9
+# INTEL: {nf}	ror	r9, cl
+0x62,0xd4,0xfc,0x0c,0xd3,0xc9
+
+# ATT:   rorq	%cl, %r9, %r9
+# INTEL: ror	r9, r9, cl
+0x62,0xd4,0xb4,0x18,0xd3,0xc9
+
+# ATT:   {nf}	rorq	%cl, %r9, %r9
+# INTEL: {nf}	ror	r9, r9, cl
+0x62,0xd4,0xb4,0x1c,0xd3,0xc9
+
+# ATT:   {evex}	rorb	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	ror	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorb	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	ror	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x0c,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rorb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: ror	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x18,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: {nf}	ror	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x1c,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rorw	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	ror	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x08,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorw	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	ror	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x0c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rorw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: ror	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x18,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	ror	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x1c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rorl	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	ror	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorl	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	ror	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x0c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rorl	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: ror	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x18,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorl	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	ror	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x1c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rorq	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	ror	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x08,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorq	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	ror	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x0c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rorq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: ror	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x18,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	ror	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x1c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rorw	%dx
+# INTEL: {evex}	ror	dx
+0x62,0xf4,0x7d,0x08,0xd1,0xca
+
+# ATT:   {nf}	rorw	%dx
+# INTEL: {nf}	ror	dx
+0x62,0xf4,0x7d,0x0c,0xd1,0xca
+
+# ATT:   rorw	%dx, %dx
+# INTEL: ror	dx, dx
+0x62,0xf4,0x6d,0x18,0xd1,0xca
+
+# ATT:   {nf}	rorw	%dx, %dx
+# INTEL: {nf}	ror	dx, dx
+0x62,0xf4,0x6d,0x1c,0xd1,0xca
+
+# ATT:   {evex}	rorl	%ecx
+# INTEL: {evex}	ror	ecx
+0x62,0xf4,0x7c,0x08,0xd1,0xc9
+
+# ATT:   {nf}	rorl	%ecx
+# INTEL: {nf}	ror	ecx
+0x62,0xf4,0x7c,0x0c,0xd1,0xc9
+
+# ATT:   rorl	%ecx, %ecx
+# INTEL: ror	ecx, ecx
+0x62,0xf4,0x74,0x18,0xd1,0xc9
+
+# ATT:   {nf}	rorl	%ecx, %ecx
+# INTEL: {nf}	ror	ecx, ecx
+0x62,0xf4,0x74,0x1c,0xd1,0xc9
+
+# ATT:   {evex}	rorq	%r9
+# INTEL: {evex}	ror	r9
+0x62,0xd4,0xfc,0x08,0xd1,0xc9
+
+# ATT:   {nf}	rorq	%r9
+# INTEL: {nf}	ror	r9
+0x62,0xd4,0xfc,0x0c,0xd1,0xc9
+
+# ATT:   rorq	%r9, %r9
+# INTEL: ror	r9, r9
+0x62,0xd4,0xb4,0x18,0xd1,0xc9
+
+# ATT:   {nf}	rorq	%r9, %r9
+# INTEL: {nf}	ror	r9, r9
+0x62,0xd4,0xb4,0x1c,0xd1,0xc9
+
+# ATT:   {evex}	rorb	291(%r8,%rax,4)
+# INTEL: {evex}	ror	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorb	291(%r8,%rax,4)
+# INTEL: {nf}	ror	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x0c,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rorb	291(%r8,%rax,4), %bl
+# INTEL: ror	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x18,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorb	291(%r8,%rax,4), %bl
+# INTEL: {nf}	ror	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x1c,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rorw	291(%r8,%rax,4)
+# INTEL: {evex}	ror	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x08,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorw	291(%r8,%rax,4)
+# INTEL: {nf}	ror	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x0c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rorw	291(%r8,%rax,4), %dx
+# INTEL: ror	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x18,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorw	291(%r8,%rax,4), %dx
+# INTEL: {nf}	ror	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x1c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rorl	291(%r8,%rax,4)
+# INTEL: {evex}	ror	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorl	291(%r8,%rax,4)
+# INTEL: {nf}	ror	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x0c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rorl	291(%r8,%rax,4), %ecx
+# INTEL: ror	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x18,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorl	291(%r8,%rax,4), %ecx
+# INTEL: {nf}	ror	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x1c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	rorq	291(%r8,%rax,4)
+# INTEL: {evex}	ror	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x08,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorq	291(%r8,%rax,4)
+# INTEL: {nf}	ror	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x0c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   rorq	291(%r8,%rax,4), %r9
+# INTEL: ror	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x18,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	rorq	291(%r8,%rax,4), %r9
+# INTEL: {nf}	ror	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x1c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/X86/apx/sar.txt b/llvm/test/MC/Disassembler/X86/apx/sar.txt
new file mode 100644
index 00000000000000..b5e41ee956ccd2
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/sar.txt
@@ -0,0 +1,386 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT:   {evex}	sarb	$123, %bl
+# INTEL: {evex}	sar	bl, 123
+0x62,0xf4,0x7c,0x08,0xc0,0xfb,0x7b
+
+# ATT:   {nf}	sarb	$123, %bl
+# INTEL: {nf}	sar	bl, 123
+0x62,0xf4,0x7c,0x0c,0xc0,0xfb,0x7b
+
+# ATT:   sarb	$123, %bl, %bl
+# INTEL: sar	bl, bl, 123
+0x62,0xf4,0x64,0x18,0xc0,0xfb,0x7b
+
+# ATT:   {nf}	sarb	$123, %bl, %bl
+# INTEL: {nf}	sar	bl, bl, 123
+0x62,0xf4,0x64,0x1c,0xc0,0xfb,0x7b
+
+# ATT:   {evex}	sarw	$123, %dx
+# INTEL: {evex}	sar	dx, 123
+0x62,0xf4,0x7d,0x08,0xc1,0xfa,0x7b
+
+# ATT:   {nf}	sarw	$123, %dx
+# INTEL: {nf}	sar	dx, 123
+0x62,0xf4,0x7d,0x0c,0xc1,0xfa,0x7b
+
+# ATT:   sarw	$123, %dx, %dx
+# INTEL: sar	dx, dx, 123
+0x62,0xf4,0x6d,0x18,0xc1,0xfa,0x7b
+
+# ATT:   {nf}	sarw	$123, %dx, %dx
+# INTEL: {nf}	sar	dx, dx, 123
+0x62,0xf4,0x6d,0x1c,0xc1,0xfa,0x7b
+
+# ATT:   {evex}	sarl	$123, %ecx
+# INTEL: {evex}	sar	ecx, 123
+0x62,0xf4,0x7c,0x08,0xc1,0xf9,0x7b
+
+# ATT:   {nf}	sarl	$123, %ecx
+# INTEL: {nf}	sar	ecx, 123
+0x62,0xf4,0x7c,0x0c,0xc1,0xf9,0x7b
+
+# ATT:   sarl	$123, %ecx, %ecx
+# INTEL: sar	ecx, ecx, 123
+0x62,0xf4,0x74,0x18,0xc1,0xf9,0x7b
+
+# ATT:   {nf}	sarl	$123, %ecx, %ecx
+# INTEL: {nf}	sar	ecx, ecx, 123
+0x62,0xf4,0x74,0x1c,0xc1,0xf9,0x7b
+
+# ATT:   {evex}	sarq	$123, %r9
+# INTEL: {evex}	sar	r9, 123
+0x62,0xd4,0xfc,0x08,0xc1,0xf9,0x7b
+
+# ATT:   {nf}	sarq	$123, %r9
+# INTEL: {nf}	sar	r9, 123
+0x62,0xd4,0xfc,0x0c,0xc1,0xf9,0x7b
+
+# ATT:   sarq	$123, %r9, %r9
+# INTEL: sar	r9, r9, 123
+0x62,0xd4,0xb4,0x18,0xc1,0xf9,0x7b
+
+# ATT:   {nf}	sarq	$123, %r9, %r9
+# INTEL: {nf}	sar	r9, r9, 123
+0x62,0xd4,0xb4,0x1c,0xc1,0xf9,0x7b
+
+# ATT:   {evex}	sarb	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	sar	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	sarb	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	sar	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x0c,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   sarb	$123, 291(%r8,%rax,4), %bl
+# INTEL: sar	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x18,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	sarb	$123, 291(%r8,%rax,4), %bl
+# INTEL: {nf}	sar	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x1c,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	sarw	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	sar	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x08,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	sarw	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	sar	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x0c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   sarw	$123, 291(%r8,%rax,4), %dx
+# INTEL: sar	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x18,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	sarw	$123, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	sar	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x1c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	sarl	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	sar	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	sarl	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	sar	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x0c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   sarl	$123, 291(%r8,%rax,4), %ecx
+# INTEL: sar	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x18,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	sarl	$123, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	sar	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x1c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	sarq	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	sar	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x08,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	sarq	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	sar	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x0c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   sarq	$123, 291(%r8,%rax,4), %r9
+# INTEL: sar	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x18,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	sarq	$123, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	sar	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x1c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	sarb	%bl
+# INTEL: {evex}	sar	bl
+0x62,0xf4,0x7c,0x08,0xd0,0xfb
+
+# ATT:   {nf}	sarb	%bl
+# INTEL: {nf}	sar	bl
+0x62,0xf4,0x7c,0x0c,0xd0,0xfb
+
+# ATT:   sarb	%bl, %bl
+# INTEL: sar	bl, bl
+0x62,0xf4,0x64,0x18,0xd0,0xfb
+
+# ATT:   {nf}	sarb	%bl, %bl
+# INTEL: {nf}	sar	bl, bl
+0x62,0xf4,0x64,0x1c,0xd0,0xfb
+
+# ATT:   {evex}	sarb	%cl, %bl
+# INTEL: {evex}	sar	bl, cl
+0x62,0xf4,0x7c,0x08,0xd2,0xfb
+
+# ATT:   {nf}	sarb	%cl, %bl
+# INTEL: {nf}	sar	bl, cl
+0x62,0xf4,0x7c,0x0c,0xd2,0xfb
+
+# ATT:   sarb	%cl, %bl, %bl
+# INTEL: sar	bl, bl, cl
+0x62,0xf4,0x64,0x18,0xd2,0xfb
+
+# ATT:   {nf}	sarb	%cl, %bl, %bl
+# INTEL: {nf}	sar	bl, bl, cl
+0x62,0xf4,0x64,0x1c,0xd2,0xfb
+
+# ATT:   {evex}	sarw	%cl, %dx
+# INTEL: {evex}	sar	dx, cl
+0x62,0xf4,0x7d,0x08,0xd3,0xfa
+
+# ATT:   {nf}	sarw	%cl, %dx
+# INTEL: {nf}	sar	dx, cl
+0x62,0xf4,0x7d,0x0c,0xd3,0xfa
+
+# ATT:   sarw	%cl, %dx, %dx
+# INTEL: sar	dx, dx, cl
+0x62,0xf4,0x6d,0x18,0xd3,0xfa
+
+# ATT:   {nf}	sarw	%cl, %dx, %dx
+# INTEL: {nf}	sar	dx, dx, cl
+0x62,0xf4,0x6d,0x1c,0xd3,0xfa
+
+# ATT:   {evex}	sarl	%cl, %ecx
+# INTEL: {evex}	sar	ecx, cl
+0x62,0xf4,0x7c,0x08,0xd3,0xf9
+
+# ATT:   {nf}	sarl	%cl, %ecx
+# INTEL: {nf}	sar	ecx, cl
+0x62,0xf4,0x7c,0x0c,0xd3,0xf9
+
+# ATT:   sarl	%cl, %ecx, %ecx
+# INTEL: sar	ecx, ecx, cl
+0x62,0xf4,0x74,0x18,0xd3,0xf9
+
+# ATT:   {nf}	sarl	%cl, %ecx, %ecx
+# INTEL: {nf}	sar	ecx, ecx, cl
+0x62,0xf4,0x74,0x1c,0xd3,0xf9
+
+# ATT:   {evex}	sarq	%cl, %r9
+# INTEL: {evex}	sar	r9, cl
+0x62,0xd4,0xfc,0x08,0xd3,0xf9
+
+# ATT:   {nf}	sarq	%cl, %r9
+# INTEL: {nf}	sar	r9, cl
+0x62,0xd4,0xfc,0x0c,0xd3,0xf9
+
+# ATT:   sarq	%cl, %r9, %r9
+# INTEL: sar	r9, r9, cl
+0x62,0xd4,0xb4,0x18,0xd3,0xf9
+
+# ATT:   {nf}	sarq	%cl, %r9, %r9
+# INTEL: {nf}	sar	r9, r9, cl
+0x62,0xd4,0xb4,0x1c,0xd3,0xf9
+
+# ATT:   {evex}	sarb	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	sar	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarb	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	sar	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x0c,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   sarb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: sar	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x18,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: {nf}	sar	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x1c,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	sarw	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	sar	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x08,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarw	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	sar	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x0c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   sarw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: sar	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x18,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	sar	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x1c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	sarl	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	sar	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarl	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	sar	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x0c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   sarl	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: sar	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x18,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarl	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	sar	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x1c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	sarq	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	sar	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x08,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarq	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	sar	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x0c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   sarq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: sar	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x18,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	sar	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x1c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	sarw	%dx
+# INTEL: {evex}	sar	dx
+0x62,0xf4,0x7d,0x08,0xd1,0xfa
+
+# ATT:   {nf}	sarw	%dx
+# INTEL: {nf}	sar	dx
+0x62,0xf4,0x7d,0x0c,0xd1,0xfa
+
+# ATT:   sarw	%dx, %dx
+# INTEL: sar	dx, dx
+0x62,0xf4,0x6d,0x18,0xd1,0xfa
+
+# ATT:   {nf}	sarw	%dx, %dx
+# INTEL: {nf}	sar	dx, dx
+0x62,0xf4,0x6d,0x1c,0xd1,0xfa
+
+# ATT:   {evex}	sarl	%ecx
+# INTEL: {evex}	sar	ecx
+0x62,0xf4,0x7c,0x08,0xd1,0xf9
+
+# ATT:   {nf}	sarl	%ecx
+# INTEL: {nf}	sar	ecx
+0x62,0xf4,0x7c,0x0c,0xd1,0xf9
+
+# ATT:   sarl	%ecx, %ecx
+# INTEL: sar	ecx, ecx
+0x62,0xf4,0x74,0x18,0xd1,0xf9
+
+# ATT:   {nf}	sarl	%ecx, %ecx
+# INTEL: {nf}	sar	ecx, ecx
+0x62,0xf4,0x74,0x1c,0xd1,0xf9
+
+# ATT:   {evex}	sarq	%r9
+# INTEL: {evex}	sar	r9
+0x62,0xd4,0xfc,0x08,0xd1,0xf9
+
+# ATT:   {nf}	sarq	%r9
+# INTEL: {nf}	sar	r9
+0x62,0xd4,0xfc,0x0c,0xd1,0xf9
+
+# ATT:   sarq	%r9, %r9
+# INTEL: sar	r9, r9
+0x62,0xd4,0xb4,0x18,0xd1,0xf9
+
+# ATT:   {nf}	sarq	%r9, %r9
+# INTEL: {nf}	sar	r9, r9
+0x62,0xd4,0xb4,0x1c,0xd1,0xf9
+
+# ATT:   {evex}	sarb	291(%r8,%rax,4)
+# INTEL: {evex}	sar	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarb	291(%r8,%rax,4)
+# INTEL: {nf}	sar	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x0c,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   sarb	291(%r8,%rax,4), %bl
+# INTEL: sar	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x18,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarb	291(%r8,%rax,4), %bl
+# INTEL: {nf}	sar	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x1c,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	sarw	291(%r8,%rax,4)
+# INTEL: {evex}	sar	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x08,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarw	291(%r8,%rax,4)
+# INTEL: {nf}	sar	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x0c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   sarw	291(%r8,%rax,4), %dx
+# INTEL: sar	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x18,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarw	291(%r8,%rax,4), %dx
+# INTEL: {nf}	sar	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x1c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	sarl	291(%r8,%rax,4)
+# INTEL: {evex}	sar	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarl	291(%r8,%rax,4)
+# INTEL: {nf}	sar	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x0c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   sarl	291(%r8,%rax,4), %ecx
+# INTEL: sar	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x18,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarl	291(%r8,%rax,4), %ecx
+# INTEL: {nf}	sar	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x1c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	sarq	291(%r8,%rax,4)
+# INTEL: {evex}	sar	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x08,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarq	291(%r8,%rax,4)
+# INTEL: {nf}	sar	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x0c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   sarq	291(%r8,%rax,4), %r9
+# INTEL: sar	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x18,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	sarq	291(%r8,%rax,4), %r9
+# INTEL: {nf}	sar	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x1c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/X86/apx/shl.txt b/llvm/test/MC/Disassembler/X86/apx/shl.txt
new file mode 100644
index 00000000000000..6f0b1b9a2af2e8
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/shl.txt
@@ -0,0 +1,386 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT:   {evex}	shlb	$123, %bl
+# INTEL: {evex}	shl	bl, 123
+0x62,0xf4,0x7c,0x08,0xc0,0xe3,0x7b
+
+# ATT:   {nf}	shlb	$123, %bl
+# INTEL: {nf}	shl	bl, 123
+0x62,0xf4,0x7c,0x0c,0xc0,0xe3,0x7b
+
+# ATT:   shlb	$123, %bl, %bl
+# INTEL: shl	bl, bl, 123
+0x62,0xf4,0x64,0x18,0xc0,0xe3,0x7b
+
+# ATT:   {nf}	shlb	$123, %bl, %bl
+# INTEL: {nf}	shl	bl, bl, 123
+0x62,0xf4,0x64,0x1c,0xc0,0xe3,0x7b
+
+# ATT:   {evex}	shlw	$123, %dx
+# INTEL: {evex}	shl	dx, 123
+0x62,0xf4,0x7d,0x08,0xc1,0xe2,0x7b
+
+# ATT:   {nf}	shlw	$123, %dx
+# INTEL: {nf}	shl	dx, 123
+0x62,0xf4,0x7d,0x0c,0xc1,0xe2,0x7b
+
+# ATT:   shlw	$123, %dx, %dx
+# INTEL: shl	dx, dx, 123
+0x62,0xf4,0x6d,0x18,0xc1,0xe2,0x7b
+
+# ATT:   {nf}	shlw	$123, %dx, %dx
+# INTEL: {nf}	shl	dx, dx, 123
+0x62,0xf4,0x6d,0x1c,0xc1,0xe2,0x7b
+
+# ATT:   {evex}	shll	$123, %ecx
+# INTEL: {evex}	shl	ecx, 123
+0x62,0xf4,0x7c,0x08,0xc1,0xe1,0x7b
+
+# ATT:   {nf}	shll	$123, %ecx
+# INTEL: {nf}	shl	ecx, 123
+0x62,0xf4,0x7c,0x0c,0xc1,0xe1,0x7b
+
+# ATT:   shll	$123, %ecx, %ecx
+# INTEL: shl	ecx, ecx, 123
+0x62,0xf4,0x74,0x18,0xc1,0xe1,0x7b
+
+# ATT:   {nf}	shll	$123, %ecx, %ecx
+# INTEL: {nf}	shl	ecx, ecx, 123
+0x62,0xf4,0x74,0x1c,0xc1,0xe1,0x7b
+
+# ATT:   {evex}	shlq	$123, %r9
+# INTEL: {evex}	shl	r9, 123
+0x62,0xd4,0xfc,0x08,0xc1,0xe1,0x7b
+
+# ATT:   {nf}	shlq	$123, %r9
+# INTEL: {nf}	shl	r9, 123
+0x62,0xd4,0xfc,0x0c,0xc1,0xe1,0x7b
+
+# ATT:   shlq	$123, %r9, %r9
+# INTEL: shl	r9, r9, 123
+0x62,0xd4,0xb4,0x18,0xc1,0xe1,0x7b
+
+# ATT:   {nf}	shlq	$123, %r9, %r9
+# INTEL: {nf}	shl	r9, r9, 123
+0x62,0xd4,0xb4,0x1c,0xc1,0xe1,0x7b
+
+# ATT:   {evex}	shlb	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	shl	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shlb	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	shl	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x0c,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shlb	$123, 291(%r8,%rax,4), %bl
+# INTEL: shl	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x18,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shlb	$123, 291(%r8,%rax,4), %bl
+# INTEL: {nf}	shl	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x1c,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shlw	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	shl	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x08,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shlw	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	shl	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x0c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shlw	$123, 291(%r8,%rax,4), %dx
+# INTEL: shl	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x18,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shlw	$123, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	shl	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x1c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shll	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	shl	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shll	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	shl	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x0c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shll	$123, 291(%r8,%rax,4), %ecx
+# INTEL: shl	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x18,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shll	$123, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	shl	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x1c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shlq	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	shl	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x08,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shlq	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	shl	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x0c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shlq	$123, 291(%r8,%rax,4), %r9
+# INTEL: shl	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x18,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shlq	$123, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	shl	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x1c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shlb	%bl
+# INTEL: {evex}	shl	bl
+0x62,0xf4,0x7c,0x08,0xd0,0xe3
+
+# ATT:   {nf}	shlb	%bl
+# INTEL: {nf}	shl	bl
+0x62,0xf4,0x7c,0x0c,0xd0,0xe3
+
+# ATT:   shlb	%bl, %bl
+# INTEL: shl	bl, bl
+0x62,0xf4,0x64,0x18,0xd0,0xe3
+
+# ATT:   {nf}	shlb	%bl, %bl
+# INTEL: {nf}	shl	bl, bl
+0x62,0xf4,0x64,0x1c,0xd0,0xe3
+
+# ATT:   {evex}	shlb	%cl, %bl
+# INTEL: {evex}	shl	bl, cl
+0x62,0xf4,0x7c,0x08,0xd2,0xe3
+
+# ATT:   {nf}	shlb	%cl, %bl
+# INTEL: {nf}	shl	bl, cl
+0x62,0xf4,0x7c,0x0c,0xd2,0xe3
+
+# ATT:   shlb	%cl, %bl, %bl
+# INTEL: shl	bl, bl, cl
+0x62,0xf4,0x64,0x18,0xd2,0xe3
+
+# ATT:   {nf}	shlb	%cl, %bl, %bl
+# INTEL: {nf}	shl	bl, bl, cl
+0x62,0xf4,0x64,0x1c,0xd2,0xe3
+
+# ATT:   {evex}	shlw	%cl, %dx
+# INTEL: {evex}	shl	dx, cl
+0x62,0xf4,0x7d,0x08,0xd3,0xe2
+
+# ATT:   {nf}	shlw	%cl, %dx
+# INTEL: {nf}	shl	dx, cl
+0x62,0xf4,0x7d,0x0c,0xd3,0xe2
+
+# ATT:   shlw	%cl, %dx, %dx
+# INTEL: shl	dx, dx, cl
+0x62,0xf4,0x6d,0x18,0xd3,0xe2
+
+# ATT:   {nf}	shlw	%cl, %dx, %dx
+# INTEL: {nf}	shl	dx, dx, cl
+0x62,0xf4,0x6d,0x1c,0xd3,0xe2
+
+# ATT:   {evex}	shll	%cl, %ecx
+# INTEL: {evex}	shl	ecx, cl
+0x62,0xf4,0x7c,0x08,0xd3,0xe1
+
+# ATT:   {nf}	shll	%cl, %ecx
+# INTEL: {nf}	shl	ecx, cl
+0x62,0xf4,0x7c,0x0c,0xd3,0xe1
+
+# ATT:   shll	%cl, %ecx, %ecx
+# INTEL: shl	ecx, ecx, cl
+0x62,0xf4,0x74,0x18,0xd3,0xe1
+
+# ATT:   {nf}	shll	%cl, %ecx, %ecx
+# INTEL: {nf}	shl	ecx, ecx, cl
+0x62,0xf4,0x74,0x1c,0xd3,0xe1
+
+# ATT:   {evex}	shlq	%cl, %r9
+# INTEL: {evex}	shl	r9, cl
+0x62,0xd4,0xfc,0x08,0xd3,0xe1
+
+# ATT:   {nf}	shlq	%cl, %r9
+# INTEL: {nf}	shl	r9, cl
+0x62,0xd4,0xfc,0x0c,0xd3,0xe1
+
+# ATT:   shlq	%cl, %r9, %r9
+# INTEL: shl	r9, r9, cl
+0x62,0xd4,0xb4,0x18,0xd3,0xe1
+
+# ATT:   {nf}	shlq	%cl, %r9, %r9
+# INTEL: {nf}	shl	r9, r9, cl
+0x62,0xd4,0xb4,0x1c,0xd3,0xe1
+
+# ATT:   {evex}	shlb	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	shl	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlb	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	shl	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x0c,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shlb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: shl	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x18,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: {nf}	shl	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x1c,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shlw	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	shl	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x08,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlw	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	shl	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x0c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shlw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: shl	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x18,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	shl	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x1c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shll	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	shl	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shll	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	shl	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x0c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shll	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: shl	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x18,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shll	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	shl	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x1c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shlq	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	shl	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x08,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlq	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	shl	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x0c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shlq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: shl	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x18,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	shl	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x1c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shlw	%dx
+# INTEL: {evex}	shl	dx
+0x62,0xf4,0x7d,0x08,0xd1,0xe2
+
+# ATT:   {nf}	shlw	%dx
+# INTEL: {nf}	shl	dx
+0x62,0xf4,0x7d,0x0c,0xd1,0xe2
+
+# ATT:   shlw	%dx, %dx
+# INTEL: shl	dx, dx
+0x62,0xf4,0x6d,0x18,0xd1,0xe2
+
+# ATT:   {nf}	shlw	%dx, %dx
+# INTEL: {nf}	shl	dx, dx
+0x62,0xf4,0x6d,0x1c,0xd1,0xe2
+
+# ATT:   {evex}	shll	%ecx
+# INTEL: {evex}	shl	ecx
+0x62,0xf4,0x7c,0x08,0xd1,0xe1
+
+# ATT:   {nf}	shll	%ecx
+# INTEL: {nf}	shl	ecx
+0x62,0xf4,0x7c,0x0c,0xd1,0xe1
+
+# ATT:   shll	%ecx, %ecx
+# INTEL: shl	ecx, ecx
+0x62,0xf4,0x74,0x18,0xd1,0xe1
+
+# ATT:   {nf}	shll	%ecx, %ecx
+# INTEL: {nf}	shl	ecx, ecx
+0x62,0xf4,0x74,0x1c,0xd1,0xe1
+
+# ATT:   {evex}	shlq	%r9
+# INTEL: {evex}	shl	r9
+0x62,0xd4,0xfc,0x08,0xd1,0xe1
+
+# ATT:   {nf}	shlq	%r9
+# INTEL: {nf}	shl	r9
+0x62,0xd4,0xfc,0x0c,0xd1,0xe1
+
+# ATT:   shlq	%r9, %r9
+# INTEL: shl	r9, r9
+0x62,0xd4,0xb4,0x18,0xd1,0xe1
+
+# ATT:   {nf}	shlq	%r9, %r9
+# INTEL: {nf}	shl	r9, r9
+0x62,0xd4,0xb4,0x1c,0xd1,0xe1
+
+# ATT:   {evex}	shlb	291(%r8,%rax,4)
+# INTEL: {evex}	shl	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlb	291(%r8,%rax,4)
+# INTEL: {nf}	shl	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x0c,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shlb	291(%r8,%rax,4), %bl
+# INTEL: shl	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x18,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlb	291(%r8,%rax,4), %bl
+# INTEL: {nf}	shl	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x1c,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shlw	291(%r8,%rax,4)
+# INTEL: {evex}	shl	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x08,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlw	291(%r8,%rax,4)
+# INTEL: {nf}	shl	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x0c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shlw	291(%r8,%rax,4), %dx
+# INTEL: shl	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x18,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlw	291(%r8,%rax,4), %dx
+# INTEL: {nf}	shl	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x1c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shll	291(%r8,%rax,4)
+# INTEL: {evex}	shl	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shll	291(%r8,%rax,4)
+# INTEL: {nf}	shl	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x0c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shll	291(%r8,%rax,4), %ecx
+# INTEL: shl	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x18,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shll	291(%r8,%rax,4), %ecx
+# INTEL: {nf}	shl	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x1c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shlq	291(%r8,%rax,4)
+# INTEL: {evex}	shl	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x08,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlq	291(%r8,%rax,4)
+# INTEL: {nf}	shl	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x0c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shlq	291(%r8,%rax,4), %r9
+# INTEL: shl	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x18,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shlq	291(%r8,%rax,4), %r9
+# INTEL: {nf}	shl	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x1c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/X86/apx/shld.txt b/llvm/test/MC/Disassembler/X86/apx/shld.txt
new file mode 100644
index 00000000000000..2a633b93960407
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/shld.txt
@@ -0,0 +1,194 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT:   {evex}	shldw	$123, %dx, %dx
+# INTEL: {evex}	shld	dx, dx, 123
+0x62,0xf4,0x7d,0x08,0x24,0xd2,0x7b
+
+# ATT:   {nf}	shldw	$123, %dx, %dx
+# INTEL: {nf}	shld	dx, dx, 123
+0x62,0xf4,0x7d,0x0c,0x24,0xd2,0x7b
+
+# ATT:   shldw	$123, %dx, %dx, %dx
+# INTEL: shld	dx, dx, dx, 123
+0x62,0xf4,0x6d,0x18,0x24,0xd2,0x7b
+
+# ATT:   {nf}	shldw	$123, %dx, %dx, %dx
+# INTEL: {nf}	shld	dx, dx, dx, 123
+0x62,0xf4,0x6d,0x1c,0x24,0xd2,0x7b
+
+# ATT:   {evex}	shldw	$123, %dx, 291(%r8,%rax,4)
+# INTEL: {evex}	shld	word ptr [r8 + 4*rax + 291], dx, 123
+0x62,0xd4,0x7d,0x08,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shldw	$123, %dx, 291(%r8,%rax,4)
+# INTEL: {nf}	shld	word ptr [r8 + 4*rax + 291], dx, 123
+0x62,0xd4,0x7d,0x0c,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shldw	$123, %dx, 291(%r8,%rax,4), %dx
+# INTEL: shld	dx, word ptr [r8 + 4*rax + 291], dx, 123
+0x62,0xd4,0x6d,0x18,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shldw	$123, %dx, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	shld	dx, word ptr [r8 + 4*rax + 291], dx, 123
+0x62,0xd4,0x6d,0x1c,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shldl	$123, %ecx, %ecx
+# INTEL: {evex}	shld	ecx, ecx, 123
+0x62,0xf4,0x7c,0x08,0x24,0xc9,0x7b
+
+# ATT:   {nf}	shldl	$123, %ecx, %ecx
+# INTEL: {nf}	shld	ecx, ecx, 123
+0x62,0xf4,0x7c,0x0c,0x24,0xc9,0x7b
+
+# ATT:   shldl	$123, %ecx, %ecx, %ecx
+# INTEL: shld	ecx, ecx, ecx, 123
+0x62,0xf4,0x74,0x18,0x24,0xc9,0x7b
+
+# ATT:   {nf}	shldl	$123, %ecx, %ecx, %ecx
+# INTEL: {nf}	shld	ecx, ecx, ecx, 123
+0x62,0xf4,0x74,0x1c,0x24,0xc9,0x7b
+
+# ATT:   {evex}	shldl	$123, %ecx, 291(%r8,%rax,4)
+# INTEL: {evex}	shld	dword ptr [r8 + 4*rax + 291], ecx, 123
+0x62,0xd4,0x7c,0x08,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shldl	$123, %ecx, 291(%r8,%rax,4)
+# INTEL: {nf}	shld	dword ptr [r8 + 4*rax + 291], ecx, 123
+0x62,0xd4,0x7c,0x0c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shldl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# INTEL: shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+0x62,0xd4,0x74,0x18,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shldl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+0x62,0xd4,0x74,0x1c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shldq	$123, %r9, %r9
+# INTEL: {evex}	shld	r9, r9, 123
+0x62,0x54,0xfc,0x08,0x24,0xc9,0x7b
+
+# ATT:   {nf}	shldq	$123, %r9, %r9
+# INTEL: {nf}	shld	r9, r9, 123
+0x62,0x54,0xfc,0x0c,0x24,0xc9,0x7b
+
+# ATT:   shldq	$123, %r9, %r9, %r9
+# INTEL: shld	r9, r9, r9, 123
+0x62,0x54,0xb4,0x18,0x24,0xc9,0x7b
+
+# ATT:   {nf}	shldq	$123, %r9, %r9, %r9
+# INTEL: {nf}	shld	r9, r9, r9, 123
+0x62,0x54,0xb4,0x1c,0x24,0xc9,0x7b
+
+# ATT:   {evex}	shldq	$123, %r9, 291(%r8,%rax,4)
+# INTEL: {evex}	shld	qword ptr [r8 + 4*rax + 291], r9, 123
+0x62,0x54,0xfc,0x08,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shldq	$123, %r9, 291(%r8,%rax,4)
+# INTEL: {nf}	shld	qword ptr [r8 + 4*rax + 291], r9, 123
+0x62,0x54,0xfc,0x0c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shldq	$123, %r9, 291(%r8,%rax,4), %r9
+# INTEL: shld	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+0x62,0x54,0xb4,0x18,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shldq	$123, %r9, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	shld	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+0x62,0x54,0xb4,0x1c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shldw	%cl, %dx, %dx
+# INTEL: {evex}	shld	dx, dx, cl
+0x62,0xf4,0x7d,0x08,0xa5,0xd2
+
+# ATT:   {nf}	shldw	%cl, %dx, %dx
+# INTEL: {nf}	shld	dx, dx, cl
+0x62,0xf4,0x7d,0x0c,0xa5,0xd2
+
+# ATT:   shldw	%cl, %dx, %dx, %dx
+# INTEL: shld	dx, dx, dx, cl
+0x62,0xf4,0x6d,0x18,0xa5,0xd2
+
+# ATT:   {nf}	shldw	%cl, %dx, %dx, %dx
+# INTEL: {nf}	shld	dx, dx, dx, cl
+0x62,0xf4,0x6d,0x1c,0xa5,0xd2
+
+# ATT:   {evex}	shldw	%cl, %dx, 291(%r8,%rax,4)
+# INTEL: {evex}	shld	word ptr [r8 + 4*rax + 291], dx, cl
+0x62,0xd4,0x7d,0x08,0xa5,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shldw	%cl, %dx, 291(%r8,%rax,4)
+# INTEL: {nf}	shld	word ptr [r8 + 4*rax + 291], dx, cl
+0x62,0xd4,0x7d,0x0c,0xa5,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shldw	%cl, %dx, 291(%r8,%rax,4), %dx
+# INTEL: shld	dx, word ptr [r8 + 4*rax + 291], dx, cl
+0x62,0xd4,0x6d,0x18,0xa5,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shldw	%cl, %dx, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	shld	dx, word ptr [r8 + 4*rax + 291], dx, cl
+0x62,0xd4,0x6d,0x1c,0xa5,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shldl	%cl, %ecx, %ecx
+# INTEL: {evex}	shld	ecx, ecx, cl
+0x62,0xf4,0x7c,0x08,0xa5,0xc9
+
+# ATT:   {nf}	shldl	%cl, %ecx, %ecx
+# INTEL: {nf}	shld	ecx, ecx, cl
+0x62,0xf4,0x7c,0x0c,0xa5,0xc9
+
+# ATT:   shldl	%cl, %ecx, %ecx, %ecx
+# INTEL: shld	ecx, ecx, ecx, cl
+0x62,0xf4,0x74,0x18,0xa5,0xc9
+
+# ATT:   {nf}	shldl	%cl, %ecx, %ecx, %ecx
+# INTEL: {nf}	shld	ecx, ecx, ecx, cl
+0x62,0xf4,0x74,0x1c,0xa5,0xc9
+
+# ATT:   {evex}	shldl	%cl, %ecx, 291(%r8,%rax,4)
+# INTEL: {evex}	shld	dword ptr [r8 + 4*rax + 291], ecx, cl
+0x62,0xd4,0x7c,0x08,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shldl	%cl, %ecx, 291(%r8,%rax,4)
+# INTEL: {nf}	shld	dword ptr [r8 + 4*rax + 291], ecx, cl
+0x62,0xd4,0x7c,0x0c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shldl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# INTEL: shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+0x62,0xd4,0x74,0x18,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shldl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+0x62,0xd4,0x74,0x1c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shldq	%cl, %r9, %r9
+# INTEL: {evex}	shld	r9, r9, cl
+0x62,0x54,0xfc,0x08,0xa5,0xc9
+
+# ATT:   {nf}	shldq	%cl, %r9, %r9
+# INTEL: {nf}	shld	r9, r9, cl
+0x62,0x54,0xfc,0x0c,0xa5,0xc9
+
+# ATT:   shldq	%cl, %r9, %r9, %r9
+# INTEL: shld	r9, r9, r9, cl
+0x62,0x54,0xb4,0x18,0xa5,0xc9
+
+# ATT:   {nf}	shldq	%cl, %r9, %r9, %r9
+# INTEL: {nf}	shld	r9, r9, r9, cl
+0x62,0x54,0xb4,0x1c,0xa5,0xc9
+
+# ATT:   {evex}	shldq	%cl, %r9, 291(%r8,%rax,4)
+# INTEL: {evex}	shld	qword ptr [r8 + 4*rax + 291], r9, cl
+0x62,0x54,0xfc,0x08,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shldq	%cl, %r9, 291(%r8,%rax,4)
+# INTEL: {nf}	shld	qword ptr [r8 + 4*rax + 291], r9, cl
+0x62,0x54,0xfc,0x0c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shldq	%cl, %r9, 291(%r8,%rax,4), %r9
+# INTEL: shld	r9, qword ptr [r8 + 4*rax + 291], r9, cl
+0x62,0x54,0xb4,0x18,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shldq	%cl, %r9, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	shld	r9, qword ptr [r8 + 4*rax + 291], r9, cl
+0x62,0x54,0xb4,0x1c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/X86/apx/shr.txt b/llvm/test/MC/Disassembler/X86/apx/shr.txt
new file mode 100644
index 00000000000000..1e7e1732b56fb7
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/shr.txt
@@ -0,0 +1,386 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT:   {evex}	shrb	$123, %bl
+# INTEL: {evex}	shr	bl, 123
+0x62,0xf4,0x7c,0x08,0xc0,0xeb,0x7b
+
+# ATT:   {nf}	shrb	$123, %bl
+# INTEL: {nf}	shr	bl, 123
+0x62,0xf4,0x7c,0x0c,0xc0,0xeb,0x7b
+
+# ATT:   shrb	$123, %bl, %bl
+# INTEL: shr	bl, bl, 123
+0x62,0xf4,0x64,0x18,0xc0,0xeb,0x7b
+
+# ATT:   {nf}	shrb	$123, %bl, %bl
+# INTEL: {nf}	shr	bl, bl, 123
+0x62,0xf4,0x64,0x1c,0xc0,0xeb,0x7b
+
+# ATT:   {evex}	shrw	$123, %dx
+# INTEL: {evex}	shr	dx, 123
+0x62,0xf4,0x7d,0x08,0xc1,0xea,0x7b
+
+# ATT:   {nf}	shrw	$123, %dx
+# INTEL: {nf}	shr	dx, 123
+0x62,0xf4,0x7d,0x0c,0xc1,0xea,0x7b
+
+# ATT:   shrw	$123, %dx, %dx
+# INTEL: shr	dx, dx, 123
+0x62,0xf4,0x6d,0x18,0xc1,0xea,0x7b
+
+# ATT:   {nf}	shrw	$123, %dx, %dx
+# INTEL: {nf}	shr	dx, dx, 123
+0x62,0xf4,0x6d,0x1c,0xc1,0xea,0x7b
+
+# ATT:   {evex}	shrl	$123, %ecx
+# INTEL: {evex}	shr	ecx, 123
+0x62,0xf4,0x7c,0x08,0xc1,0xe9,0x7b
+
+# ATT:   {nf}	shrl	$123, %ecx
+# INTEL: {nf}	shr	ecx, 123
+0x62,0xf4,0x7c,0x0c,0xc1,0xe9,0x7b
+
+# ATT:   shrl	$123, %ecx, %ecx
+# INTEL: shr	ecx, ecx, 123
+0x62,0xf4,0x74,0x18,0xc1,0xe9,0x7b
+
+# ATT:   {nf}	shrl	$123, %ecx, %ecx
+# INTEL: {nf}	shr	ecx, ecx, 123
+0x62,0xf4,0x74,0x1c,0xc1,0xe9,0x7b
+
+# ATT:   {evex}	shrq	$123, %r9
+# INTEL: {evex}	shr	r9, 123
+0x62,0xd4,0xfc,0x08,0xc1,0xe9,0x7b
+
+# ATT:   {nf}	shrq	$123, %r9
+# INTEL: {nf}	shr	r9, 123
+0x62,0xd4,0xfc,0x0c,0xc1,0xe9,0x7b
+
+# ATT:   shrq	$123, %r9, %r9
+# INTEL: shr	r9, r9, 123
+0x62,0xd4,0xb4,0x18,0xc1,0xe9,0x7b
+
+# ATT:   {nf}	shrq	$123, %r9, %r9
+# INTEL: {nf}	shr	r9, r9, 123
+0x62,0xd4,0xb4,0x1c,0xc1,0xe9,0x7b
+
+# ATT:   {evex}	shrb	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	shr	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrb	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	shr	byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x0c,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shrb	$123, 291(%r8,%rax,4), %bl
+# INTEL: shr	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x18,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrb	$123, 291(%r8,%rax,4), %bl
+# INTEL: {nf}	shr	bl, byte ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x64,0x1c,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shrw	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	shr	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x08,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrw	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	shr	word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7d,0x0c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shrw	$123, 291(%r8,%rax,4), %dx
+# INTEL: shr	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x18,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrw	$123, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	shr	dx, word ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x6d,0x1c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shrl	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	shr	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x08,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrl	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	shr	dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x7c,0x0c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shrl	$123, 291(%r8,%rax,4), %ecx
+# INTEL: shr	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x18,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrl	$123, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	shr	ecx, dword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0x74,0x1c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shrq	$123, 291(%r8,%rax,4)
+# INTEL: {evex}	shr	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x08,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrq	$123, 291(%r8,%rax,4)
+# INTEL: {nf}	shr	qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xfc,0x0c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shrq	$123, 291(%r8,%rax,4), %r9
+# INTEL: shr	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x18,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrq	$123, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	shr	r9, qword ptr [r8 + 4*rax + 291], 123
+0x62,0xd4,0xb4,0x1c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shrb	%bl
+# INTEL: {evex}	shr	bl
+0x62,0xf4,0x7c,0x08,0xd0,0xeb
+
+# ATT:   {nf}	shrb	%bl
+# INTEL: {nf}	shr	bl
+0x62,0xf4,0x7c,0x0c,0xd0,0xeb
+
+# ATT:   shrb	%bl, %bl
+# INTEL: shr	bl, bl
+0x62,0xf4,0x64,0x18,0xd0,0xeb
+
+# ATT:   {nf}	shrb	%bl, %bl
+# INTEL: {nf}	shr	bl, bl
+0x62,0xf4,0x64,0x1c,0xd0,0xeb
+
+# ATT:   {evex}	shrb	%cl, %bl
+# INTEL: {evex}	shr	bl, cl
+0x62,0xf4,0x7c,0x08,0xd2,0xeb
+
+# ATT:   {nf}	shrb	%cl, %bl
+# INTEL: {nf}	shr	bl, cl
+0x62,0xf4,0x7c,0x0c,0xd2,0xeb
+
+# ATT:   shrb	%cl, %bl, %bl
+# INTEL: shr	bl, bl, cl
+0x62,0xf4,0x64,0x18,0xd2,0xeb
+
+# ATT:   {nf}	shrb	%cl, %bl, %bl
+# INTEL: {nf}	shr	bl, bl, cl
+0x62,0xf4,0x64,0x1c,0xd2,0xeb
+
+# ATT:   {evex}	shrw	%cl, %dx
+# INTEL: {evex}	shr	dx, cl
+0x62,0xf4,0x7d,0x08,0xd3,0xea
+
+# ATT:   {nf}	shrw	%cl, %dx
+# INTEL: {nf}	shr	dx, cl
+0x62,0xf4,0x7d,0x0c,0xd3,0xea
+
+# ATT:   shrw	%cl, %dx, %dx
+# INTEL: shr	dx, dx, cl
+0x62,0xf4,0x6d,0x18,0xd3,0xea
+
+# ATT:   {nf}	shrw	%cl, %dx, %dx
+# INTEL: {nf}	shr	dx, dx, cl
+0x62,0xf4,0x6d,0x1c,0xd3,0xea
+
+# ATT:   {evex}	shrl	%cl, %ecx
+# INTEL: {evex}	shr	ecx, cl
+0x62,0xf4,0x7c,0x08,0xd3,0xe9
+
+# ATT:   {nf}	shrl	%cl, %ecx
+# INTEL: {nf}	shr	ecx, cl
+0x62,0xf4,0x7c,0x0c,0xd3,0xe9
+
+# ATT:   shrl	%cl, %ecx, %ecx
+# INTEL: shr	ecx, ecx, cl
+0x62,0xf4,0x74,0x18,0xd3,0xe9
+
+# ATT:   {nf}	shrl	%cl, %ecx, %ecx
+# INTEL: {nf}	shr	ecx, ecx, cl
+0x62,0xf4,0x74,0x1c,0xd3,0xe9
+
+# ATT:   {evex}	shrq	%cl, %r9
+# INTEL: {evex}	shr	r9, cl
+0x62,0xd4,0xfc,0x08,0xd3,0xe9
+
+# ATT:   {nf}	shrq	%cl, %r9
+# INTEL: {nf}	shr	r9, cl
+0x62,0xd4,0xfc,0x0c,0xd3,0xe9
+
+# ATT:   shrq	%cl, %r9, %r9
+# INTEL: shr	r9, r9, cl
+0x62,0xd4,0xb4,0x18,0xd3,0xe9
+
+# ATT:   {nf}	shrq	%cl, %r9, %r9
+# INTEL: {nf}	shr	r9, r9, cl
+0x62,0xd4,0xb4,0x1c,0xd3,0xe9
+
+# ATT:   {evex}	shrb	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	shr	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd2,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrb	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	shr	byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x0c,0xd2,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: shr	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x18,0xd2,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrb	%cl, 291(%r8,%rax,4), %bl
+# INTEL: {nf}	shr	bl, byte ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x64,0x1c,0xd2,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shrw	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	shr	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x08,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrw	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	shr	word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7d,0x0c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: shr	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x18,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrw	%cl, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	shr	dx, word ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x6d,0x1c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shrl	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	shr	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x08,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrl	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	shr	dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x7c,0x0c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrl	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: shr	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x18,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrl	%cl, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	shr	ecx, dword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0x74,0x1c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shrq	%cl, 291(%r8,%rax,4)
+# INTEL: {evex}	shr	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x08,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrq	%cl, 291(%r8,%rax,4)
+# INTEL: {nf}	shr	qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xfc,0x0c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: shr	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x18,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrq	%cl, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	shr	r9, qword ptr [r8 + 4*rax + 291], cl
+0x62,0xd4,0xb4,0x1c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shrw	%dx
+# INTEL: {evex}	shr	dx
+0x62,0xf4,0x7d,0x08,0xd1,0xea
+
+# ATT:   {nf}	shrw	%dx
+# INTEL: {nf}	shr	dx
+0x62,0xf4,0x7d,0x0c,0xd1,0xea
+
+# ATT:   shrw	%dx, %dx
+# INTEL: shr	dx, dx
+0x62,0xf4,0x6d,0x18,0xd1,0xea
+
+# ATT:   {nf}	shrw	%dx, %dx
+# INTEL: {nf}	shr	dx, dx
+0x62,0xf4,0x6d,0x1c,0xd1,0xea
+
+# ATT:   {evex}	shrl	%ecx
+# INTEL: {evex}	shr	ecx
+0x62,0xf4,0x7c,0x08,0xd1,0xe9
+
+# ATT:   {nf}	shrl	%ecx
+# INTEL: {nf}	shr	ecx
+0x62,0xf4,0x7c,0x0c,0xd1,0xe9
+
+# ATT:   shrl	%ecx, %ecx
+# INTEL: shr	ecx, ecx
+0x62,0xf4,0x74,0x18,0xd1,0xe9
+
+# ATT:   {nf}	shrl	%ecx, %ecx
+# INTEL: {nf}	shr	ecx, ecx
+0x62,0xf4,0x74,0x1c,0xd1,0xe9
+
+# ATT:   {evex}	shrq	%r9
+# INTEL: {evex}	shr	r9
+0x62,0xd4,0xfc,0x08,0xd1,0xe9
+
+# ATT:   {nf}	shrq	%r9
+# INTEL: {nf}	shr	r9
+0x62,0xd4,0xfc,0x0c,0xd1,0xe9
+
+# ATT:   shrq	%r9, %r9
+# INTEL: shr	r9, r9
+0x62,0xd4,0xb4,0x18,0xd1,0xe9
+
+# ATT:   {nf}	shrq	%r9, %r9
+# INTEL: {nf}	shr	r9, r9
+0x62,0xd4,0xb4,0x1c,0xd1,0xe9
+
+# ATT:   {evex}	shrb	291(%r8,%rax,4)
+# INTEL: {evex}	shr	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd0,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrb	291(%r8,%rax,4)
+# INTEL: {nf}	shr	byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x0c,0xd0,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrb	291(%r8,%rax,4), %bl
+# INTEL: shr	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x18,0xd0,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrb	291(%r8,%rax,4), %bl
+# INTEL: {nf}	shr	bl, byte ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x64,0x1c,0xd0,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shrw	291(%r8,%rax,4)
+# INTEL: {evex}	shr	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x08,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrw	291(%r8,%rax,4)
+# INTEL: {nf}	shr	word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7d,0x0c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrw	291(%r8,%rax,4), %dx
+# INTEL: shr	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x18,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrw	291(%r8,%rax,4), %dx
+# INTEL: {nf}	shr	dx, word ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x6d,0x1c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shrl	291(%r8,%rax,4)
+# INTEL: {evex}	shr	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x08,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrl	291(%r8,%rax,4)
+# INTEL: {nf}	shr	dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x7c,0x0c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrl	291(%r8,%rax,4), %ecx
+# INTEL: shr	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x18,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrl	291(%r8,%rax,4), %ecx
+# INTEL: {nf}	shr	ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0x74,0x1c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shrq	291(%r8,%rax,4)
+# INTEL: {evex}	shr	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x08,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrq	291(%r8,%rax,4)
+# INTEL: {nf}	shr	qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xfc,0x0c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrq	291(%r8,%rax,4), %r9
+# INTEL: shr	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x18,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrq	291(%r8,%rax,4), %r9
+# INTEL: {nf}	shr	r9, qword ptr [r8 + 4*rax + 291]
+0x62,0xd4,0xb4,0x1c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/X86/apx/shrd.txt b/llvm/test/MC/Disassembler/X86/apx/shrd.txt
new file mode 100644
index 00000000000000..4f71bfda77f7de
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/shrd.txt
@@ -0,0 +1,194 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT:   {evex}	shrdw	$123, %dx, %dx
+# INTEL: {evex}	shrd	dx, dx, 123
+0x62,0xf4,0x7d,0x08,0x2c,0xd2,0x7b
+
+# ATT:   {nf}	shrdw	$123, %dx, %dx
+# INTEL: {nf}	shrd	dx, dx, 123
+0x62,0xf4,0x7d,0x0c,0x2c,0xd2,0x7b
+
+# ATT:   shrdw	$123, %dx, %dx, %dx
+# INTEL: shrd	dx, dx, dx, 123
+0x62,0xf4,0x6d,0x18,0x2c,0xd2,0x7b
+
+# ATT:   {nf}	shrdw	$123, %dx, %dx, %dx
+# INTEL: {nf}	shrd	dx, dx, dx, 123
+0x62,0xf4,0x6d,0x1c,0x2c,0xd2,0x7b
+
+# ATT:   {evex}	shrdw	$123, %dx, 291(%r8,%rax,4)
+# INTEL: {evex}	shrd	word ptr [r8 + 4*rax + 291], dx, 123
+0x62,0xd4,0x7d,0x08,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrdw	$123, %dx, 291(%r8,%rax,4)
+# INTEL: {nf}	shrd	word ptr [r8 + 4*rax + 291], dx, 123
+0x62,0xd4,0x7d,0x0c,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shrdw	$123, %dx, 291(%r8,%rax,4), %dx
+# INTEL: shrd	dx, word ptr [r8 + 4*rax + 291], dx, 123
+0x62,0xd4,0x6d,0x18,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrdw	$123, %dx, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	shrd	dx, word ptr [r8 + 4*rax + 291], dx, 123
+0x62,0xd4,0x6d,0x1c,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shrdl	$123, %ecx, %ecx
+# INTEL: {evex}	shrd	ecx, ecx, 123
+0x62,0xf4,0x7c,0x08,0x2c,0xc9,0x7b
+
+# ATT:   {nf}	shrdl	$123, %ecx, %ecx
+# INTEL: {nf}	shrd	ecx, ecx, 123
+0x62,0xf4,0x7c,0x0c,0x2c,0xc9,0x7b
+
+# ATT:   shrdl	$123, %ecx, %ecx, %ecx
+# INTEL: shrd	ecx, ecx, ecx, 123
+0x62,0xf4,0x74,0x18,0x2c,0xc9,0x7b
+
+# ATT:   {nf}	shrdl	$123, %ecx, %ecx, %ecx
+# INTEL: {nf}	shrd	ecx, ecx, ecx, 123
+0x62,0xf4,0x74,0x1c,0x2c,0xc9,0x7b
+
+# ATT:   {evex}	shrdl	$123, %ecx, 291(%r8,%rax,4)
+# INTEL: {evex}	shrd	dword ptr [r8 + 4*rax + 291], ecx, 123
+0x62,0xd4,0x7c,0x08,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrdl	$123, %ecx, 291(%r8,%rax,4)
+# INTEL: {nf}	shrd	dword ptr [r8 + 4*rax + 291], ecx, 123
+0x62,0xd4,0x7c,0x0c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shrdl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# INTEL: shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+0x62,0xd4,0x74,0x18,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrdl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+0x62,0xd4,0x74,0x1c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shrdq	$123, %r9, %r9
+# INTEL: {evex}	shrd	r9, r9, 123
+0x62,0x54,0xfc,0x08,0x2c,0xc9,0x7b
+
+# ATT:   {nf}	shrdq	$123, %r9, %r9
+# INTEL: {nf}	shrd	r9, r9, 123
+0x62,0x54,0xfc,0x0c,0x2c,0xc9,0x7b
+
+# ATT:   shrdq	$123, %r9, %r9, %r9
+# INTEL: shrd	r9, r9, r9, 123
+0x62,0x54,0xb4,0x18,0x2c,0xc9,0x7b
+
+# ATT:   {nf}	shrdq	$123, %r9, %r9, %r9
+# INTEL: {nf}	shrd	r9, r9, r9, 123
+0x62,0x54,0xb4,0x1c,0x2c,0xc9,0x7b
+
+# ATT:   {evex}	shrdq	$123, %r9, 291(%r8,%rax,4)
+# INTEL: {evex}	shrd	qword ptr [r8 + 4*rax + 291], r9, 123
+0x62,0x54,0xfc,0x08,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrdq	$123, %r9, 291(%r8,%rax,4)
+# INTEL: {nf}	shrd	qword ptr [r8 + 4*rax + 291], r9, 123
+0x62,0x54,0xfc,0x0c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   shrdq	$123, %r9, 291(%r8,%rax,4), %r9
+# INTEL: shrd	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+0x62,0x54,0xb4,0x18,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {nf}	shrdq	$123, %r9, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	shrd	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+0x62,0x54,0xb4,0x1c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT:   {evex}	shrdw	%cl, %dx, %dx
+# INTEL: {evex}	shrd	dx, dx, cl
+0x62,0xf4,0x7d,0x08,0xad,0xd2
+
+# ATT:   {nf}	shrdw	%cl, %dx, %dx
+# INTEL: {nf}	shrd	dx, dx, cl
+0x62,0xf4,0x7d,0x0c,0xad,0xd2
+
+# ATT:   shrdw	%cl, %dx, %dx, %dx
+# INTEL: shrd	dx, dx, dx, cl
+0x62,0xf4,0x6d,0x18,0xad,0xd2
+
+# ATT:   {nf}	shrdw	%cl, %dx, %dx, %dx
+# INTEL: {nf}	shrd	dx, dx, dx, cl
+0x62,0xf4,0x6d,0x1c,0xad,0xd2
+
+# ATT:   {evex}	shrdw	%cl, %dx, 291(%r8,%rax,4)
+# INTEL: {evex}	shrd	word ptr [r8 + 4*rax + 291], dx, cl
+0x62,0xd4,0x7d,0x08,0xad,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrdw	%cl, %dx, 291(%r8,%rax,4)
+# INTEL: {nf}	shrd	word ptr [r8 + 4*rax + 291], dx, cl
+0x62,0xd4,0x7d,0x0c,0xad,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrdw	%cl, %dx, 291(%r8,%rax,4), %dx
+# INTEL: shrd	dx, word ptr [r8 + 4*rax + 291], dx, cl
+0x62,0xd4,0x6d,0x18,0xad,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrdw	%cl, %dx, 291(%r8,%rax,4), %dx
+# INTEL: {nf}	shrd	dx, word ptr [r8 + 4*rax + 291], dx, cl
+0x62,0xd4,0x6d,0x1c,0xad,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shrdl	%cl, %ecx, %ecx
+# INTEL: {evex}	shrd	ecx, ecx, cl
+0x62,0xf4,0x7c,0x08,0xad,0xc9
+
+# ATT:   {nf}	shrdl	%cl, %ecx, %ecx
+# INTEL: {nf}	shrd	ecx, ecx, cl
+0x62,0xf4,0x7c,0x0c,0xad,0xc9
+
+# ATT:   shrdl	%cl, %ecx, %ecx, %ecx
+# INTEL: shrd	ecx, ecx, ecx, cl
+0x62,0xf4,0x74,0x18,0xad,0xc9
+
+# ATT:   {nf}	shrdl	%cl, %ecx, %ecx, %ecx
+# INTEL: {nf}	shrd	ecx, ecx, ecx, cl
+0x62,0xf4,0x74,0x1c,0xad,0xc9
+
+# ATT:   {evex}	shrdl	%cl, %ecx, 291(%r8,%rax,4)
+# INTEL: {evex}	shrd	dword ptr [r8 + 4*rax + 291], ecx, cl
+0x62,0xd4,0x7c,0x08,0xad,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrdl	%cl, %ecx, 291(%r8,%rax,4)
+# INTEL: {nf}	shrd	dword ptr [r8 + 4*rax + 291], ecx, cl
+0x62,0xd4,0x7c,0x0c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrdl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# INTEL: shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+0x62,0xd4,0x74,0x18,0xad,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrdl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# INTEL: {nf}	shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+0x62,0xd4,0x74,0x1c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {evex}	shrdq	%cl, %r9, %r9
+# INTEL: {evex}	shrd	r9, r9, cl
+0x62,0x54,0xfc,0x08,0xad,0xc9
+
+# ATT:   {nf}	shrdq	%cl, %r9, %r9
+# INTEL: {nf}	shrd	r9, r9, cl
+0x62,0x54,0xfc,0x0c,0xad,0xc9
+
+# ATT:   shrdq	%cl, %r9, %r9, %r9
+# INTEL: shrd	r9, r9, r9, cl
+0x62,0x54,0xb4,0x18,0xad,0xc9
+
+# ATT:   {nf}	shrdq	%cl, %r9, %r9, %r9
+# INTEL: {nf}	shrd	r9, r9, r9, cl
+0x62,0x54,0xb4,0x1c,0xad,0xc9
+
+# ATT:   {evex}	shrdq	%cl, %r9, 291(%r8,%rax,4)
+# INTEL: {evex}	shrd	qword ptr [r8 + 4*rax + 291], r9, cl
+0x62,0x54,0xfc,0x08,0xad,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrdq	%cl, %r9, 291(%r8,%rax,4)
+# INTEL: {nf}	shrd	qword ptr [r8 + 4*rax + 291], r9, cl
+0x62,0x54,0xfc,0x0c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   shrdq	%cl, %r9, 291(%r8,%rax,4), %r9
+# INTEL: shrd	r9, qword ptr [r8 + 4*rax + 291], r9, cl
+0x62,0x54,0xb4,0x18,0xad,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {nf}	shrdq	%cl, %r9, 291(%r8,%rax,4), %r9
+# INTEL: {nf}	shrd	r9, qword ptr [r8 + 4*rax + 291], r9, cl
+0x62,0x54,0xb4,0x1c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00
diff --git a/llvm/test/MC/X86/apx/rcl-att.s b/llvm/test/MC/X86/apx/rcl-att.s
new file mode 100644
index 00000000000000..68bbb970a1ec42
--- /dev/null
+++ b/llvm/test/MC/X86/apx/rcl-att.s
@@ -0,0 +1,146 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-47: error:
+# ERROR-NOT: error:
+# CHECK: {evex}	rclb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xd3,0x7b]
+         {evex}	rclb	$123, %bl
+# CHECK: rclb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xd3,0x7b]
+         rclb	$123, %bl, %bl
+# CHECK: {evex}	rclw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xd2,0x7b]
+         {evex}	rclw	$123, %dx
+# CHECK: rclw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xd2,0x7b]
+         rclw	$123, %dx, %dx
+# CHECK: {evex}	rcll	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xd1,0x7b]
+         {evex}	rcll	$123, %ecx
+# CHECK: rcll	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xd1,0x7b]
+         rcll	$123, %ecx, %ecx
+# CHECK: {evex}	rclq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xd1,0x7b]
+         {evex}	rclq	$123, %r9
+# CHECK: rclq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xd1,0x7b]
+         rclq	$123, %r9, %r9
+# CHECK: {evex}	rclb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rclb	$123, 291(%r8,%rax,4)
+# CHECK: rclb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rclb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	rclw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rclw	$123, 291(%r8,%rax,4)
+# CHECK: rclw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rclw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	rcll	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcll	$123, 291(%r8,%rax,4)
+# CHECK: rcll	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcll	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rclq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rclq	$123, 291(%r8,%rax,4)
+# CHECK: rclq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rclq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	rclb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xd3]
+         {evex}	rclb	%bl
+# CHECK: {evex}	rclb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xd3]
+         {evex}	rclb	%cl, %bl
+# CHECK: rclb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xd3]
+         rclb	%cl, %bl, %bl
+# CHECK: {evex}	rclw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xd2]
+         {evex}	rclw	%cl, %dx
+# CHECK: rclw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xd2]
+         rclw	%cl, %dx, %dx
+# CHECK: {evex}	rcll	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xd1]
+         {evex}	rcll	%cl, %ecx
+# CHECK: rcll	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xd1]
+         rcll	%cl, %ecx, %ecx
+# CHECK: {evex}	rclq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xd1]
+         {evex}	rclq	%cl, %r9
+# CHECK: rclq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xd1]
+         rclq	%cl, %r9, %r9
+# CHECK: {evex}	rclb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rclb	%cl, 291(%r8,%rax,4)
+# CHECK: rclb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0x94,0x80,0x23,0x01,0x00,0x00]
+         rclb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	rclw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rclw	%cl, 291(%r8,%rax,4)
+# CHECK: rclw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         rclw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	rcll	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcll	%cl, 291(%r8,%rax,4)
+# CHECK: rcll	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         rcll	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rclq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rclq	%cl, 291(%r8,%rax,4)
+# CHECK: rclq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         rclq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	rclw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xd2]
+         {evex}	rclw	%dx
+# CHECK: rclw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xd2]
+         rclw	%dx, %dx
+# CHECK: {evex}	rcll	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xd1]
+         {evex}	rcll	%ecx
+# CHECK: rcll	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xd1]
+         rcll	%ecx, %ecx
+# CHECK: {evex}	rclq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xd1]
+         {evex}	rclq	%r9
+# CHECK: rclq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xd1]
+         rclq	%r9, %r9
+# CHECK: {evex}	rclb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rclb	291(%r8,%rax,4)
+# CHECK: rclb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0x94,0x80,0x23,0x01,0x00,0x00]
+         rclb	291(%r8,%rax,4), %bl
+# CHECK: {evex}	rclw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rclw	291(%r8,%rax,4)
+# CHECK: rclw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         rclw	291(%r8,%rax,4), %dx
+# CHECK: {evex}	rcll	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcll	291(%r8,%rax,4)
+# CHECK: rcll	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         rcll	291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rclq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rclq	291(%r8,%rax,4)
+# CHECK: rclq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         rclq	291(%r8,%rax,4), %r9
diff --git a/llvm/test/MC/X86/apx/rcl-intel.s b/llvm/test/MC/X86/apx/rcl-intel.s
new file mode 100644
index 00000000000000..8477cb6461a362
--- /dev/null
+++ b/llvm/test/MC/X86/apx/rcl-intel.s
@@ -0,0 +1,143 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: {evex}	rcl	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xd3,0x7b]
+         {evex}	rcl	bl, 123
+# CHECK: rcl	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xd3,0x7b]
+         rcl	bl, bl, 123
+# CHECK: {evex}	rcl	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xd2,0x7b]
+         {evex}	rcl	dx, 123
+# CHECK: rcl	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xd2,0x7b]
+         rcl	dx, dx, 123
+# CHECK: {evex}	rcl	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xd1,0x7b]
+         {evex}	rcl	ecx, 123
+# CHECK: rcl	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xd1,0x7b]
+         rcl	ecx, ecx, 123
+# CHECK: {evex}	rcl	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xd1,0x7b]
+         {evex}	rcl	r9, 123
+# CHECK: rcl	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xd1,0x7b]
+         rcl	r9, r9, 123
+# CHECK: {evex}	rcl	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcl	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: rcl	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcl	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rcl	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcl	word ptr [r8 + 4*rax + 291], 123
+# CHECK: rcl	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcl	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rcl	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcl	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: rcl	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcl	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rcl	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcl	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: rcl	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcl	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rcl	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xd3]
+         {evex}	rcl	bl
+# CHECK: {evex}	rcl	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xd3]
+         {evex}	rcl	bl, cl
+# CHECK: rcl	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xd3]
+         rcl	bl, bl, cl
+# CHECK: {evex}	rcl	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xd2]
+         {evex}	rcl	dx, cl
+# CHECK: rcl	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xd2]
+         rcl	dx, dx, cl
+# CHECK: {evex}	rcl	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xd1]
+         {evex}	rcl	ecx, cl
+# CHECK: rcl	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xd1]
+         rcl	ecx, ecx, cl
+# CHECK: {evex}	rcl	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xd1]
+         {evex}	rcl	r9, cl
+# CHECK: rcl	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xd1]
+         rcl	r9, r9, cl
+# CHECK: {evex}	rcl	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcl	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: rcl	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0x94,0x80,0x23,0x01,0x00,0x00]
+         rcl	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rcl	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcl	word ptr [r8 + 4*rax + 291], cl
+# CHECK: rcl	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         rcl	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rcl	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcl	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: rcl	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         rcl	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rcl	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcl	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: rcl	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0x94,0x80,0x23,0x01,0x00,0x00]
+         rcl	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rcl	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xd2]
+         {evex}	rcl	dx
+# CHECK: rcl	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xd2]
+         rcl	dx, dx
+# CHECK: {evex}	rcl	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xd1]
+         {evex}	rcl	ecx
+# CHECK: rcl	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xd1]
+         rcl	ecx, ecx
+# CHECK: {evex}	rcl	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xd1]
+         {evex}	rcl	r9
+# CHECK: rcl	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xd1]
+         rcl	r9, r9
+# CHECK: {evex}	rcl	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcl	byte ptr [r8 + 4*rax + 291]
+# CHECK: rcl	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0x94,0x80,0x23,0x01,0x00,0x00]
+         rcl	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	rcl	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcl	word ptr [r8 + 4*rax + 291]
+# CHECK: rcl	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         rcl	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	rcl	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcl	dword ptr [r8 + 4*rax + 291]
+# CHECK: rcl	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         rcl	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	rcl	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcl	qword ptr [r8 + 4*rax + 291]
+# CHECK: rcl	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0x94,0x80,0x23,0x01,0x00,0x00]
+         rcl	r9, qword ptr [r8 + 4*rax + 291]
diff --git a/llvm/test/MC/X86/apx/rcr-att.s b/llvm/test/MC/X86/apx/rcr-att.s
new file mode 100644
index 00000000000000..50136bca94447a
--- /dev/null
+++ b/llvm/test/MC/X86/apx/rcr-att.s
@@ -0,0 +1,146 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-47: error:
+# ERROR-NOT: error:
+# CHECK: {evex}	rcrb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xdb,0x7b]
+         {evex}	rcrb	$123, %bl
+# CHECK: rcrb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xdb,0x7b]
+         rcrb	$123, %bl, %bl
+# CHECK: {evex}	rcrw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xda,0x7b]
+         {evex}	rcrw	$123, %dx
+# CHECK: rcrw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xda,0x7b]
+         rcrw	$123, %dx, %dx
+# CHECK: {evex}	rcrl	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xd9,0x7b]
+         {evex}	rcrl	$123, %ecx
+# CHECK: rcrl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xd9,0x7b]
+         rcrl	$123, %ecx, %ecx
+# CHECK: {evex}	rcrq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xd9,0x7b]
+         {evex}	rcrq	$123, %r9
+# CHECK: rcrq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xd9,0x7b]
+         rcrq	$123, %r9, %r9
+# CHECK: {evex}	rcrb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcrb	$123, 291(%r8,%rax,4)
+# CHECK: rcrb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcrb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	rcrw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcrw	$123, 291(%r8,%rax,4)
+# CHECK: rcrw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcrw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	rcrl	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcrl	$123, 291(%r8,%rax,4)
+# CHECK: rcrl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcrl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rcrq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcrq	$123, 291(%r8,%rax,4)
+# CHECK: rcrq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcrq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	rcrb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xdb]
+         {evex}	rcrb	%bl
+# CHECK: {evex}	rcrb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xdb]
+         {evex}	rcrb	%cl, %bl
+# CHECK: rcrb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xdb]
+         rcrb	%cl, %bl, %bl
+# CHECK: {evex}	rcrw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xda]
+         {evex}	rcrw	%cl, %dx
+# CHECK: rcrw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xda]
+         rcrw	%cl, %dx, %dx
+# CHECK: {evex}	rcrl	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xd9]
+         {evex}	rcrl	%cl, %ecx
+# CHECK: rcrl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xd9]
+         rcrl	%cl, %ecx, %ecx
+# CHECK: {evex}	rcrq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xd9]
+         {evex}	rcrq	%cl, %r9
+# CHECK: rcrq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xd9]
+         rcrq	%cl, %r9, %r9
+# CHECK: {evex}	rcrb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcrb	%cl, 291(%r8,%rax,4)
+# CHECK: rcrb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcrb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	rcrw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcrw	%cl, 291(%r8,%rax,4)
+# CHECK: rcrw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcrw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	rcrl	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcrl	%cl, 291(%r8,%rax,4)
+# CHECK: rcrl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcrl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rcrq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcrq	%cl, 291(%r8,%rax,4)
+# CHECK: rcrq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcrq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	rcrw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xda]
+         {evex}	rcrw	%dx
+# CHECK: rcrw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xda]
+         rcrw	%dx, %dx
+# CHECK: {evex}	rcrl	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xd9]
+         {evex}	rcrl	%ecx
+# CHECK: rcrl	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xd9]
+         rcrl	%ecx, %ecx
+# CHECK: {evex}	rcrq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xd9]
+         {evex}	rcrq	%r9
+# CHECK: rcrq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xd9]
+         rcrq	%r9, %r9
+# CHECK: {evex}	rcrb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcrb	291(%r8,%rax,4)
+# CHECK: rcrb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcrb	291(%r8,%rax,4), %bl
+# CHECK: {evex}	rcrw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcrw	291(%r8,%rax,4)
+# CHECK: rcrw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcrw	291(%r8,%rax,4), %dx
+# CHECK: {evex}	rcrl	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcrl	291(%r8,%rax,4)
+# CHECK: rcrl	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcrl	291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rcrq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcrq	291(%r8,%rax,4)
+# CHECK: rcrq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcrq	291(%r8,%rax,4), %r9
diff --git a/llvm/test/MC/X86/apx/rcr-intel.s b/llvm/test/MC/X86/apx/rcr-intel.s
new file mode 100644
index 00000000000000..91303ec6ce2c2a
--- /dev/null
+++ b/llvm/test/MC/X86/apx/rcr-intel.s
@@ -0,0 +1,143 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: {evex}	rcr	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xdb,0x7b]
+         {evex}	rcr	bl, 123
+# CHECK: rcr	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xdb,0x7b]
+         rcr	bl, bl, 123
+# CHECK: {evex}	rcr	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xda,0x7b]
+         {evex}	rcr	dx, 123
+# CHECK: rcr	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xda,0x7b]
+         rcr	dx, dx, 123
+# CHECK: {evex}	rcr	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xd9,0x7b]
+         {evex}	rcr	ecx, 123
+# CHECK: rcr	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xd9,0x7b]
+         rcr	ecx, ecx, 123
+# CHECK: {evex}	rcr	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xd9,0x7b]
+         {evex}	rcr	r9, 123
+# CHECK: rcr	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xd9,0x7b]
+         rcr	r9, r9, 123
+# CHECK: {evex}	rcr	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcr	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: rcr	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcr	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rcr	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcr	word ptr [r8 + 4*rax + 291], 123
+# CHECK: rcr	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcr	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rcr	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcr	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: rcr	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcr	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rcr	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rcr	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: rcr	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0x9c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rcr	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rcr	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xdb]
+         {evex}	rcr	bl
+# CHECK: {evex}	rcr	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xdb]
+         {evex}	rcr	bl, cl
+# CHECK: rcr	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xdb]
+         rcr	bl, bl, cl
+# CHECK: {evex}	rcr	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xda]
+         {evex}	rcr	dx, cl
+# CHECK: rcr	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xda]
+         rcr	dx, dx, cl
+# CHECK: {evex}	rcr	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xd9]
+         {evex}	rcr	ecx, cl
+# CHECK: rcr	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xd9]
+         rcr	ecx, ecx, cl
+# CHECK: {evex}	rcr	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xd9]
+         {evex}	rcr	r9, cl
+# CHECK: rcr	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xd9]
+         rcr	r9, r9, cl
+# CHECK: {evex}	rcr	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcr	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: rcr	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcr	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rcr	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcr	word ptr [r8 + 4*rax + 291], cl
+# CHECK: rcr	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcr	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rcr	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcr	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: rcr	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcr	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rcr	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcr	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: rcr	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcr	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rcr	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xda]
+         {evex}	rcr	dx
+# CHECK: rcr	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xda]
+         rcr	dx, dx
+# CHECK: {evex}	rcr	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xd9]
+         {evex}	rcr	ecx
+# CHECK: rcr	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xd9]
+         rcr	ecx, ecx
+# CHECK: {evex}	rcr	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xd9]
+         {evex}	rcr	r9
+# CHECK: rcr	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xd9]
+         rcr	r9, r9
+# CHECK: {evex}	rcr	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcr	byte ptr [r8 + 4*rax + 291]
+# CHECK: rcr	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcr	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	rcr	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcr	word ptr [r8 + 4*rax + 291]
+# CHECK: rcr	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcr	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	rcr	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcr	dword ptr [r8 + 4*rax + 291]
+# CHECK: rcr	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcr	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	rcr	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rcr	qword ptr [r8 + 4*rax + 291]
+# CHECK: rcr	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0x9c,0x80,0x23,0x01,0x00,0x00]
+         rcr	r9, qword ptr [r8 + 4*rax + 291]
diff --git a/llvm/test/MC/X86/apx/rol-att.s b/llvm/test/MC/X86/apx/rol-att.s
new file mode 100644
index 00000000000000..30c1c3a9ffe4c2
--- /dev/null
+++ b/llvm/test/MC/X86/apx/rol-att.s
@@ -0,0 +1,287 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-94: error:
+# ERROR-NOT: error:
+# CHECK: {evex}	rolb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xc3,0x7b]
+         {evex}	rolb	$123, %bl
+# CHECK: {nf}	rolb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc0,0xc3,0x7b]
+         {nf}	rolb	$123, %bl
+# CHECK: rolb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xc3,0x7b]
+         rolb	$123, %bl, %bl
+# CHECK: {nf}	rolb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xc0,0xc3,0x7b]
+         {nf}	rolb	$123, %bl, %bl
+# CHECK: {evex}	rolw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xc2,0x7b]
+         {evex}	rolw	$123, %dx
+# CHECK: {nf}	rolw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xc1,0xc2,0x7b]
+         {nf}	rolw	$123, %dx
+# CHECK: rolw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xc2,0x7b]
+         rolw	$123, %dx, %dx
+# CHECK: {nf}	rolw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xc1,0xc2,0x7b]
+         {nf}	rolw	$123, %dx, %dx
+# CHECK: {evex}	roll	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xc1,0x7b]
+         {evex}	roll	$123, %ecx
+# CHECK: {nf}	roll	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc1,0xc1,0x7b]
+         {nf}	roll	$123, %ecx
+# CHECK: roll	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xc1,0x7b]
+         roll	$123, %ecx, %ecx
+# CHECK: {nf}	roll	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xc1,0xc1,0x7b]
+         {nf}	roll	$123, %ecx, %ecx
+# CHECK: {evex}	rolq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xc1,0x7b]
+         {evex}	rolq	$123, %r9
+# CHECK: {nf}	rolq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xc1,0x7b]
+         {nf}	rolq	$123, %r9
+# CHECK: rolq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xc1,0x7b]
+         rolq	$123, %r9, %r9
+# CHECK: {nf}	rolq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xc1,0x7b]
+         {nf}	rolq	$123, %r9, %r9
+# CHECK: {evex}	rolb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rolb	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	rolb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rolb	$123, 291(%r8,%rax,4)
+# CHECK: rolb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rolb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {nf}	rolb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rolb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	rolw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rolw	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	rolw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rolw	$123, 291(%r8,%rax,4)
+# CHECK: rolw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rolw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	rolw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rolw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	roll	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	roll	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	roll	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	roll	$123, 291(%r8,%rax,4)
+# CHECK: roll	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         roll	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	roll	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	roll	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rolq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rolq	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	rolq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rolq	$123, 291(%r8,%rax,4)
+# CHECK: rolq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rolq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	rolq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rolq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	rolb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xc3]
+         {evex}	rolb	%bl
+# CHECK: {nf}	rolb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd0,0xc3]
+         {nf}	rolb	%bl
+# CHECK: {evex}	rolb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xc3]
+         {evex}	rolb	%cl, %bl
+# CHECK: {nf}	rolb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd2,0xc3]
+         {nf}	rolb	%cl, %bl
+# CHECK: rolb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xc3]
+         rolb	%cl, %bl, %bl
+# CHECK: {nf}	rolb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xd2,0xc3]
+         {nf}	rolb	%cl, %bl, %bl
+# CHECK: {evex}	rolw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xc2]
+         {evex}	rolw	%cl, %dx
+# CHECK: {nf}	rolw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd3,0xc2]
+         {nf}	rolw	%cl, %dx
+# CHECK: rolw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xc2]
+         rolw	%cl, %dx, %dx
+# CHECK: {nf}	rolw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd3,0xc2]
+         {nf}	rolw	%cl, %dx, %dx
+# CHECK: {evex}	roll	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xc1]
+         {evex}	roll	%cl, %ecx
+# CHECK: {nf}	roll	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd3,0xc1]
+         {nf}	roll	%cl, %ecx
+# CHECK: roll	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xc1]
+         roll	%cl, %ecx, %ecx
+# CHECK: {nf}	roll	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd3,0xc1]
+         {nf}	roll	%cl, %ecx, %ecx
+# CHECK: {evex}	rolq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xc1]
+         {evex}	rolq	%cl, %r9
+# CHECK: {nf}	rolq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xc1]
+         {nf}	rolq	%cl, %r9
+# CHECK: rolq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xc1]
+         rolq	%cl, %r9, %r9
+# CHECK: {nf}	rolq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xc1]
+         {nf}	rolq	%cl, %r9, %r9
+# CHECK: {evex}	rolb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rolb	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	rolb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd2,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolb	%cl, 291(%r8,%rax,4)
+# CHECK: rolb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0x84,0x80,0x23,0x01,0x00,0x00]
+         rolb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {nf}	rolb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd2,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	rolw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rolw	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	rolw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolw	%cl, 291(%r8,%rax,4)
+# CHECK: rolw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         rolw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	rolw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	roll	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	roll	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	roll	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	roll	%cl, 291(%r8,%rax,4)
+# CHECK: roll	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         roll	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	roll	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	roll	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rolq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rolq	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	rolq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolq	%cl, 291(%r8,%rax,4)
+# CHECK: rolq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         rolq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	rolq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	rolw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xc2]
+         {evex}	rolw	%dx
+# CHECK: {nf}	rolw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd1,0xc2]
+         {nf}	rolw	%dx
+# CHECK: rolw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xc2]
+         rolw	%dx, %dx
+# CHECK: {nf}	rolw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd1,0xc2]
+         {nf}	rolw	%dx, %dx
+# CHECK: {evex}	roll	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xc1]
+         {evex}	roll	%ecx
+# CHECK: {nf}	roll	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd1,0xc1]
+         {nf}	roll	%ecx
+# CHECK: roll	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xc1]
+         roll	%ecx, %ecx
+# CHECK: {nf}	roll	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd1,0xc1]
+         {nf}	roll	%ecx, %ecx
+# CHECK: {evex}	rolq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xc1]
+         {evex}	rolq	%r9
+# CHECK: {nf}	rolq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xc1]
+         {nf}	rolq	%r9
+# CHECK: rolq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xc1]
+         rolq	%r9, %r9
+# CHECK: {nf}	rolq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xc1]
+         {nf}	rolq	%r9, %r9
+# CHECK: {evex}	rolb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rolb	291(%r8,%rax,4)
+# CHECK: {nf}	rolb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd0,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolb	291(%r8,%rax,4)
+# CHECK: rolb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0x84,0x80,0x23,0x01,0x00,0x00]
+         rolb	291(%r8,%rax,4), %bl
+# CHECK: {nf}	rolb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd0,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolb	291(%r8,%rax,4), %bl
+# CHECK: {evex}	rolw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rolw	291(%r8,%rax,4)
+# CHECK: {nf}	rolw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolw	291(%r8,%rax,4)
+# CHECK: rolw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         rolw	291(%r8,%rax,4), %dx
+# CHECK: {nf}	rolw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolw	291(%r8,%rax,4), %dx
+# CHECK: {evex}	roll	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	roll	291(%r8,%rax,4)
+# CHECK: {nf}	roll	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	roll	291(%r8,%rax,4)
+# CHECK: roll	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         roll	291(%r8,%rax,4), %ecx
+# CHECK: {nf}	roll	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	roll	291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rolq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rolq	291(%r8,%rax,4)
+# CHECK: {nf}	rolq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolq	291(%r8,%rax,4)
+# CHECK: rolq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         rolq	291(%r8,%rax,4), %r9
+# CHECK: {nf}	rolq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rolq	291(%r8,%rax,4), %r9
diff --git a/llvm/test/MC/X86/apx/rol-intel.s b/llvm/test/MC/X86/apx/rol-intel.s
new file mode 100644
index 00000000000000..87d2914c9d5451
--- /dev/null
+++ b/llvm/test/MC/X86/apx/rol-intel.s
@@ -0,0 +1,284 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: {evex}	rol	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xc3,0x7b]
+         {evex}	rol	bl, 123
+# CHECK: {nf}	rol	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc0,0xc3,0x7b]
+         {nf}	rol	bl, 123
+# CHECK: rol	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xc3,0x7b]
+         rol	bl, bl, 123
+# CHECK: {nf}	rol	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xc0,0xc3,0x7b]
+         {nf}	rol	bl, bl, 123
+# CHECK: {evex}	rol	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xc2,0x7b]
+         {evex}	rol	dx, 123
+# CHECK: {nf}	rol	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xc1,0xc2,0x7b]
+         {nf}	rol	dx, 123
+# CHECK: rol	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xc2,0x7b]
+         rol	dx, dx, 123
+# CHECK: {nf}	rol	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xc1,0xc2,0x7b]
+         {nf}	rol	dx, dx, 123
+# CHECK: {evex}	rol	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xc1,0x7b]
+         {evex}	rol	ecx, 123
+# CHECK: {nf}	rol	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc1,0xc1,0x7b]
+         {nf}	rol	ecx, 123
+# CHECK: rol	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xc1,0x7b]
+         rol	ecx, ecx, 123
+# CHECK: {nf}	rol	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xc1,0xc1,0x7b]
+         {nf}	rol	ecx, ecx, 123
+# CHECK: {evex}	rol	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xc1,0x7b]
+         {evex}	rol	r9, 123
+# CHECK: {nf}	rol	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xc1,0x7b]
+         {nf}	rol	r9, 123
+# CHECK: rol	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xc1,0x7b]
+         rol	r9, r9, 123
+# CHECK: {nf}	rol	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xc1,0x7b]
+         {nf}	rol	r9, r9, 123
+# CHECK: {evex}	rol	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rol	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	rol	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rol	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: rol	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rol	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	rol	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xc0,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rol	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rol	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rol	word ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	rol	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rol	word ptr [r8 + 4*rax + 291], 123
+# CHECK: rol	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rol	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	rol	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rol	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rol	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rol	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	rol	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rol	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: rol	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rol	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	rol	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rol	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rol	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rol	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	rol	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rol	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: rol	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rol	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	rol	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0x84,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rol	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	rol	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xc3]
+         {evex}	rol	bl
+# CHECK: {nf}	rol	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd0,0xc3]
+         {nf}	rol	bl
+# CHECK: {evex}	rol	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xc3]
+         {evex}	rol	bl, cl
+# CHECK: {nf}	rol	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd2,0xc3]
+         {nf}	rol	bl, cl
+# CHECK: rol	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xc3]
+         rol	bl, bl, cl
+# CHECK: {nf}	rol	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xd2,0xc3]
+         {nf}	rol	bl, bl, cl
+# CHECK: {evex}	rol	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xc2]
+         {evex}	rol	dx, cl
+# CHECK: {nf}	rol	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd3,0xc2]
+         {nf}	rol	dx, cl
+# CHECK: rol	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xc2]
+         rol	dx, dx, cl
+# CHECK: {nf}	rol	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd3,0xc2]
+         {nf}	rol	dx, dx, cl
+# CHECK: {evex}	rol	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xc1]
+         {evex}	rol	ecx, cl
+# CHECK: {nf}	rol	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd3,0xc1]
+         {nf}	rol	ecx, cl
+# CHECK: rol	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xc1]
+         rol	ecx, ecx, cl
+# CHECK: {nf}	rol	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd3,0xc1]
+         {nf}	rol	ecx, ecx, cl
+# CHECK: {evex}	rol	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xc1]
+         {evex}	rol	r9, cl
+# CHECK: {nf}	rol	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xc1]
+         {nf}	rol	r9, cl
+# CHECK: rol	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xc1]
+         rol	r9, r9, cl
+# CHECK: {nf}	rol	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xc1]
+         {nf}	rol	r9, r9, cl
+# CHECK: {evex}	rol	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rol	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	rol	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd2,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: rol	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0x84,0x80,0x23,0x01,0x00,0x00]
+         rol	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	rol	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd2,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rol	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rol	word ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	rol	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	word ptr [r8 + 4*rax + 291], cl
+# CHECK: rol	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         rol	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	rol	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rol	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rol	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	rol	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: rol	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         rol	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	rol	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rol	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rol	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	rol	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: rol	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         rol	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	rol	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	rol	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xc2]
+         {evex}	rol	dx
+# CHECK: {nf}	rol	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd1,0xc2]
+         {nf}	rol	dx
+# CHECK: rol	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xc2]
+         rol	dx, dx
+# CHECK: {nf}	rol	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd1,0xc2]
+         {nf}	rol	dx, dx
+# CHECK: {evex}	rol	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xc1]
+         {evex}	rol	ecx
+# CHECK: {nf}	rol	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd1,0xc1]
+         {nf}	rol	ecx
+# CHECK: rol	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xc1]
+         rol	ecx, ecx
+# CHECK: {nf}	rol	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd1,0xc1]
+         {nf}	rol	ecx, ecx
+# CHECK: {evex}	rol	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xc1]
+         {evex}	rol	r9
+# CHECK: {nf}	rol	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xc1]
+         {nf}	rol	r9
+# CHECK: rol	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xc1]
+         rol	r9, r9
+# CHECK: {nf}	rol	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xc1]
+         {nf}	rol	r9, r9
+# CHECK: {evex}	rol	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rol	byte ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	rol	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd0,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	byte ptr [r8 + 4*rax + 291]
+# CHECK: rol	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0x84,0x80,0x23,0x01,0x00,0x00]
+         rol	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	rol	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd0,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	rol	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rol	word ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	rol	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	word ptr [r8 + 4*rax + 291]
+# CHECK: rol	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         rol	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	rol	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	rol	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rol	dword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	rol	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	dword ptr [r8 + 4*rax + 291]
+# CHECK: rol	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         rol	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	rol	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	rol	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rol	qword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	rol	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	qword ptr [r8 + 4*rax + 291]
+# CHECK: rol	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         rol	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	rol	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0x84,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rol	r9, qword ptr [r8 + 4*rax + 291]
diff --git a/llvm/test/MC/X86/apx/ror-att.s b/llvm/test/MC/X86/apx/ror-att.s
new file mode 100644
index 00000000000000..aa877f20e4e5d5
--- /dev/null
+++ b/llvm/test/MC/X86/apx/ror-att.s
@@ -0,0 +1,287 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-94: error:
+# ERROR-NOT: error:
+# CHECK: {evex}	rorb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xcb,0x7b]
+         {evex}	rorb	$123, %bl
+# CHECK: {nf}	rorb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc0,0xcb,0x7b]
+         {nf}	rorb	$123, %bl
+# CHECK: rorb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xcb,0x7b]
+         rorb	$123, %bl, %bl
+# CHECK: {nf}	rorb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xc0,0xcb,0x7b]
+         {nf}	rorb	$123, %bl, %bl
+# CHECK: {evex}	rorw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xca,0x7b]
+         {evex}	rorw	$123, %dx
+# CHECK: {nf}	rorw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xc1,0xca,0x7b]
+         {nf}	rorw	$123, %dx
+# CHECK: rorw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xca,0x7b]
+         rorw	$123, %dx, %dx
+# CHECK: {nf}	rorw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xc1,0xca,0x7b]
+         {nf}	rorw	$123, %dx, %dx
+# CHECK: {evex}	rorl	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xc9,0x7b]
+         {evex}	rorl	$123, %ecx
+# CHECK: {nf}	rorl	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc1,0xc9,0x7b]
+         {nf}	rorl	$123, %ecx
+# CHECK: rorl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xc9,0x7b]
+         rorl	$123, %ecx, %ecx
+# CHECK: {nf}	rorl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xc1,0xc9,0x7b]
+         {nf}	rorl	$123, %ecx, %ecx
+# CHECK: {evex}	rorq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xc9,0x7b]
+         {evex}	rorq	$123, %r9
+# CHECK: {nf}	rorq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xc9,0x7b]
+         {nf}	rorq	$123, %r9
+# CHECK: rorq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xc9,0x7b]
+         rorq	$123, %r9, %r9
+# CHECK: {nf}	rorq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xc9,0x7b]
+         {nf}	rorq	$123, %r9, %r9
+# CHECK: {evex}	rorb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rorb	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	rorb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rorb	$123, 291(%r8,%rax,4)
+# CHECK: rorb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rorb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {nf}	rorb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rorb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	rorw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rorw	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	rorw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rorw	$123, 291(%r8,%rax,4)
+# CHECK: rorw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rorw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	rorw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rorw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	rorl	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rorl	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	rorl	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rorl	$123, 291(%r8,%rax,4)
+# CHECK: rorl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rorl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	rorl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rorl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rorq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	rorq	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	rorq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rorq	$123, 291(%r8,%rax,4)
+# CHECK: rorq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         rorq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	rorq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	rorq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	rorb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xcb]
+         {evex}	rorb	%bl
+# CHECK: {nf}	rorb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd0,0xcb]
+         {nf}	rorb	%bl
+# CHECK: {evex}	rorb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xcb]
+         {evex}	rorb	%cl, %bl
+# CHECK: {nf}	rorb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd2,0xcb]
+         {nf}	rorb	%cl, %bl
+# CHECK: rorb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xcb]
+         rorb	%cl, %bl, %bl
+# CHECK: {nf}	rorb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xd2,0xcb]
+         {nf}	rorb	%cl, %bl, %bl
+# CHECK: {evex}	rorw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xca]
+         {evex}	rorw	%cl, %dx
+# CHECK: {nf}	rorw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd3,0xca]
+         {nf}	rorw	%cl, %dx
+# CHECK: rorw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xca]
+         rorw	%cl, %dx, %dx
+# CHECK: {nf}	rorw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd3,0xca]
+         {nf}	rorw	%cl, %dx, %dx
+# CHECK: {evex}	rorl	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xc9]
+         {evex}	rorl	%cl, %ecx
+# CHECK: {nf}	rorl	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd3,0xc9]
+         {nf}	rorl	%cl, %ecx
+# CHECK: rorl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xc9]
+         rorl	%cl, %ecx, %ecx
+# CHECK: {nf}	rorl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd3,0xc9]
+         {nf}	rorl	%cl, %ecx, %ecx
+# CHECK: {evex}	rorq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xc9]
+         {evex}	rorq	%cl, %r9
+# CHECK: {nf}	rorq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xc9]
+         {nf}	rorq	%cl, %r9
+# CHECK: rorq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xc9]
+         rorq	%cl, %r9, %r9
+# CHECK: {nf}	rorq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xc9]
+         {nf}	rorq	%cl, %r9, %r9
+# CHECK: {evex}	rorb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rorb	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	rorb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorb	%cl, 291(%r8,%rax,4)
+# CHECK: rorb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00]
+         rorb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {nf}	rorb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	rorw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rorw	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	rorw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorw	%cl, 291(%r8,%rax,4)
+# CHECK: rorw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         rorw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	rorw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	rorl	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rorl	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	rorl	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorl	%cl, 291(%r8,%rax,4)
+# CHECK: rorl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         rorl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	rorl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rorq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rorq	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	rorq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorq	%cl, 291(%r8,%rax,4)
+# CHECK: rorq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         rorq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	rorq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	rorw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xca]
+         {evex}	rorw	%dx
+# CHECK: {nf}	rorw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd1,0xca]
+         {nf}	rorw	%dx
+# CHECK: rorw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xca]
+         rorw	%dx, %dx
+# CHECK: {nf}	rorw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd1,0xca]
+         {nf}	rorw	%dx, %dx
+# CHECK: {evex}	rorl	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xc9]
+         {evex}	rorl	%ecx
+# CHECK: {nf}	rorl	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd1,0xc9]
+         {nf}	rorl	%ecx
+# CHECK: rorl	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xc9]
+         rorl	%ecx, %ecx
+# CHECK: {nf}	rorl	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd1,0xc9]
+         {nf}	rorl	%ecx, %ecx
+# CHECK: {evex}	rorq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xc9]
+         {evex}	rorq	%r9
+# CHECK: {nf}	rorq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xc9]
+         {nf}	rorq	%r9
+# CHECK: rorq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xc9]
+         rorq	%r9, %r9
+# CHECK: {nf}	rorq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xc9]
+         {nf}	rorq	%r9, %r9
+# CHECK: {evex}	rorb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rorb	291(%r8,%rax,4)
+# CHECK: {nf}	rorb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorb	291(%r8,%rax,4)
+# CHECK: rorb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00]
+         rorb	291(%r8,%rax,4), %bl
+# CHECK: {nf}	rorb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorb	291(%r8,%rax,4), %bl
+# CHECK: {evex}	rorw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rorw	291(%r8,%rax,4)
+# CHECK: {nf}	rorw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorw	291(%r8,%rax,4)
+# CHECK: rorw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         rorw	291(%r8,%rax,4), %dx
+# CHECK: {nf}	rorw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorw	291(%r8,%rax,4), %dx
+# CHECK: {evex}	rorl	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rorl	291(%r8,%rax,4)
+# CHECK: {nf}	rorl	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorl	291(%r8,%rax,4)
+# CHECK: rorl	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         rorl	291(%r8,%rax,4), %ecx
+# CHECK: {nf}	rorl	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorl	291(%r8,%rax,4), %ecx
+# CHECK: {evex}	rorq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	rorq	291(%r8,%rax,4)
+# CHECK: {nf}	rorq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorq	291(%r8,%rax,4)
+# CHECK: rorq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         rorq	291(%r8,%rax,4), %r9
+# CHECK: {nf}	rorq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	rorq	291(%r8,%rax,4), %r9
diff --git a/llvm/test/MC/X86/apx/ror-intel.s b/llvm/test/MC/X86/apx/ror-intel.s
new file mode 100644
index 00000000000000..27e2d6b6d24e75
--- /dev/null
+++ b/llvm/test/MC/X86/apx/ror-intel.s
@@ -0,0 +1,284 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: {evex}	ror	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xcb,0x7b]
+         {evex}	ror	bl, 123
+# CHECK: {nf}	ror	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc0,0xcb,0x7b]
+         {nf}	ror	bl, 123
+# CHECK: ror	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xcb,0x7b]
+         ror	bl, bl, 123
+# CHECK: {nf}	ror	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xc0,0xcb,0x7b]
+         {nf}	ror	bl, bl, 123
+# CHECK: {evex}	ror	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xca,0x7b]
+         {evex}	ror	dx, 123
+# CHECK: {nf}	ror	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xc1,0xca,0x7b]
+         {nf}	ror	dx, 123
+# CHECK: ror	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xca,0x7b]
+         ror	dx, dx, 123
+# CHECK: {nf}	ror	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xc1,0xca,0x7b]
+         {nf}	ror	dx, dx, 123
+# CHECK: {evex}	ror	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xc9,0x7b]
+         {evex}	ror	ecx, 123
+# CHECK: {nf}	ror	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc1,0xc9,0x7b]
+         {nf}	ror	ecx, 123
+# CHECK: ror	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xc9,0x7b]
+         ror	ecx, ecx, 123
+# CHECK: {nf}	ror	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xc1,0xc9,0x7b]
+         {nf}	ror	ecx, ecx, 123
+# CHECK: {evex}	ror	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xc9,0x7b]
+         {evex}	ror	r9, 123
+# CHECK: {nf}	ror	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xc9,0x7b]
+         {nf}	ror	r9, 123
+# CHECK: ror	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xc9,0x7b]
+         ror	r9, r9, 123
+# CHECK: {nf}	ror	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xc9,0x7b]
+         {nf}	ror	r9, r9, 123
+# CHECK: {evex}	ror	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	ror	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	ror	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	ror	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: ror	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         ror	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	ror	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xc0,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	ror	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	ror	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	ror	word ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	ror	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	ror	word ptr [r8 + 4*rax + 291], 123
+# CHECK: ror	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         ror	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	ror	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	ror	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	ror	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	ror	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	ror	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	ror	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: ror	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         ror	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	ror	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	ror	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	ror	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	ror	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	ror	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	ror	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: ror	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         ror	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	ror	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	ror	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	ror	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xcb]
+         {evex}	ror	bl
+# CHECK: {nf}	ror	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd0,0xcb]
+         {nf}	ror	bl
+# CHECK: {evex}	ror	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xcb]
+         {evex}	ror	bl, cl
+# CHECK: {nf}	ror	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd2,0xcb]
+         {nf}	ror	bl, cl
+# CHECK: ror	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xcb]
+         ror	bl, bl, cl
+# CHECK: {nf}	ror	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xd2,0xcb]
+         {nf}	ror	bl, bl, cl
+# CHECK: {evex}	ror	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xca]
+         {evex}	ror	dx, cl
+# CHECK: {nf}	ror	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd3,0xca]
+         {nf}	ror	dx, cl
+# CHECK: ror	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xca]
+         ror	dx, dx, cl
+# CHECK: {nf}	ror	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd3,0xca]
+         {nf}	ror	dx, dx, cl
+# CHECK: {evex}	ror	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xc9]
+         {evex}	ror	ecx, cl
+# CHECK: {nf}	ror	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd3,0xc9]
+         {nf}	ror	ecx, cl
+# CHECK: ror	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xc9]
+         ror	ecx, ecx, cl
+# CHECK: {nf}	ror	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd3,0xc9]
+         {nf}	ror	ecx, ecx, cl
+# CHECK: {evex}	ror	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xc9]
+         {evex}	ror	r9, cl
+# CHECK: {nf}	ror	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xc9]
+         {nf}	ror	r9, cl
+# CHECK: ror	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xc9]
+         ror	r9, r9, cl
+# CHECK: {nf}	ror	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xc9]
+         {nf}	ror	r9, r9, cl
+# CHECK: {evex}	ror	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	ror	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	ror	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: ror	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00]
+         ror	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	ror	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd2,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	ror	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	ror	word ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	ror	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	word ptr [r8 + 4*rax + 291], cl
+# CHECK: ror	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         ror	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	ror	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	ror	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	ror	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	ror	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: ror	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         ror	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	ror	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	ror	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	ror	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	ror	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: ror	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         ror	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	ror	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	ror	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xca]
+         {evex}	ror	dx
+# CHECK: {nf}	ror	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd1,0xca]
+         {nf}	ror	dx
+# CHECK: ror	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xca]
+         ror	dx, dx
+# CHECK: {nf}	ror	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd1,0xca]
+         {nf}	ror	dx, dx
+# CHECK: {evex}	ror	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xc9]
+         {evex}	ror	ecx
+# CHECK: {nf}	ror	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd1,0xc9]
+         {nf}	ror	ecx
+# CHECK: ror	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xc9]
+         ror	ecx, ecx
+# CHECK: {nf}	ror	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd1,0xc9]
+         {nf}	ror	ecx, ecx
+# CHECK: {evex}	ror	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xc9]
+         {evex}	ror	r9
+# CHECK: {nf}	ror	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xc9]
+         {nf}	ror	r9
+# CHECK: ror	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xc9]
+         ror	r9, r9
+# CHECK: {nf}	ror	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xc9]
+         {nf}	ror	r9, r9
+# CHECK: {evex}	ror	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	ror	byte ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	ror	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	byte ptr [r8 + 4*rax + 291]
+# CHECK: ror	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00]
+         ror	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	ror	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd0,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	ror	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	ror	word ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	ror	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	word ptr [r8 + 4*rax + 291]
+# CHECK: ror	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         ror	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	ror	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	ror	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	ror	dword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	ror	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	dword ptr [r8 + 4*rax + 291]
+# CHECK: ror	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         ror	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	ror	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	ror	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	ror	qword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	ror	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	qword ptr [r8 + 4*rax + 291]
+# CHECK: ror	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         ror	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	ror	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	ror	r9, qword ptr [r8 + 4*rax + 291]
diff --git a/llvm/test/MC/X86/apx/sar-att.s b/llvm/test/MC/X86/apx/sar-att.s
new file mode 100644
index 00000000000000..9ab96f277bc704
--- /dev/null
+++ b/llvm/test/MC/X86/apx/sar-att.s
@@ -0,0 +1,287 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-94: error:
+# ERROR-NOT: error:
+# CHECK: {evex}	sarb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xfb,0x7b]
+         {evex}	sarb	$123, %bl
+# CHECK: {nf}	sarb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc0,0xfb,0x7b]
+         {nf}	sarb	$123, %bl
+# CHECK: sarb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xfb,0x7b]
+         sarb	$123, %bl, %bl
+# CHECK: {nf}	sarb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xc0,0xfb,0x7b]
+         {nf}	sarb	$123, %bl, %bl
+# CHECK: {evex}	sarw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xfa,0x7b]
+         {evex}	sarw	$123, %dx
+# CHECK: {nf}	sarw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xc1,0xfa,0x7b]
+         {nf}	sarw	$123, %dx
+# CHECK: sarw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xfa,0x7b]
+         sarw	$123, %dx, %dx
+# CHECK: {nf}	sarw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xc1,0xfa,0x7b]
+         {nf}	sarw	$123, %dx, %dx
+# CHECK: {evex}	sarl	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xf9,0x7b]
+         {evex}	sarl	$123, %ecx
+# CHECK: {nf}	sarl	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc1,0xf9,0x7b]
+         {nf}	sarl	$123, %ecx
+# CHECK: sarl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xf9,0x7b]
+         sarl	$123, %ecx, %ecx
+# CHECK: {nf}	sarl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xc1,0xf9,0x7b]
+         {nf}	sarl	$123, %ecx, %ecx
+# CHECK: {evex}	sarq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xf9,0x7b]
+         {evex}	sarq	$123, %r9
+# CHECK: {nf}	sarq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xf9,0x7b]
+         {nf}	sarq	$123, %r9
+# CHECK: sarq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xf9,0x7b]
+         sarq	$123, %r9, %r9
+# CHECK: {nf}	sarq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xf9,0x7b]
+         {nf}	sarq	$123, %r9, %r9
+# CHECK: {evex}	sarb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	sarb	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	sarb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sarb	$123, 291(%r8,%rax,4)
+# CHECK: sarb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         sarb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {nf}	sarb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sarb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	sarw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	sarw	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	sarw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sarw	$123, 291(%r8,%rax,4)
+# CHECK: sarw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         sarw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	sarw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sarw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	sarl	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	sarl	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	sarl	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sarl	$123, 291(%r8,%rax,4)
+# CHECK: sarl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         sarl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	sarl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sarl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	sarq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	sarq	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	sarq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sarq	$123, 291(%r8,%rax,4)
+# CHECK: sarq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         sarq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	sarq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sarq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	sarb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xfb]
+         {evex}	sarb	%bl
+# CHECK: {nf}	sarb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd0,0xfb]
+         {nf}	sarb	%bl
+# CHECK: {evex}	sarb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xfb]
+         {evex}	sarb	%cl, %bl
+# CHECK: {nf}	sarb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd2,0xfb]
+         {nf}	sarb	%cl, %bl
+# CHECK: sarb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xfb]
+         sarb	%cl, %bl, %bl
+# CHECK: {nf}	sarb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xd2,0xfb]
+         {nf}	sarb	%cl, %bl, %bl
+# CHECK: {evex}	sarw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xfa]
+         {evex}	sarw	%cl, %dx
+# CHECK: {nf}	sarw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd3,0xfa]
+         {nf}	sarw	%cl, %dx
+# CHECK: sarw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xfa]
+         sarw	%cl, %dx, %dx
+# CHECK: {nf}	sarw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd3,0xfa]
+         {nf}	sarw	%cl, %dx, %dx
+# CHECK: {evex}	sarl	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xf9]
+         {evex}	sarl	%cl, %ecx
+# CHECK: {nf}	sarl	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd3,0xf9]
+         {nf}	sarl	%cl, %ecx
+# CHECK: sarl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xf9]
+         sarl	%cl, %ecx, %ecx
+# CHECK: {nf}	sarl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd3,0xf9]
+         {nf}	sarl	%cl, %ecx, %ecx
+# CHECK: {evex}	sarq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xf9]
+         {evex}	sarq	%cl, %r9
+# CHECK: {nf}	sarq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xf9]
+         {nf}	sarq	%cl, %r9
+# CHECK: sarq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xf9]
+         sarq	%cl, %r9, %r9
+# CHECK: {nf}	sarq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xf9]
+         {nf}	sarq	%cl, %r9, %r9
+# CHECK: {evex}	sarb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sarb	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	sarb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarb	%cl, 291(%r8,%rax,4)
+# CHECK: sarb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sarb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {nf}	sarb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	sarw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sarw	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	sarw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarw	%cl, 291(%r8,%rax,4)
+# CHECK: sarw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sarw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	sarw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	sarl	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sarl	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	sarl	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarl	%cl, 291(%r8,%rax,4)
+# CHECK: sarl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sarl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	sarl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	sarq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sarq	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	sarq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarq	%cl, 291(%r8,%rax,4)
+# CHECK: sarq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sarq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	sarq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	sarw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xfa]
+         {evex}	sarw	%dx
+# CHECK: {nf}	sarw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd1,0xfa]
+         {nf}	sarw	%dx
+# CHECK: sarw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xfa]
+         sarw	%dx, %dx
+# CHECK: {nf}	sarw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd1,0xfa]
+         {nf}	sarw	%dx, %dx
+# CHECK: {evex}	sarl	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xf9]
+         {evex}	sarl	%ecx
+# CHECK: {nf}	sarl	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd1,0xf9]
+         {nf}	sarl	%ecx
+# CHECK: sarl	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xf9]
+         sarl	%ecx, %ecx
+# CHECK: {nf}	sarl	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd1,0xf9]
+         {nf}	sarl	%ecx, %ecx
+# CHECK: {evex}	sarq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xf9]
+         {evex}	sarq	%r9
+# CHECK: {nf}	sarq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xf9]
+         {nf}	sarq	%r9
+# CHECK: sarq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xf9]
+         sarq	%r9, %r9
+# CHECK: {nf}	sarq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xf9]
+         {nf}	sarq	%r9, %r9
+# CHECK: {evex}	sarb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sarb	291(%r8,%rax,4)
+# CHECK: {nf}	sarb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarb	291(%r8,%rax,4)
+# CHECK: sarb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sarb	291(%r8,%rax,4), %bl
+# CHECK: {nf}	sarb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarb	291(%r8,%rax,4), %bl
+# CHECK: {evex}	sarw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sarw	291(%r8,%rax,4)
+# CHECK: {nf}	sarw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarw	291(%r8,%rax,4)
+# CHECK: sarw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sarw	291(%r8,%rax,4), %dx
+# CHECK: {nf}	sarw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarw	291(%r8,%rax,4), %dx
+# CHECK: {evex}	sarl	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sarl	291(%r8,%rax,4)
+# CHECK: {nf}	sarl	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarl	291(%r8,%rax,4)
+# CHECK: sarl	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sarl	291(%r8,%rax,4), %ecx
+# CHECK: {nf}	sarl	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarl	291(%r8,%rax,4), %ecx
+# CHECK: {evex}	sarq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sarq	291(%r8,%rax,4)
+# CHECK: {nf}	sarq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarq	291(%r8,%rax,4)
+# CHECK: sarq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sarq	291(%r8,%rax,4), %r9
+# CHECK: {nf}	sarq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sarq	291(%r8,%rax,4), %r9
diff --git a/llvm/test/MC/X86/apx/sar-intel.s b/llvm/test/MC/X86/apx/sar-intel.s
new file mode 100644
index 00000000000000..946758d4118c4e
--- /dev/null
+++ b/llvm/test/MC/X86/apx/sar-intel.s
@@ -0,0 +1,284 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: {evex}	sar	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xfb,0x7b]
+         {evex}	sar	bl, 123
+# CHECK: {nf}	sar	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc0,0xfb,0x7b]
+         {nf}	sar	bl, 123
+# CHECK: sar	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xfb,0x7b]
+         sar	bl, bl, 123
+# CHECK: {nf}	sar	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xc0,0xfb,0x7b]
+         {nf}	sar	bl, bl, 123
+# CHECK: {evex}	sar	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xfa,0x7b]
+         {evex}	sar	dx, 123
+# CHECK: {nf}	sar	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xc1,0xfa,0x7b]
+         {nf}	sar	dx, 123
+# CHECK: sar	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xfa,0x7b]
+         sar	dx, dx, 123
+# CHECK: {nf}	sar	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xc1,0xfa,0x7b]
+         {nf}	sar	dx, dx, 123
+# CHECK: {evex}	sar	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xf9,0x7b]
+         {evex}	sar	ecx, 123
+# CHECK: {nf}	sar	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc1,0xf9,0x7b]
+         {nf}	sar	ecx, 123
+# CHECK: sar	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xf9,0x7b]
+         sar	ecx, ecx, 123
+# CHECK: {nf}	sar	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xc1,0xf9,0x7b]
+         {nf}	sar	ecx, ecx, 123
+# CHECK: {evex}	sar	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xf9,0x7b]
+         {evex}	sar	r9, 123
+# CHECK: {nf}	sar	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xf9,0x7b]
+         {nf}	sar	r9, 123
+# CHECK: sar	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xf9,0x7b]
+         sar	r9, r9, 123
+# CHECK: {nf}	sar	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xf9,0x7b]
+         {nf}	sar	r9, r9, 123
+# CHECK: {evex}	sar	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	sar	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	sar	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sar	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: sar	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         sar	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	sar	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xc0,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sar	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	sar	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	sar	word ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	sar	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sar	word ptr [r8 + 4*rax + 291], 123
+# CHECK: sar	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         sar	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	sar	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sar	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	sar	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	sar	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	sar	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sar	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: sar	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         sar	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	sar	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sar	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	sar	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	sar	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	sar	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sar	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: sar	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         sar	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	sar	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xbc,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	sar	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	sar	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xfb]
+         {evex}	sar	bl
+# CHECK: {nf}	sar	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd0,0xfb]
+         {nf}	sar	bl
+# CHECK: {evex}	sar	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xfb]
+         {evex}	sar	bl, cl
+# CHECK: {nf}	sar	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd2,0xfb]
+         {nf}	sar	bl, cl
+# CHECK: sar	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xfb]
+         sar	bl, bl, cl
+# CHECK: {nf}	sar	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xd2,0xfb]
+         {nf}	sar	bl, bl, cl
+# CHECK: {evex}	sar	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xfa]
+         {evex}	sar	dx, cl
+# CHECK: {nf}	sar	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd3,0xfa]
+         {nf}	sar	dx, cl
+# CHECK: sar	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xfa]
+         sar	dx, dx, cl
+# CHECK: {nf}	sar	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd3,0xfa]
+         {nf}	sar	dx, dx, cl
+# CHECK: {evex}	sar	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xf9]
+         {evex}	sar	ecx, cl
+# CHECK: {nf}	sar	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd3,0xf9]
+         {nf}	sar	ecx, cl
+# CHECK: sar	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xf9]
+         sar	ecx, ecx, cl
+# CHECK: {nf}	sar	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd3,0xf9]
+         {nf}	sar	ecx, ecx, cl
+# CHECK: {evex}	sar	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xf9]
+         {evex}	sar	r9, cl
+# CHECK: {nf}	sar	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xf9]
+         {nf}	sar	r9, cl
+# CHECK: sar	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xf9]
+         sar	r9, r9, cl
+# CHECK: {nf}	sar	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xf9]
+         {nf}	sar	r9, r9, cl
+# CHECK: {evex}	sar	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sar	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	sar	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: sar	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sar	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	sar	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd2,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	sar	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sar	word ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	sar	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	word ptr [r8 + 4*rax + 291], cl
+# CHECK: sar	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sar	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	sar	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	sar	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sar	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	sar	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: sar	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sar	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	sar	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	sar	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sar	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	sar	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: sar	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sar	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	sar	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	sar	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xfa]
+         {evex}	sar	dx
+# CHECK: {nf}	sar	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd1,0xfa]
+         {nf}	sar	dx
+# CHECK: sar	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xfa]
+         sar	dx, dx
+# CHECK: {nf}	sar	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd1,0xfa]
+         {nf}	sar	dx, dx
+# CHECK: {evex}	sar	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xf9]
+         {evex}	sar	ecx
+# CHECK: {nf}	sar	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd1,0xf9]
+         {nf}	sar	ecx
+# CHECK: sar	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xf9]
+         sar	ecx, ecx
+# CHECK: {nf}	sar	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd1,0xf9]
+         {nf}	sar	ecx, ecx
+# CHECK: {evex}	sar	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xf9]
+         {evex}	sar	r9
+# CHECK: {nf}	sar	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xf9]
+         {nf}	sar	r9
+# CHECK: sar	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xf9]
+         sar	r9, r9
+# CHECK: {nf}	sar	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xf9]
+         {nf}	sar	r9, r9
+# CHECK: {evex}	sar	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sar	byte ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	sar	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	byte ptr [r8 + 4*rax + 291]
+# CHECK: sar	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sar	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	sar	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd0,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	sar	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sar	word ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	sar	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	word ptr [r8 + 4*rax + 291]
+# CHECK: sar	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sar	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	sar	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	sar	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sar	dword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	sar	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	dword ptr [r8 + 4*rax + 291]
+# CHECK: sar	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sar	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	sar	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	sar	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {evex}	sar	qword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	sar	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	qword ptr [r8 + 4*rax + 291]
+# CHECK: sar	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         sar	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	sar	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xbc,0x80,0x23,0x01,0x00,0x00]
+         {nf}	sar	r9, qword ptr [r8 + 4*rax + 291]
diff --git a/llvm/test/MC/X86/apx/shl-att.s b/llvm/test/MC/X86/apx/shl-att.s
new file mode 100644
index 00000000000000..d86f6cf05a7db1
--- /dev/null
+++ b/llvm/test/MC/X86/apx/shl-att.s
@@ -0,0 +1,287 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-94: error:
+# ERROR-NOT: error:
+# CHECK: {evex}	shlb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xe3,0x7b]
+         {evex}	shlb	$123, %bl
+# CHECK: {nf}	shlb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc0,0xe3,0x7b]
+         {nf}	shlb	$123, %bl
+# CHECK: shlb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xe3,0x7b]
+         shlb	$123, %bl, %bl
+# CHECK: {nf}	shlb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xc0,0xe3,0x7b]
+         {nf}	shlb	$123, %bl, %bl
+# CHECK: {evex}	shlw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xe2,0x7b]
+         {evex}	shlw	$123, %dx
+# CHECK: {nf}	shlw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xc1,0xe2,0x7b]
+         {nf}	shlw	$123, %dx
+# CHECK: shlw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xe2,0x7b]
+         shlw	$123, %dx, %dx
+# CHECK: {nf}	shlw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xc1,0xe2,0x7b]
+         {nf}	shlw	$123, %dx, %dx
+# CHECK: {evex}	shll	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xe1,0x7b]
+         {evex}	shll	$123, %ecx
+# CHECK: {nf}	shll	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc1,0xe1,0x7b]
+         {nf}	shll	$123, %ecx
+# CHECK: shll	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xe1,0x7b]
+         shll	$123, %ecx, %ecx
+# CHECK: {nf}	shll	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xc1,0xe1,0x7b]
+         {nf}	shll	$123, %ecx, %ecx
+# CHECK: {evex}	shlq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xe1,0x7b]
+         {evex}	shlq	$123, %r9
+# CHECK: {nf}	shlq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xe1,0x7b]
+         {nf}	shlq	$123, %r9
+# CHECK: shlq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xe1,0x7b]
+         shlq	$123, %r9, %r9
+# CHECK: {nf}	shlq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xe1,0x7b]
+         {nf}	shlq	$123, %r9, %r9
+# CHECK: {evex}	shlb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shlb	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	shlb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shlb	$123, 291(%r8,%rax,4)
+# CHECK: shlb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shlb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {nf}	shlb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shlb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	shlw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shlw	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	shlw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shlw	$123, 291(%r8,%rax,4)
+# CHECK: shlw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shlw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	shlw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shlw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	shll	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shll	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	shll	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shll	$123, 291(%r8,%rax,4)
+# CHECK: shll	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shll	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	shll	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shll	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	shlq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shlq	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	shlq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shlq	$123, 291(%r8,%rax,4)
+# CHECK: shlq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shlq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	shlq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shlq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	shlb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xe3]
+         {evex}	shlb	%bl
+# CHECK: {nf}	shlb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd0,0xe3]
+         {nf}	shlb	%bl
+# CHECK: {evex}	shlb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xe3]
+         {evex}	shlb	%cl, %bl
+# CHECK: {nf}	shlb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd2,0xe3]
+         {nf}	shlb	%cl, %bl
+# CHECK: shlb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xe3]
+         shlb	%cl, %bl, %bl
+# CHECK: {nf}	shlb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xd2,0xe3]
+         {nf}	shlb	%cl, %bl, %bl
+# CHECK: {evex}	shlw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xe2]
+         {evex}	shlw	%cl, %dx
+# CHECK: {nf}	shlw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd3,0xe2]
+         {nf}	shlw	%cl, %dx
+# CHECK: shlw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xe2]
+         shlw	%cl, %dx, %dx
+# CHECK: {nf}	shlw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd3,0xe2]
+         {nf}	shlw	%cl, %dx, %dx
+# CHECK: {evex}	shll	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xe1]
+         {evex}	shll	%cl, %ecx
+# CHECK: {nf}	shll	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd3,0xe1]
+         {nf}	shll	%cl, %ecx
+# CHECK: shll	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xe1]
+         shll	%cl, %ecx, %ecx
+# CHECK: {nf}	shll	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd3,0xe1]
+         {nf}	shll	%cl, %ecx, %ecx
+# CHECK: {evex}	shlq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xe1]
+         {evex}	shlq	%cl, %r9
+# CHECK: {nf}	shlq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xe1]
+         {nf}	shlq	%cl, %r9
+# CHECK: shlq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xe1]
+         shlq	%cl, %r9, %r9
+# CHECK: {nf}	shlq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xe1]
+         {nf}	shlq	%cl, %r9, %r9
+# CHECK: {evex}	shlb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shlb	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	shlb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlb	%cl, 291(%r8,%rax,4)
+# CHECK: shlb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shlb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {nf}	shlb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	shlw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shlw	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	shlw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlw	%cl, 291(%r8,%rax,4)
+# CHECK: shlw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shlw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	shlw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	shll	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shll	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	shll	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shll	%cl, 291(%r8,%rax,4)
+# CHECK: shll	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shll	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	shll	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shll	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	shlq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shlq	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	shlq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlq	%cl, 291(%r8,%rax,4)
+# CHECK: shlq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shlq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	shlq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	shlw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xe2]
+         {evex}	shlw	%dx
+# CHECK: {nf}	shlw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd1,0xe2]
+         {nf}	shlw	%dx
+# CHECK: shlw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xe2]
+         shlw	%dx, %dx
+# CHECK: {nf}	shlw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd1,0xe2]
+         {nf}	shlw	%dx, %dx
+# CHECK: {evex}	shll	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xe1]
+         {evex}	shll	%ecx
+# CHECK: {nf}	shll	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd1,0xe1]
+         {nf}	shll	%ecx
+# CHECK: shll	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xe1]
+         shll	%ecx, %ecx
+# CHECK: {nf}	shll	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd1,0xe1]
+         {nf}	shll	%ecx, %ecx
+# CHECK: {evex}	shlq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xe1]
+         {evex}	shlq	%r9
+# CHECK: {nf}	shlq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xe1]
+         {nf}	shlq	%r9
+# CHECK: shlq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xe1]
+         shlq	%r9, %r9
+# CHECK: {nf}	shlq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xe1]
+         {nf}	shlq	%r9, %r9
+# CHECK: {evex}	shlb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shlb	291(%r8,%rax,4)
+# CHECK: {nf}	shlb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlb	291(%r8,%rax,4)
+# CHECK: shlb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shlb	291(%r8,%rax,4), %bl
+# CHECK: {nf}	shlb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlb	291(%r8,%rax,4), %bl
+# CHECK: {evex}	shlw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shlw	291(%r8,%rax,4)
+# CHECK: {nf}	shlw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlw	291(%r8,%rax,4)
+# CHECK: shlw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shlw	291(%r8,%rax,4), %dx
+# CHECK: {nf}	shlw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlw	291(%r8,%rax,4), %dx
+# CHECK: {evex}	shll	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shll	291(%r8,%rax,4)
+# CHECK: {nf}	shll	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shll	291(%r8,%rax,4)
+# CHECK: shll	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shll	291(%r8,%rax,4), %ecx
+# CHECK: {nf}	shll	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shll	291(%r8,%rax,4), %ecx
+# CHECK: {evex}	shlq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shlq	291(%r8,%rax,4)
+# CHECK: {nf}	shlq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlq	291(%r8,%rax,4)
+# CHECK: shlq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shlq	291(%r8,%rax,4), %r9
+# CHECK: {nf}	shlq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shlq	291(%r8,%rax,4), %r9
diff --git a/llvm/test/MC/X86/apx/shl-intel.s b/llvm/test/MC/X86/apx/shl-intel.s
new file mode 100644
index 00000000000000..2db5203e77c4f8
--- /dev/null
+++ b/llvm/test/MC/X86/apx/shl-intel.s
@@ -0,0 +1,284 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: {evex}	shl	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xe3,0x7b]
+         {evex}	shl	bl, 123
+# CHECK: {nf}	shl	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc0,0xe3,0x7b]
+         {nf}	shl	bl, 123
+# CHECK: shl	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xe3,0x7b]
+         shl	bl, bl, 123
+# CHECK: {nf}	shl	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xc0,0xe3,0x7b]
+         {nf}	shl	bl, bl, 123
+# CHECK: {evex}	shl	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xe2,0x7b]
+         {evex}	shl	dx, 123
+# CHECK: {nf}	shl	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xc1,0xe2,0x7b]
+         {nf}	shl	dx, 123
+# CHECK: shl	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xe2,0x7b]
+         shl	dx, dx, 123
+# CHECK: {nf}	shl	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xc1,0xe2,0x7b]
+         {nf}	shl	dx, dx, 123
+# CHECK: {evex}	shl	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xe1,0x7b]
+         {evex}	shl	ecx, 123
+# CHECK: {nf}	shl	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc1,0xe1,0x7b]
+         {nf}	shl	ecx, 123
+# CHECK: shl	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xe1,0x7b]
+         shl	ecx, ecx, 123
+# CHECK: {nf}	shl	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xc1,0xe1,0x7b]
+         {nf}	shl	ecx, ecx, 123
+# CHECK: {evex}	shl	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xe1,0x7b]
+         {evex}	shl	r9, 123
+# CHECK: {nf}	shl	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xe1,0x7b]
+         {nf}	shl	r9, 123
+# CHECK: shl	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xe1,0x7b]
+         shl	r9, r9, 123
+# CHECK: {nf}	shl	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xe1,0x7b]
+         {nf}	shl	r9, r9, 123
+# CHECK: {evex}	shl	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shl	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shl	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shl	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: shl	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shl	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shl	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xc0,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shl	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	shl	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shl	word ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shl	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shl	word ptr [r8 + 4*rax + 291], 123
+# CHECK: shl	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shl	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shl	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shl	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	shl	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shl	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shl	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shl	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: shl	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shl	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shl	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shl	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	shl	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shl	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shl	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shl	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: shl	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shl	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shl	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xa4,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shl	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	shl	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xe3]
+         {evex}	shl	bl
+# CHECK: {nf}	shl	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd0,0xe3]
+         {nf}	shl	bl
+# CHECK: {evex}	shl	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xe3]
+         {evex}	shl	bl, cl
+# CHECK: {nf}	shl	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd2,0xe3]
+         {nf}	shl	bl, cl
+# CHECK: shl	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xe3]
+         shl	bl, bl, cl
+# CHECK: {nf}	shl	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xd2,0xe3]
+         {nf}	shl	bl, bl, cl
+# CHECK: {evex}	shl	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xe2]
+         {evex}	shl	dx, cl
+# CHECK: {nf}	shl	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd3,0xe2]
+         {nf}	shl	dx, cl
+# CHECK: shl	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xe2]
+         shl	dx, dx, cl
+# CHECK: {nf}	shl	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd3,0xe2]
+         {nf}	shl	dx, dx, cl
+# CHECK: {evex}	shl	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xe1]
+         {evex}	shl	ecx, cl
+# CHECK: {nf}	shl	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd3,0xe1]
+         {nf}	shl	ecx, cl
+# CHECK: shl	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xe1]
+         shl	ecx, ecx, cl
+# CHECK: {nf}	shl	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd3,0xe1]
+         {nf}	shl	ecx, ecx, cl
+# CHECK: {evex}	shl	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xe1]
+         {evex}	shl	r9, cl
+# CHECK: {nf}	shl	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xe1]
+         {nf}	shl	r9, cl
+# CHECK: shl	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xe1]
+         shl	r9, r9, cl
+# CHECK: {nf}	shl	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xe1]
+         {nf}	shl	r9, r9, cl
+# CHECK: {evex}	shl	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shl	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shl	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: shl	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shl	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shl	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd2,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	shl	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shl	word ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shl	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	word ptr [r8 + 4*rax + 291], cl
+# CHECK: shl	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shl	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shl	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	shl	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shl	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shl	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: shl	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shl	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shl	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	shl	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shl	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shl	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: shl	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shl	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shl	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	shl	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xe2]
+         {evex}	shl	dx
+# CHECK: {nf}	shl	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd1,0xe2]
+         {nf}	shl	dx
+# CHECK: shl	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xe2]
+         shl	dx, dx
+# CHECK: {nf}	shl	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd1,0xe2]
+         {nf}	shl	dx, dx
+# CHECK: {evex}	shl	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xe1]
+         {evex}	shl	ecx
+# CHECK: {nf}	shl	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd1,0xe1]
+         {nf}	shl	ecx
+# CHECK: shl	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xe1]
+         shl	ecx, ecx
+# CHECK: {nf}	shl	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd1,0xe1]
+         {nf}	shl	ecx, ecx
+# CHECK: {evex}	shl	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xe1]
+         {evex}	shl	r9
+# CHECK: {nf}	shl	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xe1]
+         {nf}	shl	r9
+# CHECK: shl	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xe1]
+         shl	r9, r9
+# CHECK: {nf}	shl	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xe1]
+         {nf}	shl	r9, r9
+# CHECK: {evex}	shl	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shl	byte ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shl	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	byte ptr [r8 + 4*rax + 291]
+# CHECK: shl	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shl	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shl	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd0,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	shl	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shl	word ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shl	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	word ptr [r8 + 4*rax + 291]
+# CHECK: shl	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shl	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shl	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	shl	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shl	dword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shl	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	dword ptr [r8 + 4*rax + 291]
+# CHECK: shl	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shl	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shl	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	shl	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shl	qword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shl	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	qword ptr [r8 + 4*rax + 291]
+# CHECK: shl	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         shl	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shl	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xa4,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shl	r9, qword ptr [r8 + 4*rax + 291]
diff --git a/llvm/test/MC/X86/apx/shld-att.s b/llvm/test/MC/X86/apx/shld-att.s
new file mode 100644
index 00000000000000..a279398ee6e609
--- /dev/null
+++ b/llvm/test/MC/X86/apx/shld-att.s
@@ -0,0 +1,149 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-48: error:
+# ERROR-NOT: error:
+# CHECK: {evex}	shldw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x24,0xd2,0x7b]
+         {evex}	shldw	$123, %dx, %dx
+# CHECK: {nf}	shldw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0x24,0xd2,0x7b]
+         {nf}	shldw	$123, %dx, %dx
+# CHECK: shldw	$123, %dx, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0x24,0xd2,0x7b]
+         shldw	$123, %dx, %dx, %dx
+# CHECK: {nf}	shldw	$123, %dx, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0x24,0xd2,0x7b]
+         {nf}	shldw	$123, %dx, %dx, %dx
+# CHECK: {evex}	shldw	$123, %dx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shldw	$123, %dx, 291(%r8,%rax,4)
+# CHECK: {nf}	shldw	$123, %dx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shldw	$123, %dx, 291(%r8,%rax,4)
+# CHECK: shldw	$123, %dx, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shldw	$123, %dx, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	shldw	$123, %dx, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shldw	$123, %dx, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	shldl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x24,0xc9,0x7b]
+         {evex}	shldl	$123, %ecx, %ecx
+# CHECK: {nf}	shldl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0x24,0xc9,0x7b]
+         {nf}	shldl	$123, %ecx, %ecx
+# CHECK: shldl	$123, %ecx, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0x24,0xc9,0x7b]
+         shldl	$123, %ecx, %ecx, %ecx
+# CHECK: {nf}	shldl	$123, %ecx, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0x24,0xc9,0x7b]
+         {nf}	shldl	$123, %ecx, %ecx, %ecx
+# CHECK: {evex}	shldl	$123, %ecx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shldl	$123, %ecx, 291(%r8,%rax,4)
+# CHECK: {nf}	shldl	$123, %ecx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shldl	$123, %ecx, 291(%r8,%rax,4)
+# CHECK: shldl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shldl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	shldl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shldl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	shldq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0x24,0xc9,0x7b]
+         {evex}	shldq	$123, %r9, %r9
+# CHECK: {nf}	shldq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0x24,0xc9,0x7b]
+         {nf}	shldq	$123, %r9, %r9
+# CHECK: shldq	$123, %r9, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0x24,0xc9,0x7b]
+         shldq	$123, %r9, %r9, %r9
+# CHECK: {nf}	shldq	$123, %r9, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0x24,0xc9,0x7b]
+         {nf}	shldq	$123, %r9, %r9, %r9
+# CHECK: {evex}	shldq	$123, %r9, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shldq	$123, %r9, 291(%r8,%rax,4)
+# CHECK: {nf}	shldq	$123, %r9, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shldq	$123, %r9, 291(%r8,%rax,4)
+# CHECK: shldq	$123, %r9, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shldq	$123, %r9, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	shldq	$123, %r9, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shldq	$123, %r9, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	shldw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xa5,0xd2]
+         {evex}	shldw	%cl, %dx, %dx
+# CHECK: {nf}	shldw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xa5,0xd2]
+         {nf}	shldw	%cl, %dx, %dx
+# CHECK: shldw	%cl, %dx, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xa5,0xd2]
+         shldw	%cl, %dx, %dx, %dx
+# CHECK: {nf}	shldw	%cl, %dx, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xa5,0xd2]
+         {nf}	shldw	%cl, %dx, %dx, %dx
+# CHECK: {evex}	shldw	%cl, %dx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xa5,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shldw	%cl, %dx, 291(%r8,%rax,4)
+# CHECK: {nf}	shldw	%cl, %dx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xa5,0x94,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shldw	%cl, %dx, 291(%r8,%rax,4)
+# CHECK: shldw	%cl, %dx, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xa5,0x94,0x80,0x23,0x01,0x00,0x00]
+         shldw	%cl, %dx, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	shldw	%cl, %dx, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xa5,0x94,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shldw	%cl, %dx, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	shldl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xa5,0xc9]
+         {evex}	shldl	%cl, %ecx, %ecx
+# CHECK: {nf}	shldl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xa5,0xc9]
+         {nf}	shldl	%cl, %ecx, %ecx
+# CHECK: shldl	%cl, %ecx, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xa5,0xc9]
+         shldl	%cl, %ecx, %ecx, %ecx
+# CHECK: {nf}	shldl	%cl, %ecx, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xa5,0xc9]
+         {nf}	shldl	%cl, %ecx, %ecx, %ecx
+# CHECK: {evex}	shldl	%cl, %ecx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shldl	%cl, %ecx, 291(%r8,%rax,4)
+# CHECK: {nf}	shldl	%cl, %ecx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shldl	%cl, %ecx, 291(%r8,%rax,4)
+# CHECK: shldl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         shldl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	shldl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shldl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	shldq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0xa5,0xc9]
+         {evex}	shldq	%cl, %r9, %r9
+# CHECK: {nf}	shldq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0xa5,0xc9]
+         {nf}	shldq	%cl, %r9, %r9
+# CHECK: shldq	%cl, %r9, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0xa5,0xc9]
+         shldq	%cl, %r9, %r9, %r9
+# CHECK: {nf}	shldq	%cl, %r9, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0xa5,0xc9]
+         {nf}	shldq	%cl, %r9, %r9, %r9
+# CHECK: {evex}	shldq	%cl, %r9, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shldq	%cl, %r9, 291(%r8,%rax,4)
+# CHECK: {nf}	shldq	%cl, %r9, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shldq	%cl, %r9, 291(%r8,%rax,4)
+# CHECK: shldq	%cl, %r9, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         shldq	%cl, %r9, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	shldq	%cl, %r9, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shldq	%cl, %r9, 291(%r8,%rax,4), %r9
diff --git a/llvm/test/MC/X86/apx/shld-intel.s b/llvm/test/MC/X86/apx/shld-intel.s
new file mode 100644
index 00000000000000..4bd7c30cc2eaf3
--- /dev/null
+++ b/llvm/test/MC/X86/apx/shld-intel.s
@@ -0,0 +1,146 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: {evex}	shld	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x24,0xd2,0x7b]
+         {evex}	shld	dx, dx, 123
+# CHECK: {nf}	shld	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0x24,0xd2,0x7b]
+         {nf}	shld	dx, dx, 123
+# CHECK: shld	dx, dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0x24,0xd2,0x7b]
+         shld	dx, dx, dx, 123
+# CHECK: {nf}	shld	dx, dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0x24,0xd2,0x7b]
+         {nf}	shld	dx, dx, dx, 123
+# CHECK: {evex}	shld	word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shld	word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: {nf}	shld	word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shld	word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: shld	dx, word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shld	dx, word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: {nf}	shld	dx, word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0x24,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shld	dx, word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: {evex}	shld	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x24,0xc9,0x7b]
+         {evex}	shld	ecx, ecx, 123
+# CHECK: {nf}	shld	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0x24,0xc9,0x7b]
+         {nf}	shld	ecx, ecx, 123
+# CHECK: shld	ecx, ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0x24,0xc9,0x7b]
+         shld	ecx, ecx, ecx, 123
+# CHECK: {nf}	shld	ecx, ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0x24,0xc9,0x7b]
+         {nf}	shld	ecx, ecx, ecx, 123
+# CHECK: {evex}	shld	dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shld	dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: {nf}	shld	dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shld	dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: {nf}	shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: {evex}	shld	r9, r9, 123
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0x24,0xc9,0x7b]
+         {evex}	shld	r9, r9, 123
+# CHECK: {nf}	shld	r9, r9, 123
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0x24,0xc9,0x7b]
+         {nf}	shld	r9, r9, 123
+# CHECK: shld	r9, r9, r9, 123
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0x24,0xc9,0x7b]
+         shld	r9, r9, r9, 123
+# CHECK: {nf}	shld	r9, r9, r9, 123
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0x24,0xc9,0x7b]
+         {nf}	shld	r9, r9, r9, 123
+# CHECK: {evex}	shld	qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shld	qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: {nf}	shld	qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shld	qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: shld	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shld	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: {nf}	shld	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0x24,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shld	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: {evex}	shld	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xa5,0xd2]
+         {evex}	shld	dx, dx, cl
+# CHECK: {nf}	shld	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xa5,0xd2]
+         {nf}	shld	dx, dx, cl
+# CHECK: shld	dx, dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xa5,0xd2]
+         shld	dx, dx, dx, cl
+# CHECK: {nf}	shld	dx, dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xa5,0xd2]
+         {nf}	shld	dx, dx, dx, cl
+# CHECK: {evex}	shld	word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xa5,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shld	word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: {nf}	shld	word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xa5,0x94,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shld	word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: shld	dx, word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xa5,0x94,0x80,0x23,0x01,0x00,0x00]
+         shld	dx, word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: {nf}	shld	dx, word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xa5,0x94,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shld	dx, word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: {evex}	shld	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xa5,0xc9]
+         {evex}	shld	ecx, ecx, cl
+# CHECK: {nf}	shld	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xa5,0xc9]
+         {nf}	shld	ecx, ecx, cl
+# CHECK: shld	ecx, ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xa5,0xc9]
+         shld	ecx, ecx, ecx, cl
+# CHECK: {nf}	shld	ecx, ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xa5,0xc9]
+         {nf}	shld	ecx, ecx, ecx, cl
+# CHECK: {evex}	shld	dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shld	dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: {nf}	shld	dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shld	dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: {nf}	shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shld	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: {evex}	shld	r9, r9, cl
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0xa5,0xc9]
+         {evex}	shld	r9, r9, cl
+# CHECK: {nf}	shld	r9, r9, cl
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0xa5,0xc9]
+         {nf}	shld	r9, r9, cl
+# CHECK: shld	r9, r9, r9, cl
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0xa5,0xc9]
+         shld	r9, r9, r9, cl
+# CHECK: {nf}	shld	r9, r9, r9, cl
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0xa5,0xc9]
+         {nf}	shld	r9, r9, r9, cl
+# CHECK: {evex}	shld	qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shld	qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: {nf}	shld	qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shld	qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: shld	r9, qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         shld	r9, qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: {nf}	shld	r9, qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0xa5,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shld	r9, qword ptr [r8 + 4*rax + 291], r9, cl
diff --git a/llvm/test/MC/X86/apx/shr-att.s b/llvm/test/MC/X86/apx/shr-att.s
new file mode 100644
index 00000000000000..86656c325de2ae
--- /dev/null
+++ b/llvm/test/MC/X86/apx/shr-att.s
@@ -0,0 +1,287 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-94: error:
+# ERROR-NOT: error:
+# CHECK: {evex}	shrb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xeb,0x7b]
+         {evex}	shrb	$123, %bl
+# CHECK: {nf}	shrb	$123, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc0,0xeb,0x7b]
+         {nf}	shrb	$123, %bl
+# CHECK: shrb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xeb,0x7b]
+         shrb	$123, %bl, %bl
+# CHECK: {nf}	shrb	$123, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xc0,0xeb,0x7b]
+         {nf}	shrb	$123, %bl, %bl
+# CHECK: {evex}	shrw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xea,0x7b]
+         {evex}	shrw	$123, %dx
+# CHECK: {nf}	shrw	$123, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xc1,0xea,0x7b]
+         {nf}	shrw	$123, %dx
+# CHECK: shrw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xea,0x7b]
+         shrw	$123, %dx, %dx
+# CHECK: {nf}	shrw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xc1,0xea,0x7b]
+         {nf}	shrw	$123, %dx, %dx
+# CHECK: {evex}	shrl	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xe9,0x7b]
+         {evex}	shrl	$123, %ecx
+# CHECK: {nf}	shrl	$123, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc1,0xe9,0x7b]
+         {nf}	shrl	$123, %ecx
+# CHECK: shrl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xe9,0x7b]
+         shrl	$123, %ecx, %ecx
+# CHECK: {nf}	shrl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xc1,0xe9,0x7b]
+         {nf}	shrl	$123, %ecx, %ecx
+# CHECK: {evex}	shrq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xe9,0x7b]
+         {evex}	shrq	$123, %r9
+# CHECK: {nf}	shrq	$123, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xe9,0x7b]
+         {nf}	shrq	$123, %r9
+# CHECK: shrq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xe9,0x7b]
+         shrq	$123, %r9, %r9
+# CHECK: {nf}	shrq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xe9,0x7b]
+         {nf}	shrq	$123, %r9, %r9
+# CHECK: {evex}	shrb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shrb	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	shrb	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrb	$123, 291(%r8,%rax,4)
+# CHECK: shrb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shrb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {nf}	shrb	$123, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrb	$123, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	shrw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shrw	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	shrw	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrw	$123, 291(%r8,%rax,4)
+# CHECK: shrw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shrw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	shrw	$123, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrw	$123, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	shrl	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shrl	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	shrl	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrl	$123, 291(%r8,%rax,4)
+# CHECK: shrl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shrl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	shrl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrl	$123, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	shrq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shrq	$123, 291(%r8,%rax,4)
+# CHECK: {nf}	shrq	$123, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrq	$123, 291(%r8,%rax,4)
+# CHECK: shrq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shrq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	shrq	$123, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrq	$123, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	shrb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xeb]
+         {evex}	shrb	%bl
+# CHECK: {nf}	shrb	%bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd0,0xeb]
+         {nf}	shrb	%bl
+# CHECK: {evex}	shrb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xeb]
+         {evex}	shrb	%cl, %bl
+# CHECK: {nf}	shrb	%cl, %bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd2,0xeb]
+         {nf}	shrb	%cl, %bl
+# CHECK: shrb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xeb]
+         shrb	%cl, %bl, %bl
+# CHECK: {nf}	shrb	%cl, %bl, %bl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xd2,0xeb]
+         {nf}	shrb	%cl, %bl, %bl
+# CHECK: {evex}	shrw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xea]
+         {evex}	shrw	%cl, %dx
+# CHECK: {nf}	shrw	%cl, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd3,0xea]
+         {nf}	shrw	%cl, %dx
+# CHECK: shrw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xea]
+         shrw	%cl, %dx, %dx
+# CHECK: {nf}	shrw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd3,0xea]
+         {nf}	shrw	%cl, %dx, %dx
+# CHECK: {evex}	shrl	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xe9]
+         {evex}	shrl	%cl, %ecx
+# CHECK: {nf}	shrl	%cl, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd3,0xe9]
+         {nf}	shrl	%cl, %ecx
+# CHECK: shrl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xe9]
+         shrl	%cl, %ecx, %ecx
+# CHECK: {nf}	shrl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd3,0xe9]
+         {nf}	shrl	%cl, %ecx, %ecx
+# CHECK: {evex}	shrq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xe9]
+         {evex}	shrq	%cl, %r9
+# CHECK: {nf}	shrq	%cl, %r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xe9]
+         {nf}	shrq	%cl, %r9
+# CHECK: shrq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xe9]
+         shrq	%cl, %r9, %r9
+# CHECK: {nf}	shrq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xe9]
+         {nf}	shrq	%cl, %r9, %r9
+# CHECK: {evex}	shrb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrb	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	shrb	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd2,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrb	%cl, 291(%r8,%rax,4)
+# CHECK: shrb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0xac,0x80,0x23,0x01,0x00,0x00]
+         shrb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {nf}	shrb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd2,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrb	%cl, 291(%r8,%rax,4), %bl
+# CHECK: {evex}	shrw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrw	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	shrw	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrw	%cl, 291(%r8,%rax,4)
+# CHECK: shrw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         shrw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	shrw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrw	%cl, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	shrl	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrl	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	shrl	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrl	%cl, 291(%r8,%rax,4)
+# CHECK: shrl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         shrl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	shrl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrl	%cl, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	shrq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrq	%cl, 291(%r8,%rax,4)
+# CHECK: {nf}	shrq	%cl, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrq	%cl, 291(%r8,%rax,4)
+# CHECK: shrq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         shrq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	shrq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrq	%cl, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	shrw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xea]
+         {evex}	shrw	%dx
+# CHECK: {nf}	shrw	%dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd1,0xea]
+         {nf}	shrw	%dx
+# CHECK: shrw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xea]
+         shrw	%dx, %dx
+# CHECK: {nf}	shrw	%dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd1,0xea]
+         {nf}	shrw	%dx, %dx
+# CHECK: {evex}	shrl	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xe9]
+         {evex}	shrl	%ecx
+# CHECK: {nf}	shrl	%ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd1,0xe9]
+         {nf}	shrl	%ecx
+# CHECK: shrl	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xe9]
+         shrl	%ecx, %ecx
+# CHECK: {nf}	shrl	%ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd1,0xe9]
+         {nf}	shrl	%ecx, %ecx
+# CHECK: {evex}	shrq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xe9]
+         {evex}	shrq	%r9
+# CHECK: {nf}	shrq	%r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xe9]
+         {nf}	shrq	%r9
+# CHECK: shrq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xe9]
+         shrq	%r9, %r9
+# CHECK: {nf}	shrq	%r9, %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xe9]
+         {nf}	shrq	%r9, %r9
+# CHECK: {evex}	shrb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrb	291(%r8,%rax,4)
+# CHECK: {nf}	shrb	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd0,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrb	291(%r8,%rax,4)
+# CHECK: shrb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0xac,0x80,0x23,0x01,0x00,0x00]
+         shrb	291(%r8,%rax,4), %bl
+# CHECK: {nf}	shrb	291(%r8,%rax,4), %bl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd0,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrb	291(%r8,%rax,4), %bl
+# CHECK: {evex}	shrw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrw	291(%r8,%rax,4)
+# CHECK: {nf}	shrw	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrw	291(%r8,%rax,4)
+# CHECK: shrw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         shrw	291(%r8,%rax,4), %dx
+# CHECK: {nf}	shrw	291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrw	291(%r8,%rax,4), %dx
+# CHECK: {evex}	shrl	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrl	291(%r8,%rax,4)
+# CHECK: {nf}	shrl	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrl	291(%r8,%rax,4)
+# CHECK: shrl	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         shrl	291(%r8,%rax,4), %ecx
+# CHECK: {nf}	shrl	291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrl	291(%r8,%rax,4), %ecx
+# CHECK: {evex}	shrq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrq	291(%r8,%rax,4)
+# CHECK: {nf}	shrq	291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrq	291(%r8,%rax,4)
+# CHECK: shrq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         shrq	291(%r8,%rax,4), %r9
+# CHECK: {nf}	shrq	291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrq	291(%r8,%rax,4), %r9
diff --git a/llvm/test/MC/X86/apx/shr-intel.s b/llvm/test/MC/X86/apx/shr-intel.s
new file mode 100644
index 00000000000000..2a44177a438100
--- /dev/null
+++ b/llvm/test/MC/X86/apx/shr-intel.s
@@ -0,0 +1,284 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: {evex}	shr	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc0,0xeb,0x7b]
+         {evex}	shr	bl, 123
+# CHECK: {nf}	shr	bl, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc0,0xeb,0x7b]
+         {nf}	shr	bl, 123
+# CHECK: shr	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xc0,0xeb,0x7b]
+         shr	bl, bl, 123
+# CHECK: {nf}	shr	bl, bl, 123
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xc0,0xeb,0x7b]
+         {nf}	shr	bl, bl, 123
+# CHECK: {evex}	shr	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xc1,0xea,0x7b]
+         {evex}	shr	dx, 123
+# CHECK: {nf}	shr	dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xc1,0xea,0x7b]
+         {nf}	shr	dx, 123
+# CHECK: shr	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xc1,0xea,0x7b]
+         shr	dx, dx, 123
+# CHECK: {nf}	shr	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xc1,0xea,0x7b]
+         {nf}	shr	dx, dx, 123
+# CHECK: {evex}	shr	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xc1,0xe9,0x7b]
+         {evex}	shr	ecx, 123
+# CHECK: {nf}	shr	ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xc1,0xe9,0x7b]
+         {nf}	shr	ecx, 123
+# CHECK: shr	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xc1,0xe9,0x7b]
+         shr	ecx, ecx, 123
+# CHECK: {nf}	shr	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xc1,0xe9,0x7b]
+         {nf}	shr	ecx, ecx, 123
+# CHECK: {evex}	shr	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xe9,0x7b]
+         {evex}	shr	r9, 123
+# CHECK: {nf}	shr	r9, 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xe9,0x7b]
+         {nf}	shr	r9, 123
+# CHECK: shr	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xe9,0x7b]
+         shr	r9, r9, 123
+# CHECK: {nf}	shr	r9, r9, 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xe9,0x7b]
+         {nf}	shr	r9, r9, 123
+# CHECK: {evex}	shr	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shr	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shr	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shr	byte ptr [r8 + 4*rax + 291], 123
+# CHECK: shr	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shr	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shr	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xc0,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shr	bl, byte ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	shr	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shr	word ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shr	word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shr	word ptr [r8 + 4*rax + 291], 123
+# CHECK: shr	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shr	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shr	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shr	dx, word ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	shr	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shr	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shr	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shr	dword ptr [r8 + 4*rax + 291], 123
+# CHECK: shr	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shr	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shr	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shr	ecx, dword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	shr	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shr	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shr	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shr	qword ptr [r8 + 4*rax + 291], 123
+# CHECK: shr	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shr	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {nf}	shr	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xc1,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shr	r9, qword ptr [r8 + 4*rax + 291], 123
+# CHECK: {evex}	shr	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd0,0xeb]
+         {evex}	shr	bl
+# CHECK: {nf}	shr	bl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd0,0xeb]
+         {nf}	shr	bl
+# CHECK: {evex}	shr	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd2,0xeb]
+         {evex}	shr	bl, cl
+# CHECK: {nf}	shr	bl, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd2,0xeb]
+         {nf}	shr	bl, cl
+# CHECK: shr	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x18,0xd2,0xeb]
+         shr	bl, bl, cl
+# CHECK: {nf}	shr	bl, bl, cl
+# CHECK: encoding: [0x62,0xf4,0x64,0x1c,0xd2,0xeb]
+         {nf}	shr	bl, bl, cl
+# CHECK: {evex}	shr	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd3,0xea]
+         {evex}	shr	dx, cl
+# CHECK: {nf}	shr	dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd3,0xea]
+         {nf}	shr	dx, cl
+# CHECK: shr	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd3,0xea]
+         shr	dx, dx, cl
+# CHECK: {nf}	shr	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd3,0xea]
+         {nf}	shr	dx, dx, cl
+# CHECK: {evex}	shr	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd3,0xe9]
+         {evex}	shr	ecx, cl
+# CHECK: {nf}	shr	ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd3,0xe9]
+         {nf}	shr	ecx, cl
+# CHECK: shr	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd3,0xe9]
+         shr	ecx, ecx, cl
+# CHECK: {nf}	shr	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd3,0xe9]
+         {nf}	shr	ecx, ecx, cl
+# CHECK: {evex}	shr	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xe9]
+         {evex}	shr	r9, cl
+# CHECK: {nf}	shr	r9, cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xe9]
+         {nf}	shr	r9, cl
+# CHECK: shr	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xe9]
+         shr	r9, r9, cl
+# CHECK: {nf}	shr	r9, r9, cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xe9]
+         {nf}	shr	r9, r9, cl
+# CHECK: {evex}	shr	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd2,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shr	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shr	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd2,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	byte ptr [r8 + 4*rax + 291], cl
+# CHECK: shr	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd2,0xac,0x80,0x23,0x01,0x00,0x00]
+         shr	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shr	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd2,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	bl, byte ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	shr	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shr	word ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shr	word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	word ptr [r8 + 4*rax + 291], cl
+# CHECK: shr	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         shr	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shr	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	dx, word ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	shr	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shr	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shr	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	dword ptr [r8 + 4*rax + 291], cl
+# CHECK: shr	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         shr	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shr	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	ecx, dword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	shr	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shr	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shr	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	qword ptr [r8 + 4*rax + 291], cl
+# CHECK: shr	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         shr	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {nf}	shr	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd3,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	r9, qword ptr [r8 + 4*rax + 291], cl
+# CHECK: {evex}	shr	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xd1,0xea]
+         {evex}	shr	dx
+# CHECK: {nf}	shr	dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xd1,0xea]
+         {nf}	shr	dx
+# CHECK: shr	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xd1,0xea]
+         shr	dx, dx
+# CHECK: {nf}	shr	dx, dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xd1,0xea]
+         {nf}	shr	dx, dx
+# CHECK: {evex}	shr	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xd1,0xe9]
+         {evex}	shr	ecx
+# CHECK: {nf}	shr	ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xd1,0xe9]
+         {nf}	shr	ecx
+# CHECK: shr	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xd1,0xe9]
+         shr	ecx, ecx
+# CHECK: {nf}	shr	ecx, ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xd1,0xe9]
+         {nf}	shr	ecx, ecx
+# CHECK: {evex}	shr	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xe9]
+         {evex}	shr	r9
+# CHECK: {nf}	shr	r9
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xe9]
+         {nf}	shr	r9
+# CHECK: shr	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xe9]
+         shr	r9, r9
+# CHECK: {nf}	shr	r9, r9
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xe9]
+         {nf}	shr	r9, r9
+# CHECK: {evex}	shr	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd0,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shr	byte ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shr	byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd0,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	byte ptr [r8 + 4*rax + 291]
+# CHECK: shr	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x18,0xd0,0xac,0x80,0x23,0x01,0x00,0x00]
+         shr	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shr	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x64,0x1c,0xd0,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	bl, byte ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	shr	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shr	word ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shr	word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	word ptr [r8 + 4*rax + 291]
+# CHECK: shr	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         shr	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shr	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	dx, word ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	shr	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shr	dword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shr	dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	dword ptr [r8 + 4*rax + 291]
+# CHECK: shr	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         shr	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shr	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	ecx, dword ptr [r8 + 4*rax + 291]
+# CHECK: {evex}	shr	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x08,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shr	qword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shr	qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xfc,0x0c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	qword ptr [r8 + 4*rax + 291]
+# CHECK: shr	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x18,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         shr	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: {nf}	shr	r9, qword ptr [r8 + 4*rax + 291]
+# CHECK: encoding: [0x62,0xd4,0xb4,0x1c,0xd1,0xac,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shr	r9, qword ptr [r8 + 4*rax + 291]
diff --git a/llvm/test/MC/X86/apx/shrd-att.s b/llvm/test/MC/X86/apx/shrd-att.s
new file mode 100644
index 00000000000000..b3684a9299ab80
--- /dev/null
+++ b/llvm/test/MC/X86/apx/shrd-att.s
@@ -0,0 +1,149 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-48: error:
+# ERROR-NOT: error:
+# CHECK: {evex}	shrdw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x2c,0xd2,0x7b]
+         {evex}	shrdw	$123, %dx, %dx
+# CHECK: {nf}	shrdw	$123, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0x2c,0xd2,0x7b]
+         {nf}	shrdw	$123, %dx, %dx
+# CHECK: shrdw	$123, %dx, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0x2c,0xd2,0x7b]
+         shrdw	$123, %dx, %dx, %dx
+# CHECK: {nf}	shrdw	$123, %dx, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0x2c,0xd2,0x7b]
+         {nf}	shrdw	$123, %dx, %dx, %dx
+# CHECK: {evex}	shrdw	$123, %dx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shrdw	$123, %dx, 291(%r8,%rax,4)
+# CHECK: {nf}	shrdw	$123, %dx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrdw	$123, %dx, 291(%r8,%rax,4)
+# CHECK: shrdw	$123, %dx, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shrdw	$123, %dx, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	shrdw	$123, %dx, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrdw	$123, %dx, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	shrdl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x2c,0xc9,0x7b]
+         {evex}	shrdl	$123, %ecx, %ecx
+# CHECK: {nf}	shrdl	$123, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0x2c,0xc9,0x7b]
+         {nf}	shrdl	$123, %ecx, %ecx
+# CHECK: shrdl	$123, %ecx, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0x2c,0xc9,0x7b]
+         shrdl	$123, %ecx, %ecx, %ecx
+# CHECK: {nf}	shrdl	$123, %ecx, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0x2c,0xc9,0x7b]
+         {nf}	shrdl	$123, %ecx, %ecx, %ecx
+# CHECK: {evex}	shrdl	$123, %ecx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shrdl	$123, %ecx, 291(%r8,%rax,4)
+# CHECK: {nf}	shrdl	$123, %ecx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrdl	$123, %ecx, 291(%r8,%rax,4)
+# CHECK: shrdl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shrdl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	shrdl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrdl	$123, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	shrdq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0x2c,0xc9,0x7b]
+         {evex}	shrdq	$123, %r9, %r9
+# CHECK: {nf}	shrdq	$123, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0x2c,0xc9,0x7b]
+         {nf}	shrdq	$123, %r9, %r9
+# CHECK: shrdq	$123, %r9, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0x2c,0xc9,0x7b]
+         shrdq	$123, %r9, %r9, %r9
+# CHECK: {nf}	shrdq	$123, %r9, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0x2c,0xc9,0x7b]
+         {nf}	shrdq	$123, %r9, %r9, %r9
+# CHECK: {evex}	shrdq	$123, %r9, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shrdq	$123, %r9, 291(%r8,%rax,4)
+# CHECK: {nf}	shrdq	$123, %r9, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrdq	$123, %r9, 291(%r8,%rax,4)
+# CHECK: shrdq	$123, %r9, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shrdq	$123, %r9, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	shrdq	$123, %r9, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrdq	$123, %r9, 291(%r8,%rax,4), %r9
+# CHECK: {evex}	shrdw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xad,0xd2]
+         {evex}	shrdw	%cl, %dx, %dx
+# CHECK: {nf}	shrdw	%cl, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xad,0xd2]
+         {nf}	shrdw	%cl, %dx, %dx
+# CHECK: shrdw	%cl, %dx, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xad,0xd2]
+         shrdw	%cl, %dx, %dx, %dx
+# CHECK: {nf}	shrdw	%cl, %dx, %dx, %dx
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xad,0xd2]
+         {nf}	shrdw	%cl, %dx, %dx, %dx
+# CHECK: {evex}	shrdw	%cl, %dx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xad,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrdw	%cl, %dx, 291(%r8,%rax,4)
+# CHECK: {nf}	shrdw	%cl, %dx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xad,0x94,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrdw	%cl, %dx, 291(%r8,%rax,4)
+# CHECK: shrdw	%cl, %dx, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xad,0x94,0x80,0x23,0x01,0x00,0x00]
+         shrdw	%cl, %dx, 291(%r8,%rax,4), %dx
+# CHECK: {nf}	shrdw	%cl, %dx, 291(%r8,%rax,4), %dx
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xad,0x94,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrdw	%cl, %dx, 291(%r8,%rax,4), %dx
+# CHECK: {evex}	shrdl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xad,0xc9]
+         {evex}	shrdl	%cl, %ecx, %ecx
+# CHECK: {nf}	shrdl	%cl, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xad,0xc9]
+         {nf}	shrdl	%cl, %ecx, %ecx
+# CHECK: shrdl	%cl, %ecx, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xad,0xc9]
+         shrdl	%cl, %ecx, %ecx, %ecx
+# CHECK: {nf}	shrdl	%cl, %ecx, %ecx, %ecx
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xad,0xc9]
+         {nf}	shrdl	%cl, %ecx, %ecx, %ecx
+# CHECK: {evex}	shrdl	%cl, %ecx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrdl	%cl, %ecx, 291(%r8,%rax,4)
+# CHECK: {nf}	shrdl	%cl, %ecx, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrdl	%cl, %ecx, 291(%r8,%rax,4)
+# CHECK: shrdl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         shrdl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: {nf}	shrdl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrdl	%cl, %ecx, 291(%r8,%rax,4), %ecx
+# CHECK: {evex}	shrdq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0xad,0xc9]
+         {evex}	shrdq	%cl, %r9, %r9
+# CHECK: {nf}	shrdq	%cl, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0xad,0xc9]
+         {nf}	shrdq	%cl, %r9, %r9
+# CHECK: shrdq	%cl, %r9, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0xad,0xc9]
+         shrdq	%cl, %r9, %r9, %r9
+# CHECK: {nf}	shrdq	%cl, %r9, %r9, %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0xad,0xc9]
+         {nf}	shrdq	%cl, %r9, %r9, %r9
+# CHECK: {evex}	shrdq	%cl, %r9, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrdq	%cl, %r9, 291(%r8,%rax,4)
+# CHECK: {nf}	shrdq	%cl, %r9, 291(%r8,%rax,4)
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrdq	%cl, %r9, 291(%r8,%rax,4)
+# CHECK: shrdq	%cl, %r9, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         shrdq	%cl, %r9, 291(%r8,%rax,4), %r9
+# CHECK: {nf}	shrdq	%cl, %r9, 291(%r8,%rax,4), %r9
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrdq	%cl, %r9, 291(%r8,%rax,4), %r9
diff --git a/llvm/test/MC/X86/apx/shrd-intel.s b/llvm/test/MC/X86/apx/shrd-intel.s
new file mode 100644
index 00000000000000..d0f14eb78ee895
--- /dev/null
+++ b/llvm/test/MC/X86/apx/shrd-intel.s
@@ -0,0 +1,146 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: {evex}	shrd	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0x2c,0xd2,0x7b]
+         {evex}	shrd	dx, dx, 123
+# CHECK: {nf}	shrd	dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0x2c,0xd2,0x7b]
+         {nf}	shrd	dx, dx, 123
+# CHECK: shrd	dx, dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0x2c,0xd2,0x7b]
+         shrd	dx, dx, dx, 123
+# CHECK: {nf}	shrd	dx, dx, dx, 123
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0x2c,0xd2,0x7b]
+         {nf}	shrd	dx, dx, dx, 123
+# CHECK: {evex}	shrd	word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shrd	word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: {nf}	shrd	word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrd	word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: shrd	dx, word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shrd	dx, word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: {nf}	shrd	dx, word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0x2c,0x94,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrd	dx, word ptr [r8 + 4*rax + 291], dx, 123
+# CHECK: {evex}	shrd	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0x2c,0xc9,0x7b]
+         {evex}	shrd	ecx, ecx, 123
+# CHECK: {nf}	shrd	ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0x2c,0xc9,0x7b]
+         {nf}	shrd	ecx, ecx, 123
+# CHECK: shrd	ecx, ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0x2c,0xc9,0x7b]
+         shrd	ecx, ecx, ecx, 123
+# CHECK: {nf}	shrd	ecx, ecx, ecx, 123
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0x2c,0xc9,0x7b]
+         {nf}	shrd	ecx, ecx, ecx, 123
+# CHECK: {evex}	shrd	dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shrd	dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: {nf}	shrd	dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrd	dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: {nf}	shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, 123
+# CHECK: {evex}	shrd	r9, r9, 123
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0x2c,0xc9,0x7b]
+         {evex}	shrd	r9, r9, 123
+# CHECK: {nf}	shrd	r9, r9, 123
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0x2c,0xc9,0x7b]
+         {nf}	shrd	r9, r9, 123
+# CHECK: shrd	r9, r9, r9, 123
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0x2c,0xc9,0x7b]
+         shrd	r9, r9, r9, 123
+# CHECK: {nf}	shrd	r9, r9, r9, 123
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0x2c,0xc9,0x7b]
+         {nf}	shrd	r9, r9, r9, 123
+# CHECK: {evex}	shrd	qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {evex}	shrd	qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: {nf}	shrd	qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrd	qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: shrd	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         shrd	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: {nf}	shrd	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0x2c,0x8c,0x80,0x23,0x01,0x00,0x00,0x7b]
+         {nf}	shrd	r9, qword ptr [r8 + 4*rax + 291], r9, 123
+# CHECK: {evex}	shrd	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x08,0xad,0xd2]
+         {evex}	shrd	dx, dx, cl
+# CHECK: {nf}	shrd	dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x7d,0x0c,0xad,0xd2]
+         {nf}	shrd	dx, dx, cl
+# CHECK: shrd	dx, dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x18,0xad,0xd2]
+         shrd	dx, dx, dx, cl
+# CHECK: {nf}	shrd	dx, dx, dx, cl
+# CHECK: encoding: [0x62,0xf4,0x6d,0x1c,0xad,0xd2]
+         {nf}	shrd	dx, dx, dx, cl
+# CHECK: {evex}	shrd	word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x08,0xad,0x94,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrd	word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: {nf}	shrd	word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: encoding: [0x62,0xd4,0x7d,0x0c,0xad,0x94,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrd	word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: shrd	dx, word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x18,0xad,0x94,0x80,0x23,0x01,0x00,0x00]
+         shrd	dx, word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: {nf}	shrd	dx, word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: encoding: [0x62,0xd4,0x6d,0x1c,0xad,0x94,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrd	dx, word ptr [r8 + 4*rax + 291], dx, cl
+# CHECK: {evex}	shrd	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x08,0xad,0xc9]
+         {evex}	shrd	ecx, ecx, cl
+# CHECK: {nf}	shrd	ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x7c,0x0c,0xad,0xc9]
+         {nf}	shrd	ecx, ecx, cl
+# CHECK: shrd	ecx, ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x18,0xad,0xc9]
+         shrd	ecx, ecx, ecx, cl
+# CHECK: {nf}	shrd	ecx, ecx, ecx, cl
+# CHECK: encoding: [0x62,0xf4,0x74,0x1c,0xad,0xc9]
+         {nf}	shrd	ecx, ecx, ecx, cl
+# CHECK: {evex}	shrd	dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x08,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrd	dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: {nf}	shrd	dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: encoding: [0x62,0xd4,0x7c,0x0c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrd	dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x18,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: {nf}	shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: encoding: [0x62,0xd4,0x74,0x1c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrd	ecx, dword ptr [r8 + 4*rax + 291], ecx, cl
+# CHECK: {evex}	shrd	r9, r9, cl
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0xad,0xc9]
+         {evex}	shrd	r9, r9, cl
+# CHECK: {nf}	shrd	r9, r9, cl
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0xad,0xc9]
+         {nf}	shrd	r9, r9, cl
+# CHECK: shrd	r9, r9, r9, cl
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0xad,0xc9]
+         shrd	r9, r9, r9, cl
+# CHECK: {nf}	shrd	r9, r9, r9, cl
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0xad,0xc9]
+         {nf}	shrd	r9, r9, r9, cl
+# CHECK: {evex}	shrd	qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: encoding: [0x62,0x54,0xfc,0x08,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {evex}	shrd	qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: {nf}	shrd	qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: encoding: [0x62,0x54,0xfc,0x0c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrd	qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: shrd	r9, qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: encoding: [0x62,0x54,0xb4,0x18,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         shrd	r9, qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: {nf}	shrd	r9, qword ptr [r8 + 4*rax + 291], r9, cl
+# CHECK: encoding: [0x62,0x54,0xb4,0x1c,0xad,0x8c,0x80,0x23,0x01,0x00,0x00]
+         {nf}	shrd	r9, qword ptr [r8 + 4*rax + 291], r9, cl
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 02e4ae52cc9146..9b12e4af00bf74 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -150,41 +150,77 @@ static const X86FoldTableEntry Table2Addr[] = {
   {X86::RCR8rCL, X86::RCR8mCL, TB_NO_REVERSE},
   {X86::RCR8ri, X86::RCR8mi, TB_NO_REVERSE},
   {X86::ROL16r1, X86::ROL16m1, TB_NO_REVERSE},
+  {X86::ROL16r1_NF, X86::ROL16m1_NF, TB_NO_REVERSE},
   {X86::ROL16rCL, X86::ROL16mCL, TB_NO_REVERSE},
+  {X86::ROL16rCL_NF, X86::ROL16mCL_NF, TB_NO_REVERSE},
   {X86::ROL16ri, X86::ROL16mi, TB_NO_REVERSE},
+  {X86::ROL16ri_NF, X86::ROL16mi_NF, TB_NO_REVERSE},
   {X86::ROL32r1, X86::ROL32m1, TB_NO_REVERSE},
+  {X86::ROL32r1_NF, X86::ROL32m1_NF, TB_NO_REVERSE},
   {X86::ROL32rCL, X86::ROL32mCL, TB_NO_REVERSE},
+  {X86::ROL32rCL_NF, X86::ROL32mCL_NF, TB_NO_REVERSE},
   {X86::ROL32ri, X86::ROL32mi, TB_NO_REVERSE},
+  {X86::ROL32ri_NF, X86::ROL32mi_NF, TB_NO_REVERSE},
   {X86::ROL64r1, X86::ROL64m1, TB_NO_REVERSE},
+  {X86::ROL64r1_NF, X86::ROL64m1_NF, TB_NO_REVERSE},
   {X86::ROL64rCL, X86::ROL64mCL, TB_NO_REVERSE},
+  {X86::ROL64rCL_NF, X86::ROL64mCL_NF, TB_NO_REVERSE},
   {X86::ROL64ri, X86::ROL64mi, TB_NO_REVERSE},
+  {X86::ROL64ri_NF, X86::ROL64mi_NF, TB_NO_REVERSE},
   {X86::ROL8r1, X86::ROL8m1, TB_NO_REVERSE},
+  {X86::ROL8r1_NF, X86::ROL8m1_NF, TB_NO_REVERSE},
   {X86::ROL8rCL, X86::ROL8mCL, TB_NO_REVERSE},
+  {X86::ROL8rCL_NF, X86::ROL8mCL_NF, TB_NO_REVERSE},
   {X86::ROL8ri, X86::ROL8mi, TB_NO_REVERSE},
+  {X86::ROL8ri_NF, X86::ROL8mi_NF, TB_NO_REVERSE},
   {X86::ROR16r1, X86::ROR16m1, TB_NO_REVERSE},
+  {X86::ROR16r1_NF, X86::ROR16m1_NF, TB_NO_REVERSE},
   {X86::ROR16rCL, X86::ROR16mCL, TB_NO_REVERSE},
+  {X86::ROR16rCL_NF, X86::ROR16mCL_NF, TB_NO_REVERSE},
   {X86::ROR16ri, X86::ROR16mi, TB_NO_REVERSE},
+  {X86::ROR16ri_NF, X86::ROR16mi_NF, TB_NO_REVERSE},
   {X86::ROR32r1, X86::ROR32m1, TB_NO_REVERSE},
+  {X86::ROR32r1_NF, X86::ROR32m1_NF, TB_NO_REVERSE},
   {X86::ROR32rCL, X86::ROR32mCL, TB_NO_REVERSE},
+  {X86::ROR32rCL_NF, X86::ROR32mCL_NF, TB_NO_REVERSE},
   {X86::ROR32ri, X86::ROR32mi, TB_NO_REVERSE},
+  {X86::ROR32ri_NF, X86::ROR32mi_NF, TB_NO_REVERSE},
   {X86::ROR64r1, X86::ROR64m1, TB_NO_REVERSE},
+  {X86::ROR64r1_NF, X86::ROR64m1_NF, TB_NO_REVERSE},
   {X86::ROR64rCL, X86::ROR64mCL, TB_NO_REVERSE},
+  {X86::ROR64rCL_NF, X86::ROR64mCL_NF, TB_NO_REVERSE},
   {X86::ROR64ri, X86::ROR64mi, TB_NO_REVERSE},
+  {X86::ROR64ri_NF, X86::ROR64mi_NF, TB_NO_REVERSE},
   {X86::ROR8r1, X86::ROR8m1, TB_NO_REVERSE},
+  {X86::ROR8r1_NF, X86::ROR8m1_NF, TB_NO_REVERSE},
   {X86::ROR8rCL, X86::ROR8mCL, TB_NO_REVERSE},
+  {X86::ROR8rCL_NF, X86::ROR8mCL_NF, TB_NO_REVERSE},
   {X86::ROR8ri, X86::ROR8mi, TB_NO_REVERSE},
+  {X86::ROR8ri_NF, X86::ROR8mi_NF, TB_NO_REVERSE},
   {X86::SAR16r1, X86::SAR16m1, TB_NO_REVERSE},
+  {X86::SAR16r1_NF, X86::SAR16m1_NF, TB_NO_REVERSE},
   {X86::SAR16rCL, X86::SAR16mCL, TB_NO_REVERSE},
+  {X86::SAR16rCL_NF, X86::SAR16mCL_NF, TB_NO_REVERSE},
   {X86::SAR16ri, X86::SAR16mi, TB_NO_REVERSE},
+  {X86::SAR16ri_NF, X86::SAR16mi_NF, TB_NO_REVERSE},
   {X86::SAR32r1, X86::SAR32m1, TB_NO_REVERSE},
+  {X86::SAR32r1_NF, X86::SAR32m1_NF, TB_NO_REVERSE},
   {X86::SAR32rCL, X86::SAR32mCL, TB_NO_REVERSE},
+  {X86::SAR32rCL_NF, X86::SAR32mCL_NF, TB_NO_REVERSE},
   {X86::SAR32ri, X86::SAR32mi, TB_NO_REVERSE},
+  {X86::SAR32ri_NF, X86::SAR32mi_NF, TB_NO_REVERSE},
   {X86::SAR64r1, X86::SAR64m1, TB_NO_REVERSE},
+  {X86::SAR64r1_NF, X86::SAR64m1_NF, TB_NO_REVERSE},
   {X86::SAR64rCL, X86::SAR64mCL, TB_NO_REVERSE},
+  {X86::SAR64rCL_NF, X86::SAR64mCL_NF, TB_NO_REVERSE},
   {X86::SAR64ri, X86::SAR64mi, TB_NO_REVERSE},
+  {X86::SAR64ri_NF, X86::SAR64mi_NF, TB_NO_REVERSE},
   {X86::SAR8r1, X86::SAR8m1, TB_NO_REVERSE},
+  {X86::SAR8r1_NF, X86::SAR8m1_NF, TB_NO_REVERSE},
   {X86::SAR8rCL, X86::SAR8mCL, TB_NO_REVERSE},
+  {X86::SAR8rCL_NF, X86::SAR8mCL_NF, TB_NO_REVERSE},
   {X86::SAR8ri, X86::SAR8mi, TB_NO_REVERSE},
+  {X86::SAR8ri_NF, X86::SAR8mi_NF, TB_NO_REVERSE},
   {X86::SBB16ri, X86::SBB16mi, TB_NO_REVERSE},
   {X86::SBB16ri8, X86::SBB16mi8, TB_NO_REVERSE},
   {X86::SBB16rr, X86::SBB16mr, TB_NO_REVERSE},
@@ -198,41 +234,77 @@ static const X86FoldTableEntry Table2Addr[] = {
   {X86::SBB8ri8, X86::SBB8mi8, TB_NO_REVERSE},
   {X86::SBB8rr, X86::SBB8mr, TB_NO_REVERSE},
   {X86::SHL16r1, X86::SHL16m1, TB_NO_REVERSE},
+  {X86::SHL16r1_NF, X86::SHL16m1_NF, TB_NO_REVERSE},
   {X86::SHL16rCL, X86::SHL16mCL, TB_NO_REVERSE},
+  {X86::SHL16rCL_NF, X86::SHL16mCL_NF, TB_NO_REVERSE},
   {X86::SHL16ri, X86::SHL16mi, TB_NO_REVERSE},
+  {X86::SHL16ri_NF, X86::SHL16mi_NF, TB_NO_REVERSE},
   {X86::SHL32r1, X86::SHL32m1, TB_NO_REVERSE},
+  {X86::SHL32r1_NF, X86::SHL32m1_NF, TB_NO_REVERSE},
   {X86::SHL32rCL, X86::SHL32mCL, TB_NO_REVERSE},
+  {X86::SHL32rCL_NF, X86::SHL32mCL_NF, TB_NO_REVERSE},
   {X86::SHL32ri, X86::SHL32mi, TB_NO_REVERSE},
+  {X86::SHL32ri_NF, X86::SHL32mi_NF, TB_NO_REVERSE},
   {X86::SHL64r1, X86::SHL64m1, TB_NO_REVERSE},
+  {X86::SHL64r1_NF, X86::SHL64m1_NF, TB_NO_REVERSE},
   {X86::SHL64rCL, X86::SHL64mCL, TB_NO_REVERSE},
+  {X86::SHL64rCL_NF, X86::SHL64mCL_NF, TB_NO_REVERSE},
   {X86::SHL64ri, X86::SHL64mi, TB_NO_REVERSE},
+  {X86::SHL64ri_NF, X86::SHL64mi_NF, TB_NO_REVERSE},
   {X86::SHL8r1, X86::SHL8m1, TB_NO_REVERSE},
+  {X86::SHL8r1_NF, X86::SHL8m1_NF, TB_NO_REVERSE},
   {X86::SHL8rCL, X86::SHL8mCL, TB_NO_REVERSE},
+  {X86::SHL8rCL_NF, X86::SHL8mCL_NF, TB_NO_REVERSE},
   {X86::SHL8ri, X86::SHL8mi, TB_NO_REVERSE},
+  {X86::SHL8ri_NF, X86::SHL8mi_NF, TB_NO_REVERSE},
   {X86::SHLD16rrCL, X86::SHLD16mrCL, TB_NO_REVERSE},
+  {X86::SHLD16rrCL_NF, X86::SHLD16mrCL_NF, TB_NO_REVERSE},
   {X86::SHLD16rri8, X86::SHLD16mri8, TB_NO_REVERSE},
+  {X86::SHLD16rri8_NF, X86::SHLD16mri8_NF, TB_NO_REVERSE},
   {X86::SHLD32rrCL, X86::SHLD32mrCL, TB_NO_REVERSE},
+  {X86::SHLD32rrCL_NF, X86::SHLD32mrCL_NF, TB_NO_REVERSE},
   {X86::SHLD32rri8, X86::SHLD32mri8, TB_NO_REVERSE},
+  {X86::SHLD32rri8_NF, X86::SHLD32mri8_NF, TB_NO_REVERSE},
   {X86::SHLD64rrCL, X86::SHLD64mrCL, TB_NO_REVERSE},
+  {X86::SHLD64rrCL_NF, X86::SHLD64mrCL_NF, TB_NO_REVERSE},
   {X86::SHLD64rri8, X86::SHLD64mri8, TB_NO_REVERSE},
+  {X86::SHLD64rri8_NF, X86::SHLD64mri8_NF, TB_NO_REVERSE},
   {X86::SHR16r1, X86::SHR16m1, TB_NO_REVERSE},
+  {X86::SHR16r1_NF, X86::SHR16m1_NF, TB_NO_REVERSE},
   {X86::SHR16rCL, X86::SHR16mCL, TB_NO_REVERSE},
+  {X86::SHR16rCL_NF, X86::SHR16mCL_NF, TB_NO_REVERSE},
   {X86::SHR16ri, X86::SHR16mi, TB_NO_REVERSE},
+  {X86::SHR16ri_NF, X86::SHR16mi_NF, TB_NO_REVERSE},
   {X86::SHR32r1, X86::SHR32m1, TB_NO_REVERSE},
+  {X86::SHR32r1_NF, X86::SHR32m1_NF, TB_NO_REVERSE},
   {X86::SHR32rCL, X86::SHR32mCL, TB_NO_REVERSE},
+  {X86::SHR32rCL_NF, X86::SHR32mCL_NF, TB_NO_REVERSE},
   {X86::SHR32ri, X86::SHR32mi, TB_NO_REVERSE},
+  {X86::SHR32ri_NF, X86::SHR32mi_NF, TB_NO_REVERSE},
   {X86::SHR64r1, X86::SHR64m1, TB_NO_REVERSE},
+  {X86::SHR64r1_NF, X86::SHR64m1_NF, TB_NO_REVERSE},
   {X86::SHR64rCL, X86::SHR64mCL, TB_NO_REVERSE},
+  {X86::SHR64rCL_NF, X86::SHR64mCL_NF, TB_NO_REVERSE},
   {X86::SHR64ri, X86::SHR64mi, TB_NO_REVERSE},
+  {X86::SHR64ri_NF, X86::SHR64mi_NF, TB_NO_REVERSE},
   {X86::SHR8r1, X86::SHR8m1, TB_NO_REVERSE},
+  {X86::SHR8r1_NF, X86::SHR8m1_NF, TB_NO_REVERSE},
   {X86::SHR8rCL, X86::SHR8mCL, TB_NO_REVERSE},
+  {X86::SHR8rCL_NF, X86::SHR8mCL_NF, TB_NO_REVERSE},
   {X86::SHR8ri, X86::SHR8mi, TB_NO_REVERSE},
+  {X86::SHR8ri_NF, X86::SHR8mi_NF, TB_NO_REVERSE},
   {X86::SHRD16rrCL, X86::SHRD16mrCL, TB_NO_REVERSE},
+  {X86::SHRD16rrCL_NF, X86::SHRD16mrCL_NF, TB_NO_REVERSE},
   {X86::SHRD16rri8, X86::SHRD16mri8, TB_NO_REVERSE},
+  {X86::SHRD16rri8_NF, X86::SHRD16mri8_NF, TB_NO_REVERSE},
   {X86::SHRD32rrCL, X86::SHRD32mrCL, TB_NO_REVERSE},
+  {X86::SHRD32rrCL_NF, X86::SHRD32mrCL_NF, TB_NO_REVERSE},
   {X86::SHRD32rri8, X86::SHRD32mri8, TB_NO_REVERSE},
+  {X86::SHRD32rri8_NF, X86::SHRD32mri8_NF, TB_NO_REVERSE},
   {X86::SHRD64rrCL, X86::SHRD64mrCL, TB_NO_REVERSE},
+  {X86::SHRD64rrCL_NF, X86::SHRD64mrCL_NF, TB_NO_REVERSE},
   {X86::SHRD64rri8, X86::SHRD64mri8, TB_NO_REVERSE},
+  {X86::SHRD64rri8_NF, X86::SHRD64mri8_NF, TB_NO_REVERSE},
   {X86::SUB16ri, X86::SUB16mi, TB_NO_REVERSE},
   {X86::SUB16ri8, X86::SUB16mi8, TB_NO_REVERSE},
   {X86::SUB16ri8_NF, X86::SUB16mi8_NF, TB_NO_REVERSE},
@@ -783,8 +855,80 @@ static const X86FoldTableEntry Table1[] = {
   {X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16},
   {X86::PSWAPDrr, X86::PSWAPDrm, 0},
   {X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16},
+  {X86::RCL16r1_ND, X86::RCL16m1_ND, 0},
+  {X86::RCL16rCL_ND, X86::RCL16mCL_ND, 0},
+  {X86::RCL16ri_ND, X86::RCL16mi_ND, 0},
+  {X86::RCL32r1_ND, X86::RCL32m1_ND, 0},
+  {X86::RCL32rCL_ND, X86::RCL32mCL_ND, 0},
+  {X86::RCL32ri_ND, X86::RCL32mi_ND, 0},
+  {X86::RCL64r1_ND, X86::RCL64m1_ND, 0},
+  {X86::RCL64rCL_ND, X86::RCL64mCL_ND, 0},
+  {X86::RCL64ri_ND, X86::RCL64mi_ND, 0},
+  {X86::RCL8r1_ND, X86::RCL8m1_ND, 0},
+  {X86::RCL8rCL_ND, X86::RCL8mCL_ND, 0},
+  {X86::RCL8ri_ND, X86::RCL8mi_ND, 0},
   {X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16},
   {X86::RCPSSr, X86::RCPSSm, 0},
+  {X86::RCR16r1_ND, X86::RCR16m1_ND, 0},
+  {X86::RCR16rCL_ND, X86::RCR16mCL_ND, 0},
+  {X86::RCR16ri_ND, X86::RCR16mi_ND, 0},
+  {X86::RCR32r1_ND, X86::RCR32m1_ND, 0},
+  {X86::RCR32rCL_ND, X86::RCR32mCL_ND, 0},
+  {X86::RCR32ri_ND, X86::RCR32mi_ND, 0},
+  {X86::RCR64r1_ND, X86::RCR64m1_ND, 0},
+  {X86::RCR64rCL_ND, X86::RCR64mCL_ND, 0},
+  {X86::RCR64ri_ND, X86::RCR64mi_ND, 0},
+  {X86::RCR8r1_ND, X86::RCR8m1_ND, 0},
+  {X86::RCR8rCL_ND, X86::RCR8mCL_ND, 0},
+  {X86::RCR8ri_ND, X86::RCR8mi_ND, 0},
+  {X86::ROL16r1_ND, X86::ROL16m1_ND, 0},
+  {X86::ROL16r1_NF_ND, X86::ROL16m1_NF_ND, 0},
+  {X86::ROL16rCL_ND, X86::ROL16mCL_ND, 0},
+  {X86::ROL16rCL_NF_ND, X86::ROL16mCL_NF_ND, 0},
+  {X86::ROL16ri_ND, X86::ROL16mi_ND, 0},
+  {X86::ROL16ri_NF_ND, X86::ROL16mi_NF_ND, 0},
+  {X86::ROL32r1_ND, X86::ROL32m1_ND, 0},
+  {X86::ROL32r1_NF_ND, X86::ROL32m1_NF_ND, 0},
+  {X86::ROL32rCL_ND, X86::ROL32mCL_ND, 0},
+  {X86::ROL32rCL_NF_ND, X86::ROL32mCL_NF_ND, 0},
+  {X86::ROL32ri_ND, X86::ROL32mi_ND, 0},
+  {X86::ROL32ri_NF_ND, X86::ROL32mi_NF_ND, 0},
+  {X86::ROL64r1_ND, X86::ROL64m1_ND, 0},
+  {X86::ROL64r1_NF_ND, X86::ROL64m1_NF_ND, 0},
+  {X86::ROL64rCL_ND, X86::ROL64mCL_ND, 0},
+  {X86::ROL64rCL_NF_ND, X86::ROL64mCL_NF_ND, 0},
+  {X86::ROL64ri_ND, X86::ROL64mi_ND, 0},
+  {X86::ROL64ri_NF_ND, X86::ROL64mi_NF_ND, 0},
+  {X86::ROL8r1_ND, X86::ROL8m1_ND, 0},
+  {X86::ROL8r1_NF_ND, X86::ROL8m1_NF_ND, 0},
+  {X86::ROL8rCL_ND, X86::ROL8mCL_ND, 0},
+  {X86::ROL8rCL_NF_ND, X86::ROL8mCL_NF_ND, 0},
+  {X86::ROL8ri_ND, X86::ROL8mi_ND, 0},
+  {X86::ROL8ri_NF_ND, X86::ROL8mi_NF_ND, 0},
+  {X86::ROR16r1_ND, X86::ROR16m1_ND, 0},
+  {X86::ROR16r1_NF_ND, X86::ROR16m1_NF_ND, 0},
+  {X86::ROR16rCL_ND, X86::ROR16mCL_ND, 0},
+  {X86::ROR16rCL_NF_ND, X86::ROR16mCL_NF_ND, 0},
+  {X86::ROR16ri_ND, X86::ROR16mi_ND, 0},
+  {X86::ROR16ri_NF_ND, X86::ROR16mi_NF_ND, 0},
+  {X86::ROR32r1_ND, X86::ROR32m1_ND, 0},
+  {X86::ROR32r1_NF_ND, X86::ROR32m1_NF_ND, 0},
+  {X86::ROR32rCL_ND, X86::ROR32mCL_ND, 0},
+  {X86::ROR32rCL_NF_ND, X86::ROR32mCL_NF_ND, 0},
+  {X86::ROR32ri_ND, X86::ROR32mi_ND, 0},
+  {X86::ROR32ri_NF_ND, X86::ROR32mi_NF_ND, 0},
+  {X86::ROR64r1_ND, X86::ROR64m1_ND, 0},
+  {X86::ROR64r1_NF_ND, X86::ROR64m1_NF_ND, 0},
+  {X86::ROR64rCL_ND, X86::ROR64mCL_ND, 0},
+  {X86::ROR64rCL_NF_ND, X86::ROR64mCL_NF_ND, 0},
+  {X86::ROR64ri_ND, X86::ROR64mi_ND, 0},
+  {X86::ROR64ri_NF_ND, X86::ROR64mi_NF_ND, 0},
+  {X86::ROR8r1_ND, X86::ROR8m1_ND, 0},
+  {X86::ROR8r1_NF_ND, X86::ROR8m1_NF_ND, 0},
+  {X86::ROR8rCL_ND, X86::ROR8mCL_ND, 0},
+  {X86::ROR8rCL_NF_ND, X86::ROR8mCL_NF_ND, 0},
+  {X86::ROR8ri_ND, X86::ROR8mi_ND, 0},
+  {X86::ROR8ri_NF_ND, X86::ROR8mi_NF_ND, 0},
   {X86::RORX32ri, X86::RORX32mi, 0},
   {X86::RORX32ri_EVEX, X86::RORX32mi_EVEX, 0},
   {X86::RORX64ri, X86::RORX64mi, 0},
@@ -795,6 +939,30 @@ static const X86FoldTableEntry Table1[] = {
   {X86::ROUNDSSr, X86::ROUNDSSm, 0},
   {X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16},
   {X86::RSQRTSSr, X86::RSQRTSSm, 0},
+  {X86::SAR16r1_ND, X86::SAR16m1_ND, 0},
+  {X86::SAR16r1_NF_ND, X86::SAR16m1_NF_ND, 0},
+  {X86::SAR16rCL_ND, X86::SAR16mCL_ND, 0},
+  {X86::SAR16rCL_NF_ND, X86::SAR16mCL_NF_ND, 0},
+  {X86::SAR16ri_ND, X86::SAR16mi_ND, 0},
+  {X86::SAR16ri_NF_ND, X86::SAR16mi_NF_ND, 0},
+  {X86::SAR32r1_ND, X86::SAR32m1_ND, 0},
+  {X86::SAR32r1_NF_ND, X86::SAR32m1_NF_ND, 0},
+  {X86::SAR32rCL_ND, X86::SAR32mCL_ND, 0},
+  {X86::SAR32rCL_NF_ND, X86::SAR32mCL_NF_ND, 0},
+  {X86::SAR32ri_ND, X86::SAR32mi_ND, 0},
+  {X86::SAR32ri_NF_ND, X86::SAR32mi_NF_ND, 0},
+  {X86::SAR64r1_ND, X86::SAR64m1_ND, 0},
+  {X86::SAR64r1_NF_ND, X86::SAR64m1_NF_ND, 0},
+  {X86::SAR64rCL_ND, X86::SAR64mCL_ND, 0},
+  {X86::SAR64rCL_NF_ND, X86::SAR64mCL_NF_ND, 0},
+  {X86::SAR64ri_ND, X86::SAR64mi_ND, 0},
+  {X86::SAR64ri_NF_ND, X86::SAR64mi_NF_ND, 0},
+  {X86::SAR8r1_ND, X86::SAR8m1_ND, 0},
+  {X86::SAR8r1_NF_ND, X86::SAR8m1_NF_ND, 0},
+  {X86::SAR8rCL_ND, X86::SAR8mCL_ND, 0},
+  {X86::SAR8rCL_NF_ND, X86::SAR8mCL_NF_ND, 0},
+  {X86::SAR8ri_ND, X86::SAR8mi_ND, 0},
+  {X86::SAR8ri_NF_ND, X86::SAR8mi_NF_ND, 0},
   {X86::SARX32rr, X86::SARX32rm, 0},
   {X86::SARX32rr_EVEX, X86::SARX32rm_EVEX, 0},
   {X86::SARX64rr, X86::SARX64rm, 0},
@@ -810,10 +978,82 @@ static const X86FoldTableEntry Table1[] = {
   {X86::SBB64rr_ND, X86::SBB64mr_ND, 0},
   {X86::SBB8ri_ND, X86::SBB8mi_ND, 0},
   {X86::SBB8rr_ND, X86::SBB8mr_ND, 0},
+  {X86::SHL16r1_ND, X86::SHL16m1_ND, 0},
+  {X86::SHL16r1_NF_ND, X86::SHL16m1_NF_ND, 0},
+  {X86::SHL16rCL_ND, X86::SHL16mCL_ND, 0},
+  {X86::SHL16rCL_NF_ND, X86::SHL16mCL_NF_ND, 0},
+  {X86::SHL16ri_ND, X86::SHL16mi_ND, 0},
+  {X86::SHL16ri_NF_ND, X86::SHL16mi_NF_ND, 0},
+  {X86::SHL32r1_ND, X86::SHL32m1_ND, 0},
+  {X86::SHL32r1_NF_ND, X86::SHL32m1_NF_ND, 0},
+  {X86::SHL32rCL_ND, X86::SHL32mCL_ND, 0},
+  {X86::SHL32rCL_NF_ND, X86::SHL32mCL_NF_ND, 0},
+  {X86::SHL32ri_ND, X86::SHL32mi_ND, 0},
+  {X86::SHL32ri_NF_ND, X86::SHL32mi_NF_ND, 0},
+  {X86::SHL64r1_ND, X86::SHL64m1_ND, 0},
+  {X86::SHL64r1_NF_ND, X86::SHL64m1_NF_ND, 0},
+  {X86::SHL64rCL_ND, X86::SHL64mCL_ND, 0},
+  {X86::SHL64rCL_NF_ND, X86::SHL64mCL_NF_ND, 0},
+  {X86::SHL64ri_ND, X86::SHL64mi_ND, 0},
+  {X86::SHL64ri_NF_ND, X86::SHL64mi_NF_ND, 0},
+  {X86::SHL8r1_ND, X86::SHL8m1_ND, 0},
+  {X86::SHL8r1_NF_ND, X86::SHL8m1_NF_ND, 0},
+  {X86::SHL8rCL_ND, X86::SHL8mCL_ND, 0},
+  {X86::SHL8rCL_NF_ND, X86::SHL8mCL_NF_ND, 0},
+  {X86::SHL8ri_ND, X86::SHL8mi_ND, 0},
+  {X86::SHL8ri_NF_ND, X86::SHL8mi_NF_ND, 0},
+  {X86::SHLD16rrCL_ND, X86::SHLD16mrCL_ND, 0},
+  {X86::SHLD16rrCL_NF_ND, X86::SHLD16mrCL_NF_ND, 0},
+  {X86::SHLD16rri8_ND, X86::SHLD16mri8_ND, 0},
+  {X86::SHLD16rri8_NF_ND, X86::SHLD16mri8_NF_ND, 0},
+  {X86::SHLD32rrCL_ND, X86::SHLD32mrCL_ND, 0},
+  {X86::SHLD32rrCL_NF_ND, X86::SHLD32mrCL_NF_ND, 0},
+  {X86::SHLD32rri8_ND, X86::SHLD32mri8_ND, 0},
+  {X86::SHLD32rri8_NF_ND, X86::SHLD32mri8_NF_ND, 0},
+  {X86::SHLD64rrCL_ND, X86::SHLD64mrCL_ND, 0},
+  {X86::SHLD64rrCL_NF_ND, X86::SHLD64mrCL_NF_ND, 0},
+  {X86::SHLD64rri8_ND, X86::SHLD64mri8_ND, 0},
+  {X86::SHLD64rri8_NF_ND, X86::SHLD64mri8_NF_ND, 0},
   {X86::SHLX32rr, X86::SHLX32rm, 0},
   {X86::SHLX32rr_EVEX, X86::SHLX32rm_EVEX, 0},
   {X86::SHLX64rr, X86::SHLX64rm, 0},
   {X86::SHLX64rr_EVEX, X86::SHLX64rm_EVEX, 0},
+  {X86::SHR16r1_ND, X86::SHR16m1_ND, 0},
+  {X86::SHR16r1_NF_ND, X86::SHR16m1_NF_ND, 0},
+  {X86::SHR16rCL_ND, X86::SHR16mCL_ND, 0},
+  {X86::SHR16rCL_NF_ND, X86::SHR16mCL_NF_ND, 0},
+  {X86::SHR16ri_ND, X86::SHR16mi_ND, 0},
+  {X86::SHR16ri_NF_ND, X86::SHR16mi_NF_ND, 0},
+  {X86::SHR32r1_ND, X86::SHR32m1_ND, 0},
+  {X86::SHR32r1_NF_ND, X86::SHR32m1_NF_ND, 0},
+  {X86::SHR32rCL_ND, X86::SHR32mCL_ND, 0},
+  {X86::SHR32rCL_NF_ND, X86::SHR32mCL_NF_ND, 0},
+  {X86::SHR32ri_ND, X86::SHR32mi_ND, 0},
+  {X86::SHR32ri_NF_ND, X86::SHR32mi_NF_ND, 0},
+  {X86::SHR64r1_ND, X86::SHR64m1_ND, 0},
+  {X86::SHR64r1_NF_ND, X86::SHR64m1_NF_ND, 0},
+  {X86::SHR64rCL_ND, X86::SHR64mCL_ND, 0},
+  {X86::SHR64rCL_NF_ND, X86::SHR64mCL_NF_ND, 0},
+  {X86::SHR64ri_ND, X86::SHR64mi_ND, 0},
+  {X86::SHR64ri_NF_ND, X86::SHR64mi_NF_ND, 0},
+  {X86::SHR8r1_ND, X86::SHR8m1_ND, 0},
+  {X86::SHR8r1_NF_ND, X86::SHR8m1_NF_ND, 0},
+  {X86::SHR8rCL_ND, X86::SHR8mCL_ND, 0},
+  {X86::SHR8rCL_NF_ND, X86::SHR8mCL_NF_ND, 0},
+  {X86::SHR8ri_ND, X86::SHR8mi_ND, 0},
+  {X86::SHR8ri_NF_ND, X86::SHR8mi_NF_ND, 0},
+  {X86::SHRD16rrCL_ND, X86::SHRD16mrCL_ND, 0},
+  {X86::SHRD16rrCL_NF_ND, X86::SHRD16mrCL_NF_ND, 0},
+  {X86::SHRD16rri8_ND, X86::SHRD16mri8_ND, 0},
+  {X86::SHRD16rri8_NF_ND, X86::SHRD16mri8_NF_ND, 0},
+  {X86::SHRD32rrCL_ND, X86::SHRD32mrCL_ND, 0},
+  {X86::SHRD32rrCL_NF_ND, X86::SHRD32mrCL_NF_ND, 0},
+  {X86::SHRD32rri8_ND, X86::SHRD32mri8_ND, 0},
+  {X86::SHRD32rri8_NF_ND, X86::SHRD32mri8_NF_ND, 0},
+  {X86::SHRD64rrCL_ND, X86::SHRD64mrCL_ND, 0},
+  {X86::SHRD64rrCL_NF_ND, X86::SHRD64mrCL_NF_ND, 0},
+  {X86::SHRD64rri8_ND, X86::SHRD64mri8_ND, 0},
+  {X86::SHRD64rri8_NF_ND, X86::SHRD64mri8_NF_ND, 0},
   {X86::SHRX32rr, X86::SHRX32rm, 0},
   {X86::SHRX32rr_EVEX, X86::SHRX32rm_EVEX, 0},
   {X86::SHRX64rr, X86::SHRX64rm, 0},

>From d2dadf165f95043c3fd52e1e3259225bef041be8 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Sun, 21 Jan 2024 21:48:16 +0800
Subject: [PATCH 2/4] add pattern in X86InstrCompiler.td

---
 llvm/lib/Target/X86/X86InstrCompiler.td   | 129 +++++---
 llvm/test/CodeGen/X86/rotate4.ll          | 132 ++++++++
 llvm/test/CodeGen/X86/shift-amount-mod.ll | 354 ++++++++++++++++++++++
 llvm/test/CodeGen/X86/shift-and-x86_64.ll |  17 ++
 llvm/test/CodeGen/X86/shift-and.ll        |  58 ++++
 5 files changed, 652 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 8e412204c989c6..14507b1d3c7ee4 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1789,24 +1789,47 @@ let Predicates = [HasNDD] in {
 // Shift amount is implicitly masked.
 multiclass MaskedShiftAmountPats<SDNode frag> {
   // (shift x (and y, 31)) ==> (shift x, y)
-  def : Pat<(frag GR8:$src1, (shiftMask32 CL)),
-            (!cast<Instruction>(NAME # "8rCL") GR8:$src1)>;
-  def : Pat<(frag GR16:$src1, (shiftMask32 CL)),
-            (!cast<Instruction>(NAME # "16rCL") GR16:$src1)>;
-  def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
-            (!cast<Instruction>(NAME # "32rCL") GR32:$src1)>;
+  // (shift x (and y, 63)) ==> (shift x, y) for 64-bit operands
+  let Predicates = [NoNDD] in {
+    def : Pat<(frag GR8:$src1, (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "8rCL") GR8:$src1)>;
+    def : Pat<(frag GR16:$src1, (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "16rCL") GR16:$src1)>;
+    def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "32rCL") GR32:$src1)>;
+    def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
+              (!cast<Instruction>(NAME # "64rCL") GR64:$src1)>;
+  }
+  let Predicates = [HasNDD] in {
+    def : Pat<(frag GR8:$src1, (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "8rCL_ND") GR8:$src1)>;
+    def : Pat<(frag GR16:$src1, (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "16rCL_ND") GR16:$src1)>;
+    def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "32rCL_ND") GR32:$src1)>;
+    def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
+              (!cast<Instruction>(NAME # "64rCL_ND") GR64:$src1)>;
+  }
+
   def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask32 CL)), addr:$dst),
             (!cast<Instruction>(NAME # "8mCL") addr:$dst)>;
   def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask32 CL)), addr:$dst),
             (!cast<Instruction>(NAME # "16mCL") addr:$dst)>;
   def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst),
             (!cast<Instruction>(NAME # "32mCL") addr:$dst)>;
-
-  // (shift x (and y, 63)) ==> (shift x, y)
-  def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
-            (!cast<Instruction>(NAME # "64rCL") GR64:$src1)>;
   def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst),
             (!cast<Instruction>(NAME # "64mCL") addr:$dst)>;
+
+  let Predicates = [HasNDD] in {
+    def : Pat<(frag (loadi8 addr:$src), (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "8mCL_ND") addr:$src)>;
+    def : Pat<(frag (loadi16 addr:$src), (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "16mCL_ND") addr:$src)>;
+    def : Pat<(frag (loadi32 addr:$src), (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "32mCL_ND") addr:$src)>;
+    def : Pat<(frag (loadi64 addr:$src), (shiftMask64 CL)),
+              (!cast<Instruction>(NAME # "64mCL_ND") addr:$src)>;
+  }
 }
 
 defm SHL : MaskedShiftAmountPats<shl>;
@@ -1821,47 +1844,77 @@ defm SAR : MaskedShiftAmountPats<sra>;
 // not tracking flags for these nodes.
 multiclass MaskedRotateAmountPats<SDNode frag> {
   // (rot x (and y, BitWidth - 1)) ==> (rot x, y)
-  def : Pat<(frag GR8:$src1, (shiftMask8 CL)),
-            (!cast<Instruction>(NAME # "8rCL") GR8:$src1)>;
-  def : Pat<(frag GR16:$src1, (shiftMask16 CL)),
-            (!cast<Instruction>(NAME # "16rCL") GR16:$src1)>;
-  def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
-            (!cast<Instruction>(NAME # "32rCL") GR32:$src1)>;
+  // e.g. for 64-bit operands: (rot x (and y, 63)) ==> (rot x, y)
+  let Predicates = [NoNDD] in {
+    def : Pat<(frag GR8:$src1, (shiftMask8 CL)),
+              (!cast<Instruction>(NAME # "8rCL") GR8:$src1)>;
+    def : Pat<(frag GR16:$src1, (shiftMask16 CL)),
+              (!cast<Instruction>(NAME # "16rCL") GR16:$src1)>;
+    def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "32rCL") GR32:$src1)>;
+    def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
+              (!cast<Instruction>(NAME # "64rCL") GR64:$src1)>;
+  }
+  let Predicates = [HasNDD] in {
+    def : Pat<(frag GR8:$src1, (shiftMask8 CL)),
+              (!cast<Instruction>(NAME # "8rCL_ND") GR8:$src1)>;
+    def : Pat<(frag GR16:$src1, (shiftMask16 CL)),
+              (!cast<Instruction>(NAME # "16rCL_ND") GR16:$src1)>;
+    def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "32rCL_ND") GR32:$src1)>;
+    def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
+              (!cast<Instruction>(NAME # "64rCL_ND") GR64:$src1)>;
+  }
+
   def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask8 CL)), addr:$dst),
             (!cast<Instruction>(NAME # "8mCL") addr:$dst)>;
   def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask16 CL)), addr:$dst),
             (!cast<Instruction>(NAME # "16mCL") addr:$dst)>;
   def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst),
             (!cast<Instruction>(NAME # "32mCL") addr:$dst)>;
-
-  // (rot x (and y, 63)) ==> (rot x, y)
-  def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
-            (!cast<Instruction>(NAME # "64rCL") GR64:$src1)>;
   def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst),
             (!cast<Instruction>(NAME # "64mCL") addr:$dst)>;
+
+  let Predicates = [HasNDD] in {
+    def : Pat<(frag (loadi8 addr:$src), (shiftMask8 CL)),
+              (!cast<Instruction>(NAME # "8mCL_ND") addr:$src)>;
+    def : Pat<(frag (loadi16 addr:$src), (shiftMask16 CL)),
+              (!cast<Instruction>(NAME # "16mCL_ND") addr:$src)>;
+    def : Pat<(frag (loadi32 addr:$src), (shiftMask32 CL)),
+              (!cast<Instruction>(NAME # "32mCL_ND") addr:$src)>;
+    def : Pat<(frag (loadi64 addr:$src), (shiftMask64 CL)),
+              (!cast<Instruction>(NAME # "64mCL_ND") addr:$src)>;
+  }
 }
 
 defm ROL : MaskedRotateAmountPats<rotl>;
 defm ROR : MaskedRotateAmountPats<rotr>;
 
-// Double "funnel" shift amount is implicitly masked.
-// (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) (NOTE: modulo32)
-def : Pat<(X86fshl GR16:$src1, GR16:$src2, (shiftMask32 CL)),
-          (SHLD16rrCL GR16:$src1, GR16:$src2)>;
-def : Pat<(X86fshr GR16:$src2, GR16:$src1, (shiftMask32 CL)),
-          (SHRD16rrCL GR16:$src1, GR16:$src2)>;
-
-// (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y)
-def : Pat<(fshl GR32:$src1, GR32:$src2, (shiftMask32 CL)),
-          (SHLD32rrCL GR32:$src1, GR32:$src2)>;
-def : Pat<(fshr GR32:$src2, GR32:$src1, (shiftMask32 CL)),
-          (SHRD32rrCL GR32:$src1, GR32:$src2)>;
-
-// (fshl/fshr x (and y, 63)) ==> (fshl/fshr x, y)
-def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
-          (SHLD64rrCL GR64:$src1, GR64:$src2)>;
-def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
-          (SHRD64rrCL GR64:$src1, GR64:$src2)>;
+multiclass MaskedShlrdAmountPats<string suffix, Predicate p> {
+  let Predicates = [p] in {
+    // Double "funnel" shift amount is implicitly masked.
+    // (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) (NOTE: modulo32)
+    def : Pat<(X86fshl GR16:$src1, GR16:$src2, (shiftMask32 CL)),
+              (!cast<Instruction>(SHLD16rrCL#suffix) GR16:$src1, GR16:$src2)>;
+    def : Pat<(X86fshr GR16:$src2, GR16:$src1, (shiftMask32 CL)),
+              (!cast<Instruction>(SHRD16rrCL#suffix) GR16:$src1, GR16:$src2)>;
+
+    // (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y)
+    def : Pat<(fshl GR32:$src1, GR32:$src2, (shiftMask32 CL)),
+              (!cast<Instruction>(SHLD32rrCL#suffix) GR32:$src1, GR32:$src2)>;
+    def : Pat<(fshr GR32:$src2, GR32:$src1, (shiftMask32 CL)),
+              (!cast<Instruction>(SHRD32rrCL#suffix) GR32:$src1, GR32:$src2)>;
+
+    // (fshl/fshr x (and y, 63)) ==> (fshl/fshr x, y)
+    def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
+              (!cast<Instruction>(SHLD64rrCL#suffix) GR64:$src1, GR64:$src2)>;
+    def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
+              (!cast<Instruction>(SHRD64rrCL#suffix) GR64:$src1, GR64:$src2)>;
+  }
+}
+
+defm : MaskedShlrdAmountPats<"", NoNDD>;
+defm : MaskedShlrdAmountPats<"_ND", HasNDD>;
 
 // Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
 multiclass OneBitPats<RegisterClass rc, ValueType vt, Instruction btr,
diff --git a/llvm/test/CodeGen/X86/rotate4.ll b/llvm/test/CodeGen/X86/rotate4.ll
index 0cc9f465dd75a8..68ed10b81fcdec 100644
--- a/llvm/test/CodeGen/X86/rotate4.ll
+++ b/llvm/test/CodeGen/X86/rotate4.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ndd --show-mc-encoding | FileCheck %s --check-prefixes=NDD
 
 ; Check that we recognize this idiom for rotation too:
 ;    a << (b & (OpSize-1)) | a >> ((0 - b) & (OpSize-1))
@@ -20,6 +21,13 @@ define i32 @rotate_left_32(i32 %a, i32 %b) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    roll %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_left_32:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    roll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i32 %b, 31
   %shl = shl i32 %a, %and
   %t0 = sub i32 0, %b
@@ -44,6 +52,13 @@ define i32 @rotate_right_32(i32 %a, i32 %b) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rorl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_right_32:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    rorl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xcf]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i32 %b, 31
   %shl = lshr i32 %a, %and
   %t0 = sub i32 0, %b
@@ -105,6 +120,13 @@ define i64 @rotate_left_64(i64 %a, i64 %b) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    rolq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_left_64:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $rcx
+; NDD-NEXT:    rolq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %b, 63
   %shl = shl i64 %a, %and
   %t0 = sub i64 0, %b
@@ -166,6 +188,13 @@ define i64 @rotate_right_64(i64 %a, i64 %b) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    rorq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_right_64:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $rcx
+; NDD-NEXT:    rorq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xcf]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %b, 63
   %shl = lshr i64 %a, %and
   %t0 = sub i64 0, %b
@@ -191,6 +220,13 @@ define void @rotate_left_m32(ptr%pa, i32 %b) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    roll %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_left_m32:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    roll %cl, (%rdi) # encoding: [0xd3,0x07]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %a = load i32, ptr %pa, align 16
   %and = and i32 %b, 31
   %shl = shl i32 %a, %and
@@ -216,6 +252,13 @@ define void @rotate_right_m32(ptr%pa, i32 %b) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rorl %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_right_m32:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    rorl %cl, (%rdi) # encoding: [0xd3,0x0f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %a = load i32, ptr %pa, align 16
   %and = and i32 %b, 31
   %shl = lshr i32 %a, %and
@@ -286,6 +329,13 @@ define void @rotate_left_m64(ptr%pa, i64 %b) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    rolq %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_left_m64:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $rcx
+; NDD-NEXT:    rolq %cl, (%rdi) # encoding: [0x48,0xd3,0x07]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %a = load i64, ptr %pa, align 16
   %and = and i64 %b, 63
   %shl = shl i64 %a, %and
@@ -356,6 +406,13 @@ define void @rotate_right_m64(ptr%pa, i64 %b) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    rorq %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_right_m64:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movq %rsi, %rcx # encoding: [0x48,0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $rcx
+; NDD-NEXT:    rorq %cl, (%rdi) # encoding: [0x48,0xd3,0x0f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %a = load i64, ptr %pa, align 16
   %and = and i64 %b, 63
   %shl = lshr i64 %a, %and
@@ -386,6 +443,13 @@ define i8 @rotate_left_8(i8 %x, i32 %amount) {
 ; X64-NEXT:    rolb %cl, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_left_8:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    rolb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %amt = trunc i32 %amount to i8
   %sub = sub i8 0, %amt
   %maskamt = and i8 %amt, 7
@@ -412,6 +476,13 @@ define i8 @rotate_right_8(i8 %x, i32 %amount) {
 ; X64-NEXT:    rorb %cl, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_right_8:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    rorb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xcf]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %amt = trunc i32 %amount to i8
   %sub = sub i8 0, %amt
   %maskamt = and i8 %amt, 7
@@ -438,6 +509,13 @@ define i16 @rotate_left_16(i16 %x, i32 %amount) {
 ; X64-NEXT:    rolw %cl, %ax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_left_16:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    rolw %cl, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xd3,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %amt = trunc i32 %amount to i16
   %sub = sub i16 0, %amt
   %maskamt = and i16 %amt, 15
@@ -464,6 +542,13 @@ define i16 @rotate_right_16(i16 %x, i32 %amount) {
 ; X64-NEXT:    rorw %cl, %ax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_right_16:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    rorw %cl, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xd3,0xcf]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %amt = trunc i32 %amount to i16
   %sub = sub i16 0, %amt
   %maskamt = and i16 %amt, 15
@@ -488,6 +573,13 @@ define void @rotate_left_m8(ptr %p, i32 %amount) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rolb %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_left_m8:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    rolb %cl, (%rdi) # encoding: [0xd2,0x07]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %x = load i8, ptr %p, align 1
   %amt = trunc i32 %amount to i8
   %sub = sub i8 0, %amt
@@ -514,6 +606,13 @@ define void @rotate_right_m8(ptr %p, i32 %amount) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rorb %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_right_m8:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    rorb %cl, (%rdi) # encoding: [0xd2,0x0f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %x = load i8, ptr %p, align 1
   %amt = trunc i32 %amount to i8
   %sub = sub i8 0, %amt
@@ -540,6 +639,13 @@ define void @rotate_left_m16(ptr %p, i32 %amount) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rolw %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_left_m16:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    rolw %cl, (%rdi) # encoding: [0x66,0xd3,0x07]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %x = load i16, ptr %p, align 1
   %amt = trunc i32 %amount to i16
   %sub = sub i16 0, %amt
@@ -566,6 +672,13 @@ define void @rotate_right_m16(ptr %p, i32 %amount) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rorw %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_right_m16:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %ecx # encoding: [0x89,0xf1]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    rorw %cl, (%rdi) # encoding: [0x66,0xd3,0x0f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %x = load i16, ptr %p, align 1
   %amt = trunc i32 %amount to i16
   %sub = sub i16 0, %amt
@@ -595,6 +708,12 @@ define i32 @rotate_demanded_bits(i32, i32) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    roll %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_demanded_bits:
+; NDD:       # %bb.0:
+; NDD-NEXT:    andb $30, %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xe6,0x1e]
+; NDD-NEXT:    roll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %3 = and i32 %1, 30
   %4 = shl i32 %0, %3
   %5 = sub nsw i32 0, %3
@@ -621,6 +740,12 @@ define i32 @rotate_demanded_bits_2(i32, i32) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    roll %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_demanded_bits_2:
+; NDD:       # %bb.0:
+; NDD-NEXT:    andb $23, %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xe6,0x17]
+; NDD-NEXT:    roll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %3 = and i32 %1, 23
   %4 = shl i32 %0, %3
   %5 = sub nsw i32 0, %3
@@ -647,6 +772,13 @@ define i32 @rotate_demanded_bits_3(i32, i32) {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    roll %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: rotate_demanded_bits_3:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %esi, %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x01,0xf6]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    roll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %3 = shl i32 %1, 1
   %4 = and i32 %3, 30
   %5 = shl i32 %0, %4
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index c89db15d12f45d..58e7fe4d6d39a3 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown-unknown   | FileCheck %s --check-prefix=X32
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ndd --show-mc-encoding | FileCheck %s --check-prefix=NDD
 
 ;==============================================================================;
 ; the shift amount is negated (shiftbitwidth - shiftamt)
@@ -27,6 +28,12 @@ define i32 @reg32_shl_by_negated(i32 %val, i32 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_shl_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xe7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i32 32, %shamt
   %shifted = shl i32 %val, %negshamt
   ret i32 %shifted
@@ -50,6 +57,12 @@ define i32 @load32_shl_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: load32_shl_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shll %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x27]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i32, ptr %valptr
   %negshamt = sub i32 32, %shamt
   %shifted = shl i32 %val, %negshamt
@@ -75,6 +88,13 @@ define void @store32_shl_by_negated(i32 %val, ptr %dstptr, i32 %shamt) nounwind
 ; X64-NEXT:    shll %cl, %edi
 ; X64-NEXT:    movl %edi, (%rsi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: store32_shl_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %dl, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xda]
+; NDD-NEXT:    shll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xe7]
+; NDD-NEXT:    movl %eax, (%rsi) # encoding: [0x89,0x06]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i32 32, %shamt
   %shifted = shl i32 %val, %negshamt
   store i32 %shifted, ptr %dstptr
@@ -95,6 +115,13 @@ define void @modify32_shl_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; X64-NEXT:    subb %sil, %cl
 ; X64-NEXT:    shll %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: modify32_shl_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movb $32, %al # encoding: [0xb0,0x20]
+; NDD-NEXT:    subb %sil, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x28,0xf0]
+; NDD-NEXT:    shll %cl, (%rdi) # encoding: [0xd3,0x27]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i32, ptr %valptr
   %negshamt = sub i32 32, %shamt
   %shifted = shl i32 %val, %negshamt
@@ -130,6 +157,12 @@ define i64 @reg64_shl_by_negated(i64 %val, i64 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shlq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_shl_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shlq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xe7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i64 64, %shamt
   %shifted = shl i64 %val, %negshamt
   ret i64 %shifted
@@ -163,6 +196,12 @@ define i64 @load64_shl_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shlq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: load64_shl_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shlq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x27]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i64, ptr %valptr
   %negshamt = sub i64 64, %shamt
   %shifted = shl i64 %val, %negshamt
@@ -201,6 +240,13 @@ define void @store64_shl_by_negated(i64 %val, ptr %dstptr, i64 %shamt) nounwind
 ; X64-NEXT:    shlq %cl, %rdi
 ; X64-NEXT:    movq %rdi, (%rsi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: store64_shl_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %dl, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xda]
+; NDD-NEXT:    shlq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xe7]
+; NDD-NEXT:    movq %rax, (%rsi) # encoding: [0x48,0x89,0x06]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i64 64, %shamt
   %shifted = shl i64 %val, %negshamt
   store i64 %shifted, ptr %dstptr
@@ -237,6 +283,13 @@ define void @modify64_shl_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; X64-NEXT:    subb %sil, %cl
 ; X64-NEXT:    shlq %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: modify64_shl_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movb $64, %al # encoding: [0xb0,0x40]
+; NDD-NEXT:    subb %sil, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x28,0xf0]
+; NDD-NEXT:    shlq %cl, (%rdi) # encoding: [0x48,0xd3,0x27]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i64, ptr %valptr
   %negshamt = sub i64 64, %shamt
   %shifted = shl i64 %val, %negshamt
@@ -265,6 +318,12 @@ define i32 @reg32_lshr_by_negated(i32 %val, i32 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i32 32, %shamt
   %shifted = lshr i32 %val, %negshamt
   ret i32 %shifted
@@ -288,6 +347,12 @@ define i32 @load32_lshr_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: load32_lshr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shrl %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x2f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i32, ptr %valptr
   %negshamt = sub i32 32, %shamt
   %shifted = lshr i32 %val, %negshamt
@@ -313,6 +378,13 @@ define void @store32_lshr_by_negated(i32 %val, ptr %dstptr, i32 %shamt) nounwind
 ; X64-NEXT:    shrl %cl, %edi
 ; X64-NEXT:    movl %edi, (%rsi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: store32_lshr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %dl, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xda]
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    movl %eax, (%rsi) # encoding: [0x89,0x06]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i32 32, %shamt
   %shifted = lshr i32 %val, %negshamt
   store i32 %shifted, ptr %dstptr
@@ -333,6 +405,13 @@ define void @modify32_lshr_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; X64-NEXT:    subb %sil, %cl
 ; X64-NEXT:    shrl %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: modify32_lshr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movb $32, %al # encoding: [0xb0,0x20]
+; NDD-NEXT:    subb %sil, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x28,0xf0]
+; NDD-NEXT:    shrl %cl, (%rdi) # encoding: [0xd3,0x2f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i32, ptr %valptr
   %negshamt = sub i32 32, %shamt
   %shifted = lshr i32 %val, %negshamt
@@ -368,6 +447,12 @@ define i64 @reg64_lshr_by_negated(i64 %val, i64 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i64 64, %shamt
   %shifted = lshr i64 %val, %negshamt
   ret i64 %shifted
@@ -401,6 +486,12 @@ define i64 @load64_lshr_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: load64_lshr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shrq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x2f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i64, ptr %valptr
   %negshamt = sub i64 64, %shamt
   %shifted = lshr i64 %val, %negshamt
@@ -439,6 +530,13 @@ define void @store64_lshr_by_negated(i64 %val, ptr %dstptr, i64 %shamt) nounwind
 ; X64-NEXT:    shrq %cl, %rdi
 ; X64-NEXT:    movq %rdi, (%rsi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: store64_lshr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %dl, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xda]
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    movq %rax, (%rsi) # encoding: [0x48,0x89,0x06]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i64 64, %shamt
   %shifted = lshr i64 %val, %negshamt
   store i64 %shifted, ptr %dstptr
@@ -475,6 +573,13 @@ define void @modify64_lshr_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; X64-NEXT:    subb %sil, %cl
 ; X64-NEXT:    shrq %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: modify64_lshr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movb $64, %al # encoding: [0xb0,0x40]
+; NDD-NEXT:    subb %sil, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x28,0xf0]
+; NDD-NEXT:    shrq %cl, (%rdi) # encoding: [0x48,0xd3,0x2f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i64, ptr %valptr
   %negshamt = sub i64 64, %shamt
   %shifted = lshr i64 %val, %negshamt
@@ -503,6 +608,12 @@ define i32 @reg32_ashr_by_negated(i32 %val, i32 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    sarl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_ashr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    sarl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xff]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i32 32, %shamt
   %shifted = ashr i32 %val, %negshamt
   ret i32 %shifted
@@ -526,6 +637,12 @@ define i32 @load32_ashr_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    sarl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: load32_ashr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    sarl %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x3f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i32, ptr %valptr
   %negshamt = sub i32 32, %shamt
   %shifted = ashr i32 %val, %negshamt
@@ -551,6 +668,13 @@ define void @store32_ashr_by_negated(i32 %val, ptr %dstptr, i32 %shamt) nounwind
 ; X64-NEXT:    sarl %cl, %edi
 ; X64-NEXT:    movl %edi, (%rsi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: store32_ashr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %dl, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xda]
+; NDD-NEXT:    sarl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xff]
+; NDD-NEXT:    movl %eax, (%rsi) # encoding: [0x89,0x06]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i32 32, %shamt
   %shifted = ashr i32 %val, %negshamt
   store i32 %shifted, ptr %dstptr
@@ -571,6 +695,13 @@ define void @modify32_ashr_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; X64-NEXT:    subb %sil, %cl
 ; X64-NEXT:    sarl %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: modify32_ashr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movb $32, %al # encoding: [0xb0,0x20]
+; NDD-NEXT:    subb %sil, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x28,0xf0]
+; NDD-NEXT:    sarl %cl, (%rdi) # encoding: [0xd3,0x3f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i32, ptr %valptr
   %negshamt = sub i32 32, %shamt
   %shifted = ashr i32 %val, %negshamt
@@ -607,6 +738,12 @@ define i64 @reg64_ashr_by_negated(i64 %val, i64 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    sarq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_ashr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    sarq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xff]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i64 64, %shamt
   %shifted = ashr i64 %val, %negshamt
   ret i64 %shifted
@@ -641,6 +778,12 @@ define i64 @load64_ashr_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    sarq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: load64_ashr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    sarq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x3f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i64, ptr %valptr
   %negshamt = sub i64 64, %shamt
   %shifted = ashr i64 %val, %negshamt
@@ -680,6 +823,13 @@ define void @store64_ashr_by_negated(i64 %val, ptr %dstptr, i64 %shamt) nounwind
 ; X64-NEXT:    sarq %cl, %rdi
 ; X64-NEXT:    movq %rdi, (%rsi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: store64_ashr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %dl, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xda]
+; NDD-NEXT:    sarq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xff]
+; NDD-NEXT:    movq %rax, (%rsi) # encoding: [0x48,0x89,0x06]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i64 64, %shamt
   %shifted = ashr i64 %val, %negshamt
   store i64 %shifted, ptr %dstptr
@@ -717,6 +867,13 @@ define void @modify64_ashr_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; X64-NEXT:    subb %sil, %cl
 ; X64-NEXT:    sarq %cl, (%rdi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: modify64_ashr_by_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movb $64, %al # encoding: [0xb0,0x40]
+; NDD-NEXT:    subb %sil, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x28,0xf0]
+; NDD-NEXT:    sarq %cl, (%rdi) # encoding: [0x48,0xd3,0x3f]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %val = load i64, ptr %valptr
   %negshamt = sub i64 64, %shamt
   %shifted = ashr i64 %val, %negshamt
@@ -752,6 +909,13 @@ define i32 @reg32_lshr_by_sub_from_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_sub_from_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %edx, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xd6]
+; NDD-NEXT:    negb %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xd8]
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 32, %a
   %negasubb = sub i32 %nega, %b
   %shifted = lshr i32 %val, %negasubb
@@ -787,6 +951,13 @@ define i64 @reg64_lshr_by_sub_from_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_sub_from_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %edx, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xd6]
+; NDD-NEXT:    negb %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xd8]
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 64, %a
   %negasubb = sub i64 %nega, %b
   %shifted = lshr i64 %val, %negasubb
@@ -815,6 +986,13 @@ define i32 @reg32_lshr_by_sub_of_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_sub_of_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %edx, %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x01,0xd6]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 32, %a
   %negasubb = sub i32 %b, %nega
   %shifted = lshr i32 %val, %negasubb
@@ -848,6 +1026,13 @@ define i64 @reg64_lshr_by_sub_of_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_sub_of_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %edx, %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x01,0xd6]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 64, %a
   %negasubb = sub i64 %b, %nega
   %shifted = lshr i64 %val, %negasubb
@@ -876,6 +1061,13 @@ define i32 @reg32_lshr_by_add_to_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_add_to_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    subl %esi, %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf2]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 32, %a
   %negasubb = add i32 %nega, %b
   %shifted = lshr i32 %val, %negasubb
@@ -910,6 +1102,13 @@ define i64 @reg64_lshr_by_add_to_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_add_to_negated:
+; NDD:       # %bb.0:
+; NDD-NEXT:    subl %esi, %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf2]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 64, %a
   %negasubb = add i64 %nega, %b
   %shifted = lshr i64 %val, %negasubb
@@ -937,6 +1136,13 @@ define i32 @reg32_lshr_by_sub_of_negated_amts(i32 %val, i32 %a, i32 %b) nounwind
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_sub_of_negated_amts:
+; NDD:       # %bb.0:
+; NDD-NEXT:    subl %esi, %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf2]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 32, %a
   %negb = sub i32 32, %b
   %negasubnegb = sub i32 %nega, %negb
@@ -971,6 +1177,13 @@ define i64 @reg64_lshr_by_sub_of_negated_amts(i64 %val, i64 %a, i64 %b) nounwind
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_sub_of_negated_amts:
+; NDD:       # %bb.0:
+; NDD-NEXT:    subl %esi, %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf2]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 64, %a
   %negb = sub i64 64, %b
   %negasubnegb = sub i64 %nega, %negb
@@ -1002,6 +1215,13 @@ define i32 @reg32_lshr_by_add_of_negated_amts(i32 %val, i32 %a, i32 %b) nounwind
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_add_of_negated_amts:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %edx, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xd6]
+; NDD-NEXT:    negb %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xd8]
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 32, %a
   %negb = sub i32 32, %b
   %negasubnegb = add i32 %nega, %negb
@@ -1038,6 +1258,13 @@ define i64 @reg64_lshr_by_add_of_negated_amts(i64 %val, i64 %a, i64 %b) nounwind
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_add_of_negated_amts:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %edx, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xd6]
+; NDD-NEXT:    negb %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xd8]
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 64, %a
   %negb = sub i64 64, %b
   %negasubnegb = add i64 %nega, %negb
@@ -1066,6 +1293,12 @@ define i32 @reg32_lshr_by_negated_unfolded(i32 %val, i32 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_negated_unfolded:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i32 0, %shamt
   %negaaddbitwidth = add i32 %negshamt, 32
   %shifted = lshr i32 %val, %negaaddbitwidth
@@ -1099,6 +1332,12 @@ define i64 @reg64_lshr_by_negated_unfolded(i64 %val, i64 %shamt) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_negated_unfolded:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i64 0, %shamt
   %negaaddbitwidth = add i64 %negshamt, 64
   %shifted = lshr i64 %val, %negaaddbitwidth
@@ -1126,6 +1365,13 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %edx, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xd6]
+; NDD-NEXT:    negb %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xd8]
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 0, %a
   %negaaddbitwidth = add i32 %nega, 32
   %negaaddbitwidthsubb = sub i32 %negaaddbitwidth, %b
@@ -1162,6 +1408,13 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %edx, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xd6]
+; NDD-NEXT:    negb %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xd8]
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 0, %a
   %negaaddbitwidth = add i64 %nega, 64
   %negaaddbitwidthsubb = sub i64 %negaaddbitwidth, %b
@@ -1188,6 +1441,13 @@ define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounw
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_b_sub_negated_unfolded:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %esi, %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x01,0xf2]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 0, %a
   %negaaddbitwidth = add i32 %nega, 32
   %negaaddbitwidthsubb = sub i32 %b, %negaaddbitwidth
@@ -1222,6 +1482,13 @@ define i64 @reg64_lshr_by_b_sub_negated_unfolded(i64 %val, i64 %a, i64 %b) nounw
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_b_sub_negated_unfolded:
+; NDD:       # %bb.0:
+; NDD-NEXT:    addl %edx, %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x01,0xd6]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 0, %a
   %negaaddbitwidth = add i64 %nega, 64
   %negaaddbitwidthsubb = sub i64 %b, %negaaddbitwidth
@@ -1247,6 +1514,13 @@ define i32 @reg32_lshr_by_negated_unfolded_add_b(i32 %val, i32 %a, i32 %b) nounw
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_negated_unfolded_add_b:
+; NDD:       # %bb.0:
+; NDD-NEXT:    subl %esi, %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf2]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 0, %a
   %negaaddbitwidth = add i32 %nega, 32
   %negaaddbitwidthaddb = add i32 %negaaddbitwidth, %b
@@ -1282,6 +1556,13 @@ define i64 @reg64_lshr_by_negated_unfolded_add_b(i64 %val, i64 %a, i64 %b) nounw
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_negated_unfolded_add_b:
+; NDD:       # %bb.0:
+; NDD-NEXT:    subl %esi, %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf2]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 0, %a
   %negaaddbitwidth = add i64 %nega, 64
   %negaaddbitwidthaddb = add i64 %negaaddbitwidth, %b
@@ -1310,6 +1591,12 @@ define i32 @reg32_lshr_by_masked_negated_unfolded(i32 %val, i32 %shamt) nounwind
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_masked_negated_unfolded:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i32 0, %shamt
   %negaaddbitwidth = and i32 %negshamt, 31
   %shifted = lshr i32 %val, %negaaddbitwidth
@@ -1344,6 +1631,12 @@ define i64 @reg64_lshr_by_masked_negated_unfolded(i64 %val, i64 %shamt) nounwind
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_masked_negated_unfolded:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negb %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0xf6,0xde]
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %negshamt = sub i64 0, %shamt
   %negaaddbitwidth = and i64 %negshamt, 63
   %shifted = lshr i64 %val, %negaaddbitwidth
@@ -1372,6 +1665,15 @@ define i32 @reg32_lshr_by_masked_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_masked_negated_unfolded_sub_b:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negl %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xf7,0xde]
+; NDD-NEXT:    andl $31, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x1f]
+; NDD-NEXT:    subl %edx, %eax, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xd0]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 0, %a
   %negaaddbitwidth = and i32 %nega, 31
   %negaaddbitwidthsubb = sub i32 %negaaddbitwidth, %b
@@ -1410,6 +1712,15 @@ define i64 @reg64_lshr_by_masked_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_masked_negated_unfolded_sub_b:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negl %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xf7,0xde]
+; NDD-NEXT:    andl $63, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x3f]
+; NDD-NEXT:    subl %edx, %eax, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xd0]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 0, %a
   %negaaddbitwidth = and i64 %nega, 63
   %negaaddbitwidthsubb = sub i64 %negaaddbitwidth, %b
@@ -1440,6 +1751,15 @@ define i32 @reg32_lshr_by_masked_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_masked_b_sub_negated_unfolded:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negl %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xf7,0xde]
+; NDD-NEXT:    andl $31, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x1f]
+; NDD-NEXT:    subl %eax, %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xc2]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 0, %a
   %negaaddbitwidth = and i32 %nega, 31
   %negaaddbitwidthsubb = sub i32 %b, %negaaddbitwidth
@@ -1479,6 +1799,15 @@ define i64 @reg64_lshr_by_masked_b_sub_negated_unfolded(i64 %val, i64 %a, i64 %b
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_masked_b_sub_negated_unfolded:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negl %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xf7,0xde]
+; NDD-NEXT:    andl $63, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x3f]
+; NDD-NEXT:    subl %eax, %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xc2]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 0, %a
   %negaaddbitwidth = and i64 %nega, 63
   %negaaddbitwidthsubb = sub i64 %b, %negaaddbitwidth
@@ -1509,6 +1838,15 @@ define i32 @reg32_lshr_by_masked_negated_unfolded_add_b(i32 %val, i32 %a, i32 %b
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg32_lshr_by_masked_negated_unfolded_add_b:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negl %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xf7,0xde]
+; NDD-NEXT:    andl $31, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x1f]
+; NDD-NEXT:    addl %edx, %eax, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x01,0xd0]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i32 0, %a
   %negaaddbitwidth = and i32 %nega, 31
   %negaaddbitwidthaddb = add i32 %negaaddbitwidth, %b
@@ -1546,6 +1884,15 @@ define i64 @reg64_lshr_by_masked_negated_unfolded_add_b(i64 %val, i64 %a, i64 %b
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: reg64_lshr_by_masked_negated_unfolded_add_b:
+; NDD:       # %bb.0:
+; NDD-NEXT:    negl %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xf7,0xde]
+; NDD-NEXT:    andl $63, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x3f]
+; NDD-NEXT:    addl %edx, %eax, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x01,0xd0]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %nega = sub i64 0, %a
   %negaaddbitwidth = and i64 %nega, 63
   %negaaddbitwidthaddb = add i64 %negaaddbitwidth, %b
@@ -1569,6 +1916,13 @@ define i16 @sh_trunc_sh(i64 %x) {
 ; X64-NEXT:    andl $15, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: sh_trunc_sh:
+; NDD:       # %bb.0:
+; NDD-NEXT:    shrq $36, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x24]
+; NDD-NEXT:    andl $15, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x0f]
+; NDD-NEXT:    # kill: def $ax killed $ax killed $eax
+; NDD-NEXT:    retq # encoding: [0xc3]
   %s = lshr i64 %x, 24
   %t = trunc i64 %s to i16
   %r = lshr i16 %t, 12
diff --git a/llvm/test/CodeGen/X86/shift-and-x86_64.ll b/llvm/test/CodeGen/X86/shift-and-x86_64.ll
index 9dd7974d0ff4d6..99f2fc8df1cc76 100644
--- a/llvm/test/CodeGen/X86/shift-and-x86_64.ll
+++ b/llvm/test/CodeGen/X86/shift-and-x86_64.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ndd --show-mc-encoding | FileCheck --check-prefix=NDD %s
 
 define { i64, i64 } @PR36721_u8(i64, i64, i8 zeroext) nounwind {
 ; CHECK-LABEL: PR36721_u8:
@@ -11,6 +12,14 @@ define { i64, i64 } @PR36721_u8(i64, i64, i8 zeroext) nounwind {
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shlq %cl, %rax
 ; CHECK-NEXT:    retq
+;
+; NDD-LABEL: PR36721_u8:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %edx, %ecx # encoding: [0x89,0xd1]
+; NDD-NEXT:    shldq %cl, %rdi, %rsi, %rdx # encoding: [0x62,0xf4,0xec,0x18,0xa5,0xfe]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shlq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xe7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %4 = zext i64 %1 to i128
   %5 = shl nuw i128 %4, 64
   %6 = zext i64 %0 to i128
@@ -36,6 +45,14 @@ define { i64, i64 } @PR36721_u32(i64, i64, i32) nounwind {
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shlq %cl, %rax
 ; CHECK-NEXT:    retq
+;
+; NDD-LABEL: PR36721_u32:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %edx, %ecx # encoding: [0x89,0xd1]
+; NDD-NEXT:    shldq %cl, %rdi, %rsi, %rdx # encoding: [0x62,0xf4,0xec,0x18,0xa5,0xfe]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shlq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xe7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %4 = zext i64 %1 to i128
   %5 = shl nuw i128 %4, 64
   %6 = zext i64 %0 to i128
diff --git a/llvm/test/CodeGen/X86/shift-and.ll b/llvm/test/CodeGen/X86/shift-and.ll
index f6d73b1fbc6e7c..77891a22b108d0 100644
--- a/llvm/test/CodeGen/X86/shift-and.ll
+++ b/llvm/test/CodeGen/X86/shift-and.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i386-unknown-unknown   | FileCheck %s --check-prefix=X32
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ndd --show-mc-encoding | FileCheck %s --check-prefix=NDD
 
 define i32 @t1(i32 %t, i32 %val) nounwind {
 ; X32-LABEL: t1:
@@ -17,6 +18,13 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: t1:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %edi, %ecx # encoding: [0x89,0xf9]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shll %cl, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xe6]
+; NDD-NEXT:    retq # encoding: [0xc3]
        %shamt = and i32 %t, 31
        %res = shl i32 %val, %shamt
        ret i32 %res
@@ -37,6 +45,13 @@ define i32 @t2(i32 %t, i32 %val) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: t2:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %edi, %ecx # encoding: [0x89,0xf9]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    shll %cl, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xe6]
+; NDD-NEXT:    retq # encoding: [0xc3]
        %shamt = and i32 %t, 63
        %res = shl i32 %val, %shamt
        ret i32 %res
@@ -57,6 +72,14 @@ define void @t3(i16 %t) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    sarw %cl, X(%rip)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: t3:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %edi, %ecx # encoding: [0x89,0xf9]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $ecx
+; NDD-NEXT:    sarw %cl, X(%rip) # encoding: [0x66,0xd3,0x3d,A,A,A,A]
+; NDD-NEXT:    # fixup A - offset: 3, value: X-4, kind: reloc_riprel_4byte
+; NDD-NEXT:    retq # encoding: [0xc3]
        %shamt = and i16 %t, 31
        %tmp = load i16, ptr @X
        %tmp1 = ashr i16 %tmp, %shamt
@@ -90,6 +113,13 @@ define i64 @t4(i64 %t, i64 %val) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: t4:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movq %rdi, %rcx # encoding: [0x48,0x89,0xf9]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $rcx
+; NDD-NEXT:    shrq %cl, %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xee]
+; NDD-NEXT:    retq # encoding: [0xc3]
        %shamt = and i64 %t, 63
        %res = lshr i64 %val, %shamt
        ret i64 %res
@@ -121,6 +151,13 @@ define i64 @t5(i64 %t, i64 %val) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: t5:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movq %rdi, %rcx # encoding: [0x48,0x89,0xf9]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $rcx
+; NDD-NEXT:    shrq %cl, %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xee]
+; NDD-NEXT:    retq # encoding: [0xc3]
        %shamt = and i64 %t, 191
        %res = lshr i64 %val, %shamt
        ret i64 %res
@@ -156,6 +193,13 @@ define void @t5ptr(i64 %t, ptr %ptr) nounwind {
 ; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, (%rsi)
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: t5ptr:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movq %rdi, %rcx # encoding: [0x48,0x89,0xf9]
+; NDD-NEXT:    # kill: def $cl killed $cl killed $rcx
+; NDD-NEXT:    shrq %cl, (%rsi) # encoding: [0x48,0xd3,0x2e]
+; NDD-NEXT:    retq # encoding: [0xc3]
        %shamt = and i64 %t, 191
        %tmp = load i64, ptr %ptr
        %tmp1 = lshr i64 %tmp, %shamt
@@ -191,6 +235,13 @@ define i64 @t6(i64 %key, ptr nocapture %val) nounwind {
 ; X64-NEXT:    decq %rax
 ; X64-NEXT:    andq %rdi, %rax
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: t6:
+; NDD:       # %bb.0:
+; NDD-NEXT:    shrq $3, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x03]
+; NDD-NEXT:    decq (%rsi), %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xff,0x0e]
+; NDD-NEXT:    andq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xc8]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %shr = lshr i64 %key, 3
   %1 = load i64, ptr %val, align 8
   %sub = add i64 %1, 2305843009213693951
@@ -213,6 +264,13 @@ define i64 @big_mask_constant(i64 %x) nounwind {
 ; X64-NEXT:    shrq $7, %rax
 ; X64-NEXT:    andl $134217728, %eax # imm = 0x8000000
 ; X64-NEXT:    retq
+;
+; NDD-LABEL: big_mask_constant:
+; NDD:       # %bb.0:
+; NDD-NEXT:    shrq $7, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x07]
+; NDD-NEXT:    andl $134217728, %eax # EVEX TO LEGACY Compression encoding: [0x25,0x00,0x00,0x00,0x08]
+; NDD-NEXT:    # imm = 0x8000000
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %x, 17179869184 ; 0x400000000
   %sh = lshr i64 %and, 7
   ret i64 %sh

>From 2db4eb20434d962d0ded97b741fd057106f7e172 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Mon, 22 Jan 2024 13:49:59 +0800
Subject: [PATCH 3/4] address review comment

---
 llvm/lib/Target/X86/X86InstrCompiler.td    |   1 -
 llvm/lib/Target/X86/X86InstrShiftRotate.td | 254 ++++++++++-----------
 llvm/lib/Target/X86/X86InstrUtils.td       |  24 +-
 3 files changed, 139 insertions(+), 140 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 14507b1d3c7ee4..9f1712274bc304 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1844,7 +1844,6 @@ defm SAR : MaskedShiftAmountPats<sra>;
 // not tracking flags for these nodes.
 multiclass MaskedRotateAmountPats<SDNode frag> {
   // (rot x (and y, BitWidth - 1)) ==> (rot x, y)
-  // (rot x (and y, 63)) ==> (rot x, y)
   let Predicates = [NoNDD] in {
     def : Pat<(frag GR8:$src1, (shiftMask8 CL)),
               (!cast<Instruction>(NAME # "8rCL") GR8:$src1)>;
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 7e2893f340973a..2a5488847e6489 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -33,10 +33,10 @@ multiclass ShiftRotate<string m, Format RegMRM, Format MemMRM, SDPatternOperator
         def 64ri_ND : BinOpRI8U_R<m, RegMRM, Xi64, node, 1>, Sched<[ri]>, DefEFLAGS;
       }
       let Predicates = [In64BitMode] in {
-        def 8ri_EVEX  : BinOpRI8U_R<m, RegMRM, Xi8, null_frag>, Sched<[ri]>, DefEFLAGS, PL;
-        def 16ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi16, null_frag>, Sched<[ri]>, DefEFLAGS, PL, PD;
-        def 32ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi32, null_frag>, Sched<[ri]>, DefEFLAGS, PL;
-        def 64ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi64, null_frag>, Sched<[ri]>, DefEFLAGS, PL;
+        def 8ri_EVEX  : BinOpRI8U_R<m, RegMRM, Xi8>, Sched<[ri]>, DefEFLAGS, PL;
+        def 16ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi16>, Sched<[ri]>, DefEFLAGS, PL, PD;
+        def 32ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi32>, Sched<[ri]>, DefEFLAGS, PL;
+        def 64ri_EVEX : BinOpRI8U_R<m, RegMRM, Xi64>, Sched<[ri]>, DefEFLAGS, PL;
       }
     }
 
@@ -51,95 +51,95 @@ multiclass ShiftRotate<string m, Format RegMRM, Format MemMRM, SDPatternOperator
       def 64mi_ND : BinOpMI8U_R<m, MemMRM, Xi64, node>, Sched<[mi, ri]>, DefEFLAGS;
     }
     let Predicates = [In64BitMode] in {
-      def 8mi_EVEX  : BinOpMI8U_M<m, MemMRM, Xi8, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
-      def 16mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi16, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL, PD;
-      def 32mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi32, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
-      def 64mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi64, null_frag>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
+      def 8mi_EVEX  : BinOpMI8U_M<m, MemMRM, Xi8>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
+      def 16mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi16>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL, PD;
+      def 32mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi32>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
+      def 64mi_EVEX : BinOpMI8U_M<m, MemMRM, Xi64>, Sched<[mi, WriteRMW]>, DefEFLAGS, PL;
     }
 
     let SchedRW = [ri] in {
-      def 8r1  : UnaryOpR_RF<0xD1, RegMRM, m, Xi8, null_frag>;
-      def 16r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi16, null_frag>, OpSize16;
-      def 32r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi32, null_frag>, OpSize32;
-      def 64r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi64, null_frag>;
+      def 8r1  : UnaryOpR_RF<0xD1, RegMRM, m, Xi8>;
+      def 16r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi16>, OpSize16;
+      def 32r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi32>, OpSize32;
+      def 64r1 : UnaryOpR_RF<0xD1, RegMRM, m, Xi64>;
 
       // FIXME: Assembler can't tell whether it's 8r1_ND or 8rCL when the source register is cl, e.g.
       //
       //  shlb %cl, %al
       //
       // GNU binutils distinguish them by adding an explicit $1 to asm string of 8r1_ND. But we haven't support
-      // constant immediate in ams string for X86 in TD. So we add DisassembleOnly for 8r1_ND for the time being.
+      // constant immediate in asm string for X86 in TD. So we add DisassembleOnly for 8r1_ND for the time being.
       let Predicates = [In64BitMode] in {
         def 8r1_ND  : UnaryOpR_RF<0xD1, RegMRM, m, Xi8, null_frag, 1>, DisassembleOnly;
         def 16r1_ND : UnaryOpR_RF<0xD1, RegMRM, m, Xi16, null_frag, 1>, PD;
         def 32r1_ND : UnaryOpR_RF<0xD1, RegMRM, m, Xi32, null_frag, 1>;
         def 64r1_ND : UnaryOpR_RF<0xD1, RegMRM, m, Xi64, null_frag, 1>;
 
-        def 8r1_EVEX  : UnaryOpR_RF<0xD1, RegMRM, m, Xi8, null_frag>, PL;
-        def 16r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi16, null_frag>, PL, PD;
-        def 32r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi32, null_frag>, PL;
-        def 64r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi64, null_frag>, PL;
+        def 8r1_EVEX  : UnaryOpR_RF<0xD1, RegMRM, m, Xi8>, PL;
+        def 16r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi16>, PL, PD;
+        def 32r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi32>, PL;
+        def 64r1_EVEX : UnaryOpR_RF<0xD1, RegMRM, m, Xi64>, PL;
       }
     }
 
     let SchedRW = [mi, WriteRMW] in {
-      def 8m1  : UnaryOpM_MF<0xD1, MemMRM, m, Xi8, null_frag>;
-      def 16m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi16, null_frag>, OpSize16;
-      def 32m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi32, null_frag>, OpSize32;
-      def 64m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi64, null_frag>, Requires<[In64BitMode]>;
+      def 8m1  : UnaryOpM_MF<0xD1, MemMRM, m, Xi8>;
+      def 16m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi16>, OpSize16;
+      def 32m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi32>, OpSize32;
+      def 64m1 : UnaryOpM_MF<0xD1, MemMRM, m, Xi64>, Requires<[In64BitMode]>;
 
       let Predicates = [In64BitMode] in {
-        def 8m1_EVEX  : UnaryOpM_MF<0xD1, MemMRM, m, Xi8, null_frag>, PL;
-        def 16m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi16, null_frag>, PL, PD;
-        def 32m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi32, null_frag>, PL;
-        def 64m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi64, null_frag>, PL;
+        def 8m1_EVEX  : UnaryOpM_MF<0xD1, MemMRM, m, Xi8>, PL;
+        def 16m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi16>, PL, PD;
+        def 32m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi32>, PL;
+        def 64m1_EVEX : UnaryOpM_MF<0xD1, MemMRM, m, Xi64>, PL;
       }
     }
     let SchedRW = [mi, ri], Predicates = [In64BitMode] in {
-      def 8m1_ND  : UnaryOpM_RF<0xD1, MemMRM, m, Xi8, null_frag>;
-      def 16m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi16, null_frag>, PD;
-      def 32m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi32, null_frag>;
-      def 64m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi64, null_frag>;
+      def 8m1_ND  : UnaryOpM_RF<0xD1, MemMRM, m, Xi8>;
+      def 16m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi16>, PD;
+      def 32m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi32>;
+      def 64m1_ND : UnaryOpM_RF<0xD1, MemMRM, m, Xi64>;
     }
   }
 
-  let Uses = !listconcat([CL], uses) in {
+  let Uses = !listconcat([CL], uses), Defs = [EFLAGS] in {
     let Predicates = [NoNDD] in {
-      def 8rCL  : BinOpRC_R<m, RegMRM, Xi8, node>, Sched<[rCL]>, DefEFLAGS;
-      def 16rCL : BinOpRC_R<m, RegMRM, Xi16, node>, Sched<[rCL]>, DefEFLAGS, OpSize16;
-      def 32rCL : BinOpRC_R<m, RegMRM, Xi32, node>, Sched<[rCL]>, DefEFLAGS, OpSize32;
-      def 64rCL : BinOpRC_R<m, RegMRM, Xi64, node>, Sched<[rCL]>, DefEFLAGS;
+      def 8rCL  : BinOpRC_R<m, RegMRM, Xi8, node>, Sched<[rCL]>;
+      def 16rCL : BinOpRC_R<m, RegMRM, Xi16, node>, Sched<[rCL]>, OpSize16;
+      def 32rCL : BinOpRC_R<m, RegMRM, Xi32, node>, Sched<[rCL]>, OpSize32;
+      def 64rCL : BinOpRC_R<m, RegMRM, Xi64, node>, Sched<[rCL]>;
     }
     let Predicates = [HasNDD, In64BitMode] in {
-      def 8rCL_ND  : BinOpRC_R<m, RegMRM, Xi8, node, 1>, Sched<[rCL]>, DefEFLAGS;
-      def 16rCL_ND : BinOpRC_R<m, RegMRM, Xi16, node, 1>, Sched<[rCL]>, DefEFLAGS, PD;
-      def 32rCL_ND : BinOpRC_R<m, RegMRM, Xi32, node, 1>, Sched<[rCL]>, DefEFLAGS;
-      def 64rCL_ND : BinOpRC_R<m, RegMRM, Xi64, node, 1>, Sched<[rCL]>, DefEFLAGS;
+      def 8rCL_ND  : BinOpRC_R<m, RegMRM, Xi8, node, 1>, Sched<[rCL]>;
+      def 16rCL_ND : BinOpRC_R<m, RegMRM, Xi16, node, 1>, Sched<[rCL]>, PD;
+      def 32rCL_ND : BinOpRC_R<m, RegMRM, Xi32, node, 1>, Sched<[rCL]>;
+      def 64rCL_ND : BinOpRC_R<m, RegMRM, Xi64, node, 1>, Sched<[rCL]>;
     }
     let Predicates = [In64BitMode] in {
-      def 8rCL_EVEX  : BinOpRC_R<m, RegMRM, Xi8, null_frag>, Sched<[rCL]>, DefEFLAGS, PL;
-      def 16rCL_EVEX : BinOpRC_R<m, RegMRM, Xi16, null_frag>, Sched<[rCL]>, DefEFLAGS, PL, PD;
-      def 32rCL_EVEX : BinOpRC_R<m, RegMRM, Xi32, null_frag>, Sched<[rCL]>, DefEFLAGS, PL;
-      def 64rCL_EVEX : BinOpRC_R<m, RegMRM, Xi64, null_frag>, Sched<[rCL]>, DefEFLAGS, PL;
+      def 8rCL_EVEX  : BinOpRC_R<m, RegMRM, Xi8>, Sched<[rCL]>, PL;
+      def 16rCL_EVEX : BinOpRC_R<m, RegMRM, Xi16>, Sched<[rCL]>, PL, PD;
+      def 32rCL_EVEX : BinOpRC_R<m, RegMRM, Xi32>, Sched<[rCL]>, PL;
+      def 64rCL_EVEX : BinOpRC_R<m, RegMRM, Xi64>, Sched<[rCL]>, PL;
     }
 
-    def 8mCL  : BinOpMC_M<m, MemMRM, Xi8, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS;
-    def 16mCL : BinOpMC_M<m, MemMRM, Xi16, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS, OpSize16;
-    def 32mCL : BinOpMC_M<m, MemMRM, Xi32, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS, OpSize32;
-    def 64mCL : BinOpMC_M<m, MemMRM, Xi64, node>, Sched<[mCL, WriteRMW]>, DefEFLAGS, Requires<[In64BitMode]>;
+    def 8mCL  : BinOpMC_M<m, MemMRM, Xi8, node>, Sched<[mCL, WriteRMW]>;
+    def 16mCL : BinOpMC_M<m, MemMRM, Xi16, node>, Sched<[mCL, WriteRMW]>, OpSize16;
+    def 32mCL : BinOpMC_M<m, MemMRM, Xi32, node>, Sched<[mCL, WriteRMW]>, OpSize32;
+    def 64mCL : BinOpMC_M<m, MemMRM, Xi64, node>, Sched<[mCL, WriteRMW]>, Requires<[In64BitMode]>;
 
     let Predicates = [HasNDD, In64BitMode] in {
-      def 8mCL_ND  : BinOpMC_R<m, MemMRM, Xi8, node>, Sched<[mCL, rCL]>, DefEFLAGS;
-      def 16mCL_ND : BinOpMC_R<m, MemMRM, Xi16, node>, Sched<[mCL, rCL]>, DefEFLAGS, PD;
-      def 32mCL_ND : BinOpMC_R<m, MemMRM, Xi32, node>, Sched<[mCL, rCL]>, DefEFLAGS;
-      def 64mCL_ND : BinOpMC_R<m, MemMRM, Xi64, node>, Sched<[mCL, rCL]>, DefEFLAGS;
+      def 8mCL_ND  : BinOpMC_R<m, MemMRM, Xi8, node>, Sched<[mCL, rCL]>;
+      def 16mCL_ND : BinOpMC_R<m, MemMRM, Xi16, node>, Sched<[mCL, rCL]>, PD;
+      def 32mCL_ND : BinOpMC_R<m, MemMRM, Xi32, node>, Sched<[mCL, rCL]>;
+      def 64mCL_ND : BinOpMC_R<m, MemMRM, Xi64, node>, Sched<[mCL, rCL]>;
     }
 
     let Predicates = [In64BitMode] in {
-      def 8mCL_EVEX  : BinOpMC_M<m, MemMRM, Xi8, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL;
-      def 16mCL_EVEX : BinOpMC_M<m, MemMRM, Xi16, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL, PD;
-      def 32mCL_EVEX : BinOpMC_M<m, MemMRM, Xi32, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL;
-      def 64mCL_EVEX : BinOpMC_M<m, MemMRM, Xi64, null_frag>, Sched<[mCL, WriteRMW]>, DefEFLAGS, PL;
+      def 8mCL_EVEX  : BinOpMC_M<m, MemMRM, Xi8>, Sched<[mCL, WriteRMW]>, PL;
+      def 16mCL_EVEX : BinOpMC_M<m, MemMRM, Xi16>, Sched<[mCL, WriteRMW]>, PL, PD;
+      def 32mCL_EVEX : BinOpMC_M<m, MemMRM, Xi32>, Sched<[mCL, WriteRMW]>, PL;
+      def 64mCL_EVEX : BinOpMC_M<m, MemMRM, Xi64>, Sched<[mCL, WriteRMW]>, PL;
     }
   }
 }
@@ -148,10 +148,10 @@ multiclass ShiftRotate_NF<string m, Format RegMRM, Format MemMRM, SchedReadWrite
                           SchedReadWrite ri, SchedReadWrite mCL, SchedReadWrite mi> {
   let Predicates = [In64BitMode] in {
     let isConvertibleToThreeAddress = !if(!eq(m, "shl"), 1, 0) in {
-      def 8ri_NF  : BinOpRI8U_R<m, RegMRM, Xi8, null_frag>, Sched<[ri]>, NF;
-      def 16ri_NF : BinOpRI8U_R<m, RegMRM, Xi16, null_frag>, Sched<[ri]>, NF, PD;
-      def 32ri_NF : BinOpRI8U_R<m, RegMRM, Xi32, null_frag>, Sched<[ri]>, NF;
-      def 64ri_NF : BinOpRI8U_R<m, RegMRM, Xi64, null_frag>, Sched<[ri]>, NF;
+      def 8ri_NF  : BinOpRI8U_R<m, RegMRM, Xi8>, Sched<[ri]>, NF;
+      def 16ri_NF : BinOpRI8U_R<m, RegMRM, Xi16>, Sched<[ri]>, NF, PD;
+      def 32ri_NF : BinOpRI8U_R<m, RegMRM, Xi32>, Sched<[ri]>, NF;
+      def 64ri_NF : BinOpRI8U_R<m, RegMRM, Xi64>, Sched<[ri]>, NF;
 
       def 8ri_NF_ND  : BinOpRI8U_R<m, RegMRM, Xi8, null_frag, 1>, Sched<[ri]>, EVEX_NF;
       def 16ri_NF_ND : BinOpRI8U_R<m, RegMRM, Xi16, null_frag, 1>, Sched<[ri]>, EVEX_NF, PD;
@@ -159,15 +159,15 @@ multiclass ShiftRotate_NF<string m, Format RegMRM, Format MemMRM, SchedReadWrite
       def 64ri_NF_ND : BinOpRI8U_R<m, RegMRM, Xi64, null_frag, 1>, Sched<[ri]>, EVEX_NF;
     }
 
-    def 8mi_NF  : BinOpMI8U_M<m, MemMRM, Xi8, null_frag>, Sched<[mi, WriteRMW]>, NF;
-    def 16mi_NF : BinOpMI8U_M<m, MemMRM, Xi16, null_frag>, Sched<[mi, WriteRMW]>, NF, PD;
-    def 32mi_NF : BinOpMI8U_M<m, MemMRM, Xi32, null_frag>, Sched<[mi, WriteRMW]>, NF;
-    def 64mi_NF : BinOpMI8U_M<m, MemMRM, Xi64, null_frag>, Sched<[mi, WriteRMW]>, NF;
+    def 8mi_NF  : BinOpMI8U_M<m, MemMRM, Xi8>, Sched<[mi, WriteRMW]>, NF;
+    def 16mi_NF : BinOpMI8U_M<m, MemMRM, Xi16>, Sched<[mi, WriteRMW]>, NF, PD;
+    def 32mi_NF : BinOpMI8U_M<m, MemMRM, Xi32>, Sched<[mi, WriteRMW]>, NF;
+    def 64mi_NF : BinOpMI8U_M<m, MemMRM, Xi64>, Sched<[mi, WriteRMW]>, NF;
 
-    def 8mi_NF_ND  : BinOpMI8U_R<m, MemMRM, Xi8, null_frag>, Sched<[mi, ri]>, EVEX_NF;
-    def 16mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi16, null_frag>, Sched<[mi, ri]>, EVEX_NF, PD;
-    def 32mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi32, null_frag>, Sched<[mi, ri]>, EVEX_NF;
-    def 64mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi64, null_frag>, Sched<[mi, ri]>, EVEX_NF;
+    def 8mi_NF_ND  : BinOpMI8U_R<m, MemMRM, Xi8>, Sched<[mi, ri]>, EVEX_NF;
+    def 16mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi16>, Sched<[mi, ri]>, EVEX_NF, PD;
+    def 32mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi32>, Sched<[mi, ri]>, EVEX_NF;
+    def 64mi_NF_ND : BinOpMI8U_R<m, MemMRM, Xi64>, Sched<[mi, ri]>, EVEX_NF;
 
     let SchedRW = [ri] in {
       // FIXME: Assembler can't tell whether it's 8r1_NF_ND or 8rCL_NF when the source register is cl, e.g.
@@ -175,11 +175,11 @@ multiclass ShiftRotate_NF<string m, Format RegMRM, Format MemMRM, SchedReadWrite
       //  {nf} shlb %cl, %al
       //
       // GNU binutils distinguish them by adding an explicit $1 to asm string of 8r1_NF_ND. But we haven't support
-      // constant immediate in ams string for X86 in TD. So we add DisassembleOnly for 8r1_NF_ND for the time being.
-      def 8r1_NF  : UnaryOpR_R<0xD1, RegMRM, m, Xi8, null_frag>, NF;
-      def 16r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi16, null_frag>, NF, PD;
-      def 32r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi32, null_frag>, NF;
-      def 64r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi64, null_frag>, NF;
+      // constant immediate in asm string for X86 in TD. So we add DisassembleOnly for 8r1_NF_ND for the time being.
+      def 8r1_NF  : UnaryOpR_R<0xD1, RegMRM, m, Xi8>, NF;
+      def 16r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi16>, NF, PD;
+      def 32r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi32>, NF;
+      def 64r1_NF : UnaryOpR_R<0xD1, RegMRM, m, Xi64>, NF;
 
       def 8r1_NF_ND  : UnaryOpR_R<0xD1, RegMRM, m, Xi8, null_frag, 1>, EVEX_NF, DisassembleOnly;
       def 16r1_NF_ND : UnaryOpR_R<0xD1, RegMRM, m, Xi16, null_frag, 1>, EVEX_NF, PD;
@@ -188,38 +188,38 @@ multiclass ShiftRotate_NF<string m, Format RegMRM, Format MemMRM, SchedReadWrite
     }
 
     let SchedRW = [mi, WriteRMW] in {
-      def 8m1_NF  : UnaryOpM_M<0xD1, MemMRM, m, Xi8, null_frag>, NF;
-      def 16m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi16, null_frag>, NF, PD;
-      def 32m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi32, null_frag>, NF;
-      def 64m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi64, null_frag>, NF;
+      def 8m1_NF  : UnaryOpM_M<0xD1, MemMRM, m, Xi8>, NF;
+      def 16m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi16>, NF, PD;
+      def 32m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi32>, NF;
+      def 64m1_NF : UnaryOpM_M<0xD1, MemMRM, m, Xi64>, NF;
     }
     let SchedRW = [mi, ri] in {
-      def 8m1_NF_ND  : UnaryOpM_R<0xD1, MemMRM, m, Xi8, null_frag>, EVEX_NF;
-      def 16m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi16, null_frag>, EVEX_NF, PD;
-      def 32m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi32, null_frag>, EVEX_NF;
-      def 64m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi64, null_frag>, EVEX_NF;
+      def 8m1_NF_ND  : UnaryOpM_R<0xD1, MemMRM, m, Xi8>, EVEX_NF;
+      def 16m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi16>, EVEX_NF, PD;
+      def 32m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi32>, EVEX_NF;
+      def 64m1_NF_ND : UnaryOpM_R<0xD1, MemMRM, m, Xi64>, EVEX_NF;
     }
 
     let Uses = [CL] in {
-      def 8rCL_NF  : BinOpRC_R<m, RegMRM, Xi8, null_frag>, Sched<[rCL]>, NF;
-      def 16rCL_NF : BinOpRC_R<m, RegMRM, Xi16, null_frag>, Sched<[rCL]>, NF, PD;
-      def 32rCL_NF : BinOpRC_R<m, RegMRM, Xi32, null_frag>, Sched<[rCL]>, NF;
-      def 64rCL_NF : BinOpRC_R<m, RegMRM, Xi64, null_frag>, Sched<[rCL]>, NF;
+      def 8rCL_NF  : BinOpRC_R<m, RegMRM, Xi8>, Sched<[rCL]>, NF;
+      def 16rCL_NF : BinOpRC_R<m, RegMRM, Xi16>, Sched<[rCL]>, NF, PD;
+      def 32rCL_NF : BinOpRC_R<m, RegMRM, Xi32>, Sched<[rCL]>, NF;
+      def 64rCL_NF : BinOpRC_R<m, RegMRM, Xi64>, Sched<[rCL]>, NF;
 
       def 8rCL_NF_ND  : BinOpRC_R<m, RegMRM, Xi8, null_frag, 1>, Sched<[rCL]>, EVEX_NF;
       def 16rCL_NF_ND : BinOpRC_R<m, RegMRM, Xi16, null_frag, 1>, Sched<[rCL]>, EVEX_NF, PD;
       def 32rCL_NF_ND : BinOpRC_R<m, RegMRM, Xi32, null_frag, 1>, Sched<[rCL]>, EVEX_NF;
       def 64rCL_NF_ND : BinOpRC_R<m, RegMRM, Xi64, null_frag, 1>, Sched<[rCL]>, EVEX_NF;
 
-      def 8mCL_NF  : BinOpMC_M<m, MemMRM, Xi8, null_frag>, Sched<[mCL, WriteRMW]>, NF;
-      def 16mCL_NF : BinOpMC_M<m, MemMRM, Xi16, null_frag>, Sched<[mCL, WriteRMW]>, NF, PD;
-      def 32mCL_NF : BinOpMC_M<m, MemMRM, Xi32, null_frag>, Sched<[mCL, WriteRMW]>, NF;
-      def 64mCL_NF : BinOpMC_M<m, MemMRM, Xi64, null_frag>, Sched<[mCL, WriteRMW]>, NF;
+      def 8mCL_NF  : BinOpMC_M<m, MemMRM, Xi8>, Sched<[mCL, WriteRMW]>, NF;
+      def 16mCL_NF : BinOpMC_M<m, MemMRM, Xi16>, Sched<[mCL, WriteRMW]>, NF, PD;
+      def 32mCL_NF : BinOpMC_M<m, MemMRM, Xi32>, Sched<[mCL, WriteRMW]>, NF;
+      def 64mCL_NF : BinOpMC_M<m, MemMRM, Xi64>, Sched<[mCL, WriteRMW]>, NF;
 
-      def 8mCL_NF_ND  : BinOpMC_R<m, MemMRM, Xi8, null_frag>, Sched<[mCL, rCL]>, EVEX_NF;
-      def 16mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi16, null_frag>, Sched<[mCL, rCL]>, EVEX_NF, PD;
-      def 32mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi32, null_frag>, Sched<[mCL, rCL]>, EVEX_NF;
-      def 64mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi64, null_frag>, Sched<[mCL, rCL]>, EVEX_NF;
+      def 8mCL_NF_ND  : BinOpMC_R<m, MemMRM, Xi8>, Sched<[mCL, rCL]>, EVEX_NF;
+      def 16mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi16>, Sched<[mCL, rCL]>, EVEX_NF, PD;
+      def 32mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi32>, Sched<[mCL, rCL]>, EVEX_NF;
+      def 64mCL_NF_ND : BinOpMC_R<m, MemMRM, Xi64>, Sched<[mCL, rCL]>, EVEX_NF;
     }
   }
 }
@@ -343,7 +343,7 @@ let Predicates = [HasNDD] in {
 // Double precision shift instructions (generalizations of rotate)
 //===----------------------------------------------------------------------===//
 
-class ShlrdOpRRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
+class ShlrdOpRRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node = null_frag, bit ndd = 0>
   : ITy<o, MRMDestReg, t, (outs t.RegClass:$dst),
         (ins t.RegClass:$src1, t.RegClass:$src2, u8imm:$src3), m, !if(!eq(ndd, 0), triop_args, triop_ndd_args),
         []>, NDD<ndd> {
@@ -355,7 +355,7 @@ class ShlrdOpRRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
                     [(set t.RegClass:$dst, (node t.RegClass:$src2, t.RegClass:$src1, (i8 imm:$src3)))]);
 }
 
-class ShlrdOpRRC_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
+class ShlrdOpRRC_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node = null_frag, bit ndd = 0>
   : BinOpRR<o, m, !if(!eq(ndd, 0), triop_cl_args, triop_cl_ndd_args), t, (outs t.RegClass:$dst), []>, NDD<ndd> {
   let Uses = [CL];
   let SchedRW = [WriteSHDrrcl];
@@ -364,7 +364,7 @@ class ShlrdOpRRC_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, b
                     [(set t.RegClass:$dst, (node t.RegClass:$src2, t.RegClass:$src1, CL))]);
 }
 
-class ShlrdOpMRI8U_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+class ShlrdOpMRI8U_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node = null_frag>
   : ITy<o, MRMDestMem, t, (outs), (ins t.MemOperand:$src1, t.RegClass:$src2, u8imm:$src3),
         m, triop_args, []>, TB {
   let ImmT = Imm8;
@@ -376,7 +376,7 @@ class ShlrdOpMRI8U_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
                     [(store (node t.RegClass:$src2, (t.LoadNode addr:$src1), (i8 imm:$src3)), addr:$src1)]);
 }
 
-class ShlrdOpMRC_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+class ShlrdOpMRC_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node = null_frag>
   : BinOpMR<o, m, triop_cl_args, t, (outs), []>, TB {
   let Uses = [CL];
   let SchedRW = [WriteSHDmrcl];
@@ -386,7 +386,7 @@ class ShlrdOpMRC_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
                     [(store (node t.RegClass:$src2, (t.LoadNode addr:$src1), CL), addr:$src1)]);
 }
 
-class ShlrdOpMRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+class ShlrdOpMRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node = null_frag>
   : ITy<o, MRMDestMem, t, (outs t.RegClass:$dst), (ins t.MemOperand:$src1, t.RegClass:$src2, u8imm:$src3),
         m, triop_ndd_args, []>, NDD<1> {
   let ImmT = Imm8;
@@ -397,7 +397,7 @@ class ShlrdOpMRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
                     [(set t.RegClass:$dst, (node t.RegClass:$src2, (t.LoadNode addr:$src1), (i8 imm:$src3)))]);
 }
 
-class ShlrdOpMRC_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+class ShlrdOpMRC_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node = null_frag>
   : BinOpMR<o, m, triop_cl_ndd_args, t, (outs t.RegClass:$dst), []>, NDD<1> {
   let Uses = [CL];
   let SchedRW = [WriteSHDmrcl];
@@ -427,13 +427,13 @@ multiclass Shlrd<bits<8> o1, bits<8> o2, bits<8> o3, string m, SDPatternOperator
   }
 
   let Predicates = [In64BitMode] in {
-    def 16rri8_NF : ShlrdOpRRI8U_R<o3, m, Xi16, null_frag>, NF, PD;
-    def 32rri8_NF : ShlrdOpRRI8U_R<o3, m, Xi32, null_frag>, NF;
-    def 64rri8_NF : ShlrdOpRRI8U_R<o3, m, Xi64, null_frag>, NF;
+    def 16rri8_NF : ShlrdOpRRI8U_R<o3, m, Xi16>, NF, PD;
+    def 32rri8_NF : ShlrdOpRRI8U_R<o3, m, Xi32>, NF;
+    def 64rri8_NF : ShlrdOpRRI8U_R<o3, m, Xi64>, NF;
 
-    def 16rrCL_NF : ShlrdOpRRC_R<o2, m, Xi16, null_frag>, NF, PD;
-    def 32rrCL_NF : ShlrdOpRRC_R<o2, m, Xi32, null_frag>, NF;
-    def 64rrCL_NF : ShlrdOpRRC_R<o2, m, Xi64, null_frag>, NF;
+    def 16rrCL_NF : ShlrdOpRRC_R<o2, m, Xi16>, NF, PD;
+    def 32rrCL_NF : ShlrdOpRRC_R<o2, m, Xi32>, NF;
+    def 64rrCL_NF : ShlrdOpRRC_R<o2, m, Xi64>, NF;
 
     def 16rri8_NF_ND : ShlrdOpRRI8U_R<o3, m, Xi16, null_frag, 1>, EVEX_NF, PD;
     def 32rri8_NF_ND : ShlrdOpRRI8U_R<o3, m, Xi32, null_frag, 1>, EVEX_NF;
@@ -443,13 +443,13 @@ multiclass Shlrd<bits<8> o1, bits<8> o2, bits<8> o3, string m, SDPatternOperator
     def 32rrCL_NF_ND : ShlrdOpRRC_R<o2, m, Xi32, null_frag, 1>, EVEX_NF;
     def 64rrCL_NF_ND : ShlrdOpRRC_R<o2, m, Xi64, null_frag, 1>, EVEX_NF;
 
-    def 16rri8_EVEX : ShlrdOpRRI8U_R<o3, m, Xi16, null_frag>, DefEFLAGS, PL, PD;
-    def 32rri8_EVEX : ShlrdOpRRI8U_R<o3, m, Xi32, null_frag>, DefEFLAGS, PL;
-    def 64rri8_EVEX : ShlrdOpRRI8U_R<o3, m, Xi64, null_frag>, DefEFLAGS, PL;
+    def 16rri8_EVEX : ShlrdOpRRI8U_R<o3, m, Xi16>, DefEFLAGS, PL, PD;
+    def 32rri8_EVEX : ShlrdOpRRI8U_R<o3, m, Xi32>, DefEFLAGS, PL;
+    def 64rri8_EVEX : ShlrdOpRRI8U_R<o3, m, Xi64>, DefEFLAGS, PL;
 
-    def 16rrCL_EVEX : ShlrdOpRRC_R<o2, m, Xi16, null_frag>, DefEFLAGS, PL, PD;
-    def 32rrCL_EVEX : ShlrdOpRRC_R<o2, m, Xi32, null_frag>, DefEFLAGS, PL;
-    def 64rrCL_EVEX : ShlrdOpRRC_R<o2, m, Xi64, null_frag>, DefEFLAGS, PL;
+    def 16rrCL_EVEX : ShlrdOpRRC_R<o2, m, Xi16>, DefEFLAGS, PL, PD;
+    def 32rrCL_EVEX : ShlrdOpRRC_R<o2, m, Xi32>, DefEFLAGS, PL;
+    def 64rrCL_EVEX : ShlrdOpRRC_R<o2, m, Xi64>, DefEFLAGS, PL;
   }
 
   def 16mri8 : ShlrdOpMRI8U_M<o1, m, Xi16, t_node>, DefEFLAGS, OpSize16;
@@ -471,29 +471,29 @@ multiclass Shlrd<bits<8> o1, bits<8> o2, bits<8> o3, string m, SDPatternOperator
   }
 
   let Predicates = [In64BitMode] in {
-    def 16mri8_NF : ShlrdOpMRI8U_M<o3, m, Xi16, null_frag>, NF, PD;
-    def 32mri8_NF : ShlrdOpMRI8U_M<o3, m, Xi32, null_frag>, NF;
-    def 64mri8_NF : ShlrdOpMRI8U_M<o3, m, Xi64, null_frag>, NF;
+    def 16mri8_NF : ShlrdOpMRI8U_M<o3, m, Xi16>, NF, PD;
+    def 32mri8_NF : ShlrdOpMRI8U_M<o3, m, Xi32>, NF;
+    def 64mri8_NF : ShlrdOpMRI8U_M<o3, m, Xi64>, NF;
 
-    def 16mrCL_NF : ShlrdOpMRC_M<o2, m, Xi16, null_frag>, NF, PD;
-    def 32mrCL_NF : ShlrdOpMRC_M<o2, m, Xi32, null_frag>, NF;
-    def 64mrCL_NF : ShlrdOpMRC_M<o2, m, Xi64, null_frag>, NF;
+    def 16mrCL_NF : ShlrdOpMRC_M<o2, m, Xi16>, NF, PD;
+    def 32mrCL_NF : ShlrdOpMRC_M<o2, m, Xi32>, NF;
+    def 64mrCL_NF : ShlrdOpMRC_M<o2, m, Xi64>, NF;
 
-    def 16mri8_NF_ND : ShlrdOpMRI8U_R<o3, m, Xi16, null_frag>, EVEX_NF, PD;
-    def 32mri8_NF_ND : ShlrdOpMRI8U_R<o3, m, Xi32, null_frag>, EVEX_NF;
-    def 64mri8_NF_ND : ShlrdOpMRI8U_R<o3, m, Xi64, null_frag>, EVEX_NF;
+    def 16mri8_NF_ND : ShlrdOpMRI8U_R<o3, m, Xi16>, EVEX_NF, PD;
+    def 32mri8_NF_ND : ShlrdOpMRI8U_R<o3, m, Xi32>, EVEX_NF;
+    def 64mri8_NF_ND : ShlrdOpMRI8U_R<o3, m, Xi64>, EVEX_NF;
 
-    def 16mrCL_NF_ND : ShlrdOpMRC_R<o2, m, Xi16, null_frag>, EVEX_NF, PD;
-    def 32mrCL_NF_ND : ShlrdOpMRC_R<o2, m, Xi32, null_frag>, EVEX_NF;
-    def 64mrCL_NF_ND : ShlrdOpMRC_R<o2, m, Xi64, null_frag>, EVEX_NF;
+    def 16mrCL_NF_ND : ShlrdOpMRC_R<o2, m, Xi16>, EVEX_NF, PD;
+    def 32mrCL_NF_ND : ShlrdOpMRC_R<o2, m, Xi32>, EVEX_NF;
+    def 64mrCL_NF_ND : ShlrdOpMRC_R<o2, m, Xi64>, EVEX_NF;
 
-    def 16mri8_EVEX : ShlrdOpMRI8U_M<o3, m, Xi16, null_frag>, DefEFLAGS, PL, PD;
-    def 32mri8_EVEX : ShlrdOpMRI8U_M<o3, m, Xi32, null_frag>, DefEFLAGS, PL;
-    def 64mri8_EVEX : ShlrdOpMRI8U_M<o3, m, Xi64, null_frag>, DefEFLAGS, PL;
+    def 16mri8_EVEX : ShlrdOpMRI8U_M<o3, m, Xi16>, DefEFLAGS, PL, PD;
+    def 32mri8_EVEX : ShlrdOpMRI8U_M<o3, m, Xi32>, DefEFLAGS, PL;
+    def 64mri8_EVEX : ShlrdOpMRI8U_M<o3, m, Xi64>, DefEFLAGS, PL;
 
-    def 16mrCL_EVEX : ShlrdOpMRC_M<o2, m, Xi16, null_frag>, DefEFLAGS, PL, PD;
-    def 32mrCL_EVEX : ShlrdOpMRC_M<o2, m, Xi32, null_frag>, DefEFLAGS, PL;
-    def 64mrCL_EVEX : ShlrdOpMRC_M<o2, m, Xi64, null_frag>, DefEFLAGS, PL;
+    def 16mrCL_EVEX : ShlrdOpMRC_M<o2, m, Xi16>, DefEFLAGS, PL, PD;
+    def 32mrCL_EVEX : ShlrdOpMRC_M<o2, m, Xi32>, DefEFLAGS, PL;
+    def 64mrCL_EVEX : ShlrdOpMRC_M<o2, m, Xi64>, DefEFLAGS, PL;
   }
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrUtils.td b/llvm/lib/Target/X86/X86InstrUtils.td
index 93827bf0817c37..05ddcfbf2726d9 100644
--- a/llvm/lib/Target/X86/X86InstrUtils.td
+++ b/llvm/lib/Target/X86/X86InstrUtils.td
@@ -1070,7 +1070,7 @@ class BinOpRI_R<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0>
   : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst),
             []>, NDD<ndd>;
 // BinOpRI8U_R - Instructions that read "reg, u8imm" and write "reg".
-class BinOpRI8U_R<string m, Format f, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
+class BinOpRI8U_R<string m, Format f, X86TypeInfo t, SDPatternOperator node = null_frag, bit ndd = 0>
   : ITy<0xC1, f, t, (outs t.RegClass:$dst), (ins t.RegClass:$src1, u8imm:$src2), m,
         !if(!eq(ndd, 0), binop_args, binop_ndd_args),
         [(set t.RegClass:$dst, (node t.RegClass:$src1, (i8 imm:$src2)))]>, NDD<ndd> {
@@ -1249,7 +1249,7 @@ class BinOpMI8_F<string m, X86TypeInfo t, Format f>
 class BinOpMI8_R<string m, X86TypeInfo t, Format f>
   : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>;
 // BinOpMI8U_R - Instructions that read "[mem], u8imm" and write "reg".
-class BinOpMI8U_R<string m, Format f, X86TypeInfo t, SDPatternOperator node>
+class BinOpMI8U_R<string m, Format f, X86TypeInfo t, SDPatternOperator node = null_frag>
   : BinOpMI8U<m, binop_ndd_args, t, f, (outs t.RegClass:$dst),
               [(set t.RegClass:$dst, (node (t.LoadNode addr:$src1), (i8 imm:$src2)))]>, NDD<1>;
 // BinOpMI8_RF - Instructions that read "[mem], imm8" and write "reg"/EFLAGS.
@@ -1261,7 +1261,7 @@ class BinOpMI8_M<string m, X86TypeInfo t, Format f>
   let mayStore = 1;
 }
 // BinOpMI8U_M - Instructions that read "[mem], u8imm" and write "[mem]".
-class BinOpMI8U_M<string m, Format f, X86TypeInfo t, SDPatternOperator node>
+class BinOpMI8U_M<string m, Format f, X86TypeInfo t, SDPatternOperator node = null_frag>
   : BinOpMI8U<m, binop_args, t, f, (outs),
               [(store (node (t.LoadNode addr:$src1), (i8 imm:$src2)), addr:$src1)]> {
   let mayStore = 1;
@@ -1308,14 +1308,14 @@ class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg,
   let SchedRW = [WriteADC];
 }
 // BinOpRC_R - Instructions that read "reg, cl" and write reg.
-class BinOpRC_R<string m, Format f, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
+class BinOpRC_R<string m, Format f, X86TypeInfo t, SDPatternOperator node = null_frag, bit ndd = 0>
   : ITy<0xD3, f, t, (outs t.RegClass:$dst), (ins t.RegClass:$src1), m,
         !if(!eq(ndd, 0), binop_cl_args, binop_cl_ndd_args),
         [(set t.RegClass:$dst, (node t.RegClass:$src1, CL))]>, NDD<ndd> {
   let Uses = [CL];
 }
 // BinOpMC_M - Instructions that read "[mem], cl" and write [mem].
-class BinOpMC_M<string m, Format f, X86TypeInfo t, SDPatternOperator node>
+class BinOpMC_M<string m, Format f, X86TypeInfo t, SDPatternOperator node = null_frag>
   : ITy<0xD3, f, t, (outs), (ins t.MemOperand:$src1), m, binop_cl_args,
         [(store (node (t.LoadNode addr:$src1), CL), addr:$src1)]> {
   let Uses = [CL];
@@ -1323,7 +1323,7 @@ class BinOpMC_M<string m, Format f, X86TypeInfo t, SDPatternOperator node>
   let mayStore = 1;
 }
 // BinOpMC_R - Instructions that read "[mem], cl" and write reg.
-class BinOpMC_R<string m, Format f, X86TypeInfo t, SDPatternOperator node>
+class BinOpMC_R<string m, Format f, X86TypeInfo t, SDPatternOperator node = null_frag>
   : ITy<0xD3, f, t, (outs t.RegClass:$dst), (ins t.MemOperand:$src1), m, binop_cl_ndd_args,
         [(set t.RegClass:$dst, (node (t.LoadNode addr:$src1), CL))]>, NDD<1> {
   let Uses = [CL];
@@ -1336,13 +1336,13 @@ class UnaryOpR<bits<8> o, Format f, string m, string args, X86TypeInfo t,
   : ITy<o, f, t, out, (ins t.RegClass:$src1), m, args, p>, Sched<[WriteALU]>;
 // UnaryOpR_R - Instructions that read "reg" and write "reg".
 class UnaryOpR_R<bits<8> o, Format f, string m, X86TypeInfo t,
-                  SDPatternOperator node, bit ndd = 0>
+                  SDPatternOperator node = null_frag, bit ndd = 0>
   : UnaryOpR<o, f, m, !if(!eq(ndd, 0), unaryop_args, unaryop_ndd_args), t,
              (outs t.RegClass:$dst),
              [(set t.RegClass:$dst, (node t.RegClass:$src1))]>, NDD<ndd>;
 // UnaryOpR_RF - Instructions that read "reg" and write "reg"/EFLAGS.
 class UnaryOpR_RF<bits<8> o, Format f, string m, X86TypeInfo t,
-                  SDPatternOperator node, bit ndd = 0>
+                  SDPatternOperator node = null_frag, bit ndd = 0>
   : UnaryOpR<o, f, m, !if(!eq(ndd, 0), unaryop_args, unaryop_ndd_args), t,
              (outs t.RegClass:$dst),
              [(set t.RegClass:$dst, (node t.RegClass:$src1)),
@@ -1356,19 +1356,19 @@ class UnaryOpM<bits<8> o, Format f, string m, string args, X86TypeInfo t,
 }
 // UnaryOpM_R - Instructions that read "[mem]" and writes "reg".
 class UnaryOpM_R<bits<8> o, Format f, string m, X86TypeInfo t,
-                  SDPatternOperator node>
+                  SDPatternOperator node = null_frag>
   : UnaryOpM<o, f, m, unaryop_ndd_args, t, (outs t.RegClass:$dst),
              [(set t.RegClass:$dst, (node (t.LoadNode addr:$src1)))]>,
     Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>;
 // UnaryOpM_RF - Instructions that read "[mem]" and writes "reg"/EFLAGS.
 class UnaryOpM_RF<bits<8> o, Format f, string m, X86TypeInfo t,
-                  SDPatternOperator node>
+                  SDPatternOperator node = null_frag>
   : UnaryOpM<o, f, m, unaryop_ndd_args, t, (outs t.RegClass:$dst),
              [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1)))]>,
     Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>;
 // UnaryOpM_M - Instructions that read "[mem]" and writes "[mem]".
 class UnaryOpM_M<bits<8> o, Format f, string m, X86TypeInfo t,
-                  SDPatternOperator node>
+                  SDPatternOperator node = null_frag>
   : UnaryOpM<o, f, m, unaryop_args, t, (outs),
              [(store (node (t.LoadNode addr:$src1)), addr:$src1)]>,
     Sched<[WriteALURMW]>{
@@ -1376,7 +1376,7 @@ class UnaryOpM_M<bits<8> o, Format f, string m, X86TypeInfo t,
 }
 // UnaryOpM_MF - Instructions that read "[mem]" and writes "[mem]"/EFLAGS.
 class UnaryOpM_MF<bits<8> o, Format f, string m, X86TypeInfo t,
-                  SDPatternOperator node>
+                  SDPatternOperator node = null_frag>
   : UnaryOpM<o, f, m, unaryop_args, t, (outs),
              [(store (node (t.LoadNode addr:$src1)), addr:$src1),
               (implicit EFLAGS)]>, Sched<[WriteALURMW]>, DefEFLAGS {

>From 55c8586b3a305b03fdaa036058badde276e96ac1 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Mon, 22 Jan 2024 17:04:20 +0800
Subject: [PATCH 4/4] add more test

---
 llvm/test/CodeGen/X86/apx/rol.ll | 76 ++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/llvm/test/CodeGen/X86/apx/rol.ll b/llvm/test/CodeGen/X86/apx/rol.ll
index f41c17ffbf6736..6a8d3fa7d2a10a 100644
--- a/llvm/test/CodeGen/X86/apx/rol.ll
+++ b/llvm/test/CodeGen/X86/apx/rol.ll
@@ -14,6 +14,16 @@ entry:
   ret i8 %rol
 }
 
+define i8 @rol8m1_intrinsic(ptr %ptr)  {
+; CHECK-LABEL: rol8m1_intrinsic:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rolb (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd0,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %a = load i8, ptr %ptr
+  %f = call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 7)
+  ret i8 %f
+}
+
 define i16 @rol16m1(ptr %ptr) {
 ; CHECK-LABEL: rol16m1:
 ; CHECK:       # %bb.0: # %entry
@@ -27,6 +37,16 @@ entry:
   ret i16 %rol
 }
 
+define i16 @rol16m1_intrinsic(ptr %ptr)  {
+; CHECK-LABEL: rol16m1_intrinsic:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rolw (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xd1,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %a = load i16, ptr %ptr
+  %f = call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 15)
+  ret i16 %f
+}
+
 define i32 @rol32m1(ptr %ptr) {
 ; CHECK-LABEL: rol32m1:
 ; CHECK:       # %bb.0: # %entry
@@ -40,6 +60,16 @@ entry:
   ret i32 %rol
 }
 
+define i32 @rol32m1_intrinsic(ptr %ptr)  {
+; CHECK-LABEL: rol32m1_intrinsic:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    roll (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd1,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %a = load i32, ptr %ptr
+  %f = call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
+  ret i32 %f
+}
+
 define i64 @rol64m1(ptr %ptr) {
 ; CHECK-LABEL: rol64m1:
 ; CHECK:       # %bb.0: # %entry
@@ -53,6 +83,16 @@ entry:
   ret i64 %rol
 }
 
+define i64 @rol64m1_intrinsic(ptr %ptr)  {
+; CHECK-LABEL: rol64m1_intrinsic:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rolq (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd1,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %a = load i64, ptr %ptr
+  %f = call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 63)
+  ret i64 %f
+}
+
 define i8 @rol8mcl(ptr %ptr, i8 %cl) {
 ; CHECK-LABEL: rol8mcl:
 ; CHECK:       # %bb.0: # %entry
@@ -181,6 +221,15 @@ entry:
   ret i8 %rol
 }
 
+define i8 @rol8r1_intrinsic(i8 noundef %a)  {
+; CHECK-LABEL: rol8r1_intrinsic:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rolb %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd0,0xc7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %f = call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 7)
+  ret i8 %f
+}
+
 define i16 @rol16r1(i16 noundef %a) {
 ; CHECK-LABEL: rol16r1:
 ; CHECK:       # %bb.0: # %entry
@@ -193,6 +242,15 @@ entry:
   ret i16 %rol
 }
 
+define i16 @rol16r1_intrinsic(i16 noundef %a)  {
+; CHECK-LABEL: rol16r1_intrinsic:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rolw %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xd1,0xc7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %f = call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 15)
+  ret i16 %f
+}
+
 define i32 @rol32r1(i32 noundef %a) {
 ; CHECK-LABEL: rol32r1:
 ; CHECK:       # %bb.0: # %entry
@@ -205,6 +263,15 @@ entry:
   ret i32 %rol
 }
 
+define i32 @rol32r1_intrinsic(i32 noundef %a)  {
+; CHECK-LABEL: rol32r1_intrinsic:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    roll %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd1,0xc7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %f = call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
+  ret i32 %f
+}
+
 define i64 @rol64r1(i64 noundef %a) {
 ; CHECK-LABEL: rol64r1:
 ; CHECK:       # %bb.0: # %entry
@@ -217,6 +284,15 @@ entry:
   ret i64 %rol
 }
 
+define i64 @rol64r1_intrinsic(i64 noundef %a)  {
+; CHECK-LABEL: rol64r1_intrinsic:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rolq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd1,0xc7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %f = call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 63)
+  ret i64 %f
+}
+
 define i8 @rol8rcl(i8 noundef %a, i8 %cl) {
 ; CHECK-LABEL: rol8rcl:
 ; CHECK:       # %bb.0: # %entry



More information about the llvm-commits mailing list