[llvm] c1c6733 - [X86] Model MXCSR for all AVX512 instructions

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 3 16:22:34 PST 2019


Author: Wang, Pengfei
Date: 2019-12-04T08:07:38+08:00
New Revision: c1c673303dcfb242efea2d0d2760e5765f9f9281

URL: https://github.com/llvm/llvm-project/commit/c1c673303dcfb242efea2d0d2760e5765f9f9281
DIFF: https://github.com/llvm/llvm-project/commit/c1c673303dcfb242efea2d0d2760e5765f9f9281.diff

LOG: [X86] Model MXCSR for all AVX512 instructions

Summary: Model MXCSR for all AVX512 instructions

Reviewers: craig.topper, RKSimon, andrew.w.kaylor

Subscribers: hiraditya, llvm-commits, LuoYuanke, LiuChen3

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70881

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrAVX512.td
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/test/CodeGen/X86/domain-reassignment.mir
    llvm/test/CodeGen/X86/evex-to-vex-compress.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 249da7b888f3..29a9c3002c81 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2078,7 +2078,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                       "$cc, $src2, $src1", "$src1, $src2, $cc",
                       (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                       (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
-                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
+                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
   let mayLoad = 1 in
   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                     (outs _.KRC:$dst),
@@ -2089,7 +2089,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                         timm:$cc),
                     (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                         timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
-                    Sched<[sched.Folded, sched.ReadAfterFold]>;
+                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
 
   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
@@ -2111,7 +2111,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                           _.FRC:$src2,
                                           timm:$cc))]>,
-                EVEX_4V, VEX_LIG, Sched<[sched]>;
+                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
     def rm : AVX512Ii8<0xC2, MRMSrcMem,
               (outs _.KRC:$dst),
               (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
@@ -2121,7 +2121,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                                         (_.ScalarLdFrag addr:$src2),
                                         timm:$cc))]>,
               EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
-              Sched<[sched.Folded, sched.ReadAfterFold]>;
+              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
   }
 }
 
@@ -2522,6 +2522,7 @@ def X86cmpm_imm_commute : SDNodeXForm<timm, [{
 
 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                               string Name> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
                    "vcmp"#_.Suffix,
@@ -2553,6 +2554,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             (_.VT (_.BroadcastLdFrag addr:$src2)),
                             timm:$cc)>,
                 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+  }
 
   // Patterns for selecting with loads in other operand.
   def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
@@ -5278,7 +5280,7 @@ defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                             SDNode OpNode, SDNode VecNode,
                             X86FoldableSchedWrite sched, bit IsCommutable> {
-  let ExeDomain = _.ExeDomain in {
+  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
@@ -5329,16 +5331,17 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
-                           Sched<[sched]>;
+                           Sched<[sched]>, SIMD_EXC;
 
   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (_.VT (VecNode _.RC:$src1,
                                         _.ScalarIntMemCPat:$src2))>,
-                         Sched<[sched.Folded, sched.ReadAfterFold]>;
+                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
 
-  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
+  let isCodeGenOnly = 1, Predicates = [HasAVX512],
+      Uses = [MXCSR], mayRaiseFPException = 1 in {
   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                          (ins _.FRC:$src1, _.FRC:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -5353,7 +5356,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                          (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>,
-                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
+                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">, SIMD_EXC;
   }
 
   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5429,27 +5432,28 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
 }
 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
-                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
 
 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
                                          VEX_W, EVEX_4V, VEX_LIG,
-                                         EVEX_CD8<64, CD8VT1>;
+                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
 
 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
-                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
 
 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
                                          VEX_W, EVEX_4V, VEX_LIG,
-                                         EVEX_CD8<64, CD8VT1>;
+                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
 
 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
                             bit IsCommutable,
                             bit IsKCommutable = IsCommutable> {
-  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
+  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
+      Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                   "$src2, $src1", "$src1, $src2",
@@ -5570,6 +5574,7 @@ let isCodeGenOnly = 1 in {
   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                  SchedWriteFCmpSizes, 1>;
 }
+let Uses = []<Register>, mayRaiseFPException = 0 in {
 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                                SchedWriteFLogicSizes, 1>;
 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
@@ -5578,10 +5583,11 @@ defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                                SchedWriteFLogicSizes, 1>;
 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                                SchedWriteFLogicSizes, 1>;
+}
 
 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
-  let ExeDomain = _.ExeDomain in {
+  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                   "$src2, $src1", "$src1, $src2",
@@ -5603,7 +5609,7 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
 
 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
-  let ExeDomain = _.ExeDomain in {
+  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                   "$src2, $src1", "$src1, $src2",
@@ -6399,7 +6405,8 @@ let Predicates = [HasAVX512] in {
 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                X86VectorVTInfo _, string Suff> {
-  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
+  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
+      Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -6473,7 +6480,8 @@ defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubR
 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                X86VectorVTInfo _, string Suff> {
-  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
+  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
+      Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -6548,7 +6556,8 @@ defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubR
 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                X86VectorVTInfo _, string Suff> {
-  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
+  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
+      Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -6630,13 +6639,13 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
-          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;
+          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
 
   let mayLoad = 1 in
   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
-          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
+          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
 
   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
@@ -6648,12 +6657,12 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                      !strconcat(OpcodeStr,
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
+                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
     def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
+                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
 
     def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
@@ -6997,8 +7006,10 @@ defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
                     X86MemOperand x86memop, PatFrag ld_frag, string asm,
-                    string mem> {
-let ExeDomain = DstVT.ExeDomain in {
+                    string mem, list<Register> _Uses = [MXCSR],
+                    bit _mayRaiseFPException = 1> {
+let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
+    mayRaiseFPException = _mayRaiseFPException in {
   let hasSideEffects = 0, isCodeGenOnly = 1 in {
     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
               (ins DstVT.FRC:$src1, SrcRC:$src),
@@ -7069,7 +7080,7 @@ defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
 defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
-                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
+                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                  WriteCvtI2SD, GR64,
@@ -7108,7 +7119,7 @@ defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                   XS, VEX_W, EVEX_CD8<64, CD8VT1>;
 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
-                                  i32mem, loadi32, "cvtusi2sd", "l">,
+                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                   WriteCvtI2SD, GR64,
@@ -7152,7 +7163,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
-                EVEX, VEX_LIG, Sched<[sched]>;
+                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                  [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
@@ -7162,7 +7173,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                 [(set DstVT.RC:$dst, (OpNode
                       (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
-                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
   } // Predicates = [HasAVX512]
 
   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
@@ -7294,17 +7305,17 @@ let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
-              EVEX, VEX_LIG, Sched<[sched]>;
+              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
-              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
   }
 
   def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
             !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
-           EVEX, VEX_LIG, Sched<[sched]>;
+           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
   def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
             [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
@@ -7314,7 +7325,7 @@ let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
               [(set _DstRC.RC:$dst,
                 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
-              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
 } //HasAVX512
 
   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
@@ -7356,6 +7367,7 @@ defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
 // AVX-512  Convert form float to double and back
 //===----------------------------------------------------------------------===//
 
+let Uses = [MXCSR], mayRaiseFPException = 1 in
 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                 X86VectorVTInfo _Src, SDNode OpNode,
                                 X86FoldableSchedWrite sched> {
@@ -7475,7 +7487,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                           RegisterClass MaskRC = _.KRCWM,
                           dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
-
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _Src.RC:$src),
                          (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
@@ -7515,6 +7527,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   _.RC:$src0),
                          vselect, "$src0 = $dst">,
                          EVEX, EVEX_B, Sched<[sched.Folded]>;
+  }
 }
 // Coversion with SAE - suppress all exceptions
 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -7719,6 +7732,7 @@ let Predicates = [HasVLX] in {
 }
 
 // Convert Signed/Unsigned Doubleword to Double
+let Uses = []<Register>, mayRaiseFPException = 0 in
 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNode128, X86SchedWriteWidths sched> {
   // No rounding in this op
@@ -8524,6 +8538,7 @@ def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
 // Half precision conversion instructions
 //===----------------------------------------------------------------------===//
 
+let Uses = [MXCSR], mayRaiseFPException = 1 in
 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            X86FoldableSchedWrite sched> {
@@ -8571,7 +8586,7 @@ let Predicates = [HasVLX] in {
 
 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
-let ExeDomain = GenericDomain in {
+let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
   def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
              (ins _src.RC:$src1, i32u8imm:$src2),
              "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -8807,7 +8822,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
-                           Sched<[sched]>;
+                           Sched<[sched]>, SIMD_EXC;
 
   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
@@ -8819,7 +8834,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
-                         Sched<[sched.Folded, sched.ReadAfterFold]>;
+                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
   }
 }
 
@@ -8844,7 +8859,7 @@ defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
 
 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          SDNode OpNode, X86FoldableSchedWrite sched> {
-  let ExeDomain = _.ExeDomain in {
+  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT _.RC:$src))>,
@@ -8927,7 +8942,7 @@ multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
 
 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
-  let ExeDomain = _.ExeDomain in {
+  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (fsqrt _.RC:$src))>, EVEX,
@@ -8946,6 +8961,7 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
   }
 }
 
+let Uses = [MXCSR], mayRaiseFPException = 1 in
 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                   X86SchedWriteSizes sched> {
   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
@@ -8989,13 +9005,13 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
                          "$src2, $src1", "$src1, $src2",
                          (X86fsqrts (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2))>,
-                         Sched<[sched]>;
+                         Sched<[sched]>, SIMD_EXC;
     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (X86fsqrts (_.VT _.RC:$src1),
                                     _.ScalarIntMemCPat:$src2)>,
-                         Sched<[sched.Folded, sched.ReadAfterFold]>;
+                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -9008,12 +9024,12 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.FRC:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                Sched<[sched]>;
+                Sched<[sched]>, SIMD_EXC;
       let mayLoad = 1 in
         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                  Sched<[sched.Folded, sched.ReadAfterFold]>;
+                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
     }
   }
 
@@ -9051,7 +9067,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                            "$src3, $src2, $src1", "$src1, $src2, $src3",
                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 timm:$src3)))>,
-                           Sched<[sched]>;
+                           Sched<[sched]>, SIMD_EXC;
 
   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
@@ -9066,19 +9082,19 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                          "$src3, $src2, $src1", "$src1, $src2, $src3",
                          (_.VT (X86RndScales _.RC:$src1,
                                 _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
-                         Sched<[sched.Folded, sched.ReadAfterFold]>;
+                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
 
   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-               []>, Sched<[sched]>;
+               []>, Sched<[sched]>, SIMD_EXC;
 
     let mayLoad = 1 in
       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
+                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
   }
   }
 
@@ -10105,7 +10121,7 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
 //all instruction created with FROUND_CURRENT
 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                       X86FoldableSchedWrite sched, X86VectorVTInfo _> {
-  let ExeDomain = _.ExeDomain in {
+  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                       (ins _.RC:$src1, i32u8imm:$src2),
                       OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
@@ -10164,7 +10180,7 @@ multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
 //all instruction created with FROUND_CURRENT
 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
-  let ExeDomain = _.ExeDomain in {
+  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -10236,7 +10252,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
 //                                      op(reg_vec2,mem_scalar,imm)
 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
-  let ExeDomain = _.ExeDomain in {
+  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -10896,10 +10912,12 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load
 // AVX-512 - Unpack Instructions
 //===----------------------------------------------------------------------===//
 
+let Uses = []<Register>, mayRaiseFPException = 0 in {
 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                  SchedWriteFShuffleSizes, 0, 1>;
 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                  SchedWriteFShuffleSizes>;
+}
 
 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                        SchedWriteShuffle, HasBWI>;
@@ -11591,7 +11609,8 @@ let Predicates = [HasVLX] in {
 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                   X86VectorVTInfo TblVT>{
-  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
+  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
+      Uses = [MXCSR], mayRaiseFPException = 1 in {
     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
@@ -11647,7 +11666,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                       (X86VFixupimms (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (_src3VT.VT _src3VT.RC:$src3),
-                                     (i32 timm:$src4))>, Sched<[sched]>;
+                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
@@ -11665,7 +11684,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                     (_src3VT.VT (scalar_to_vector
                                               (_src3VT.ScalarLdFrag addr:$src3))),
                                     (i32 timm:$src4))>,
-                     Sched<[sched.Folded, sched.ReadAfterFold]>;
+                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
   }
 }
 
@@ -12170,7 +12189,7 @@ defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
 //===----------------------------------------------------------------------===//
 
 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
-    Constraints = "$src1 = $dst" in {
+    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fmaddps", "$src3, $src2", "$src2, $src3",
@@ -12301,17 +12320,19 @@ defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
 // Truncate Float to BFloat16
 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                              X86SchedWriteWidths sched> {
-  let Predicates = [HasBF16] in {
+  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                             X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
   }
   let Predicates = [HasBF16, HasVLX] in {
+    let Uses = []<Register>, mayRaiseFPException = 0 in {
     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                                null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                                VK4WM>, EVEX_V128;
     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                                X86cvtneps2bf16,
                                sched.YMM, "{1to8}", "{y}">, EVEX_V256;
+    }
 
     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b66d9ffd5d5e..41c6fc4aaf67 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1761,10 +1761,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   case X86::VCMPPSZ128rrik:
   case X86::VCMPPDZ256rrik:
   case X86::VCMPPSZ256rrik: {
-    unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x1f;
+    unsigned Imm =
+                MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f;
     Imm = X86::getSwappedVCMPImm(Imm);
     auto &WorkingMI = cloneIfNew(MI);
-    WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
+    WorkingMI.getOperand(MI.getNumExplicitOperands() - 1).setImm(Imm);
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
   }

diff  --git a/llvm/test/CodeGen/X86/domain-reassignment.mir b/llvm/test/CodeGen/X86/domain-reassignment.mir
index 38755344849a..e24a5ded09a0 100644
--- a/llvm/test/CodeGen/X86/domain-reassignment.mir
+++ b/llvm/test/CodeGen/X86/domain-reassignment.mir
@@ -167,7 +167,7 @@ body:             |
   bb.1.if:
     successors: %bb.3(0x80000000)
 
-    %14 = VCMPSSZrr %7, %8, 0
+    %14 = VCMPSSZrr %7, %8, 0, implicit $mxcsr
 
     ; check that cross domain copies are replaced with same domain copies.
 
@@ -177,7 +177,7 @@ body:             |
 
   bb.2.else:
     successors: %bb.3(0x80000000)
-    %12 = VCMPSSZrr %9, %10, 0
+    %12 = VCMPSSZrr %9, %10, 0, implicit $mxcsr
 
     ; check that cross domain copies are replaced with same domain copies.
 
@@ -292,7 +292,7 @@ body:             |
     %3 = COPY $zmm2
     %4 = COPY $zmm3
 
-    %5 = VCMPPDZrri %3, %4, 0
+    %5 = VCMPPDZrri %3, %4, 0, implicit $mxcsr
     %6 = COPY %5
     %7 = COPY %6.sub_8bit
 
@@ -411,7 +411,7 @@ body:             |
     %3 = COPY $zmm2
     %4 = COPY $zmm3
 
-    %5 = VCMPPSZrri %3, %4, 0
+    %5 = VCMPPSZrri %3, %4, 0, implicit $mxcsr
     %6 = COPY %5
     %7 = COPY %6.sub_16bit
 

diff  --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
index eaf68b9bb210..3b0fbcba4348 100755
--- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
+++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
@@ -167,14 +167,14 @@ body: |
   $ymm0 = VPADDWZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm0 = VPADDWYrr                   $ymm0, $ymm1
   $ymm0 = VPADDWZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VMULPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VMULPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VMULPDYrr                   $ymm0, $ymm1
-  $ymm0 = VMULPDZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VMULPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VMULPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VMULPSYrr                   $ymm0, $ymm1
-  $ymm0 = VMULPSZ256rr                         $ymm0, $ymm1                                   
+  ; CHECK: $ymm0 = VMULPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VMULPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VMULPDYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VMULPDZ256rr                         $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VMULPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VMULPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VMULPSYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VMULPSZ256rr                         $ymm0, $ymm1, implicit $mxcsr
   ; CHECK: $ymm0 = VORPDYrm                    $ymm0, $rip, 1, $rax, 0, $noreg
   $ymm0 = VORPDZ256rm                          $ymm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm0 = VORPDYrr                    $ymm0, $ymm1
@@ -315,14 +315,14 @@ body: |
   $ymm0 = VPXORQZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm0 = VPXORYrr                    $ymm0, $ymm1  
   $ymm0 = VPXORQZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VADDPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VADDPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VADDPDYrr                   $ymm0, $ymm1
-  $ymm0 = VADDPDZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VADDPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VADDPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VADDPSYrr                   $ymm0, $ymm1
-  $ymm0 = VADDPSZ256rr                         $ymm0, $ymm1                                   
+  ; CHECK: $ymm0 = VADDPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VADDPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VADDPDYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VADDPDZ256rr                         $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VADDPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VADDPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VADDPSYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VADDPSZ256rr                         $ymm0, $ymm1, implicit $mxcsr
   ; CHECK: $ymm0 = VANDNPDYrm                  $ymm0, $rip, 1, $rax, 0, $noreg
   $ymm0 = VANDNPDZ256rm                        $ymm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm0 = VANDNPDYrr                  $ymm0, $ymm1
@@ -339,46 +339,46 @@ body: |
   $ymm0 = VANDPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm0 = VANDPSYrr                   $ymm0, $ymm1
   $ymm0 = VANDPSZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VDIVPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VDIVPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VDIVPDYrr                   $ymm0, $ymm1  
-  $ymm0 = VDIVPDZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VDIVPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VDIVPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VDIVPSYrr                   $ymm0, $ymm1
-  $ymm0 = VDIVPSZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VMAXCPDYrm                  $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VMAXCPDZ256rm                        $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VMAXCPDYrr                  $ymm0, $ymm1
-  $ymm0 = VMAXCPDZ256rr                        $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VMAXCPSYrm                  $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VMAXCPSZ256rm                        $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VMAXCPSYrr                  $ymm0, $ymm1
-  $ymm0 = VMAXCPSZ256rr                        $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VMAXPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VMAXPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VMAXPDYrr                   $ymm0, $ymm1
-  $ymm0 = VMAXPDZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VMAXPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VMAXPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VMAXPSYrr                   $ymm0, $ymm1
-  $ymm0 = VMAXPSZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VMINCPDYrm                  $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VMINCPDZ256rm                        $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VMINCPDYrr                  $ymm0, $ymm1
-  $ymm0 = VMINCPDZ256rr                        $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VMINCPSYrm                  $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VMINCPSZ256rm                        $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VMINCPSYrr                  $ymm0, $ymm1
-  $ymm0 = VMINCPSZ256rr                        $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VMINPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VMINPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VMINPDYrr                   $ymm0, $ymm1
-  $ymm0 = VMINPDZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VMINPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VMINPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VMINPSYrr                   $ymm0, $ymm1
-  $ymm0 = VMINPSZ256rr                         $ymm0, $ymm1                                   
+  ; CHECK: $ymm0 = VDIVPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VDIVPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VDIVPDYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VDIVPDZ256rr                         $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VDIVPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VDIVPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VDIVPSYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VDIVPSZ256rr                         $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VMAXCPDYrm                  $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VMAXCPDZ256rm                        $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VMAXCPDYrr                  $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VMAXCPDZ256rr                        $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VMAXCPSYrm                  $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VMAXCPSZ256rm                        $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VMAXCPSYrr                  $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VMAXCPSZ256rr                        $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VMAXPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VMAXPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VMAXPDYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VMAXPDZ256rr                         $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VMAXPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VMAXPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VMAXPSYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VMAXPSZ256rr                         $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VMINCPDYrm                  $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VMINCPDZ256rm                        $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VMINCPDYrr                  $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VMINCPDZ256rr                        $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VMINCPSYrm                  $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VMINCPSZ256rm                        $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VMINCPSYrr                  $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VMINCPSZ256rr                        $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VMINPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VMINPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VMINPDYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VMINPDZ256rr                         $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VMINPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VMINPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VMINPSYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VMINPSZ256rr                         $ymm0, $ymm1, implicit $mxcsr
   ; CHECK: $ymm0 = VXORPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
   $ymm0 = VXORPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm0 = VXORPDYrr                   $ymm0, $ymm1
@@ -419,14 +419,14 @@ body: |
   $ymm0 = VUNPCKLPSZ256rm                      $ymm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm0 = VUNPCKLPSYrr                $ymm0, $ymm1
   $ymm0 = VUNPCKLPSZ256rr                      $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VSUBPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VSUBPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VSUBPDYrr                   $ymm0, $ymm1 
-  $ymm0 = VSUBPDZ256rr                         $ymm0, $ymm1                                   
-  ; CHECK: $ymm0 = VSUBPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
-  $ymm0 = VSUBPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm0 = VSUBPSYrr                   $ymm0, $ymm1                               
-  $ymm0 = VSUBPSZ256rr                         $ymm0, $ymm1                                   
+  ; CHECK: $ymm0 = VSUBPDYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VSUBPDZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VSUBPDYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VSUBPDZ256rr                         $ymm0, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm0 = VSUBPSYrm                   $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm0 = VSUBPSZ256rm                         $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VSUBPSYrr                   $ymm0, $ymm1, implicit $mxcsr
+  $ymm0 = VSUBPSZ256rr                         $ymm0, $ymm1, implicit $mxcsr
   ; CHECK: $ymm0 = VPUNPCKHBWYrm               $ymm0, $rip, 1, $rax, 0, $noreg
   $ymm0 = VPUNPCKHBWZ256rm                     $ymm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm0 = VPUNPCKHBWYrr               $ymm0, $ymm1
@@ -459,150 +459,150 @@ body: |
   $ymm0 = VPUNPCKLWDZ256rm                     $ymm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm0 = VPUNPCKLWDYrr               $ymm0, $ymm1                               
   $ymm0 = VPUNPCKLWDZ256rr                     $ymm0, $ymm1                                                
-  ; CHECK: $ymm0 = VFMADD132PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADD132PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADD132PDYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADD132PDZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADD132PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADD132PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADD132PSYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADD132PSZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADD213PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADD213PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADD213PDYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADD213PDZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADD213PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADD213PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADD213PSYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADD213PSZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADD231PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADD231PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADD231PDYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADD231PDZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADD231PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADD231PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADD231PSYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADD231PSZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADDSUB132PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADDSUB132PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADDSUB132PDYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADDSUB132PDZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADDSUB132PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADDSUB132PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADDSUB132PSYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADDSUB132PSZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADDSUB213PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADDSUB213PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADDSUB213PDYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADDSUB213PDZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADDSUB213PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADDSUB213PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADDSUB213PSYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADDSUB213PSZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADDSUB231PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADDSUB231PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADDSUB231PDYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADDSUB231PDZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMADDSUB231PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMADDSUB231PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMADDSUB231PSYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMADDSUB231PSZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUB132PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUB132PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUB132PDYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUB132PDZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUB132PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUB132PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUB132PSYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUB132PSZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUB213PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUB213PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUB213PDYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUB213PDZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUB213PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUB213PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUB213PSYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUB213PSZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUB231PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUB231PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUB231PDYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUB231PDZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUB231PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUB231PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUB231PSYr               $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUB231PSZ256r                     $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUBADD132PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUBADD132PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUBADD132PDYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUBADD132PDZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUBADD132PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUBADD132PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUBADD132PSYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUBADD132PSZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUBADD213PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUBADD213PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUBADD213PDYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUBADD213PDZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUBADD213PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUBADD213PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUBADD213PSYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUBADD213PSZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUBADD231PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUBADD231PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUBADD231PDYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUBADD231PDZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFMSUBADD231PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFMSUBADD231PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFMSUBADD231PSYr            $ymm0, $ymm1, $ymm2
-  $ymm0 = VFMSUBADD231PSZ256r                  $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMADD132PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMADD132PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMADD132PDYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMADD132PDZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMADD132PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMADD132PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMADD132PSYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMADD132PSZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMADD213PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMADD213PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMADD213PDYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMADD213PDZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMADD213PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMADD213PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMADD213PSYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMADD213PSZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMADD231PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMADD231PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMADD231PDYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMADD231PDZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMADD231PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMADD231PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMADD231PSYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMADD231PSZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMSUB132PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMSUB132PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMSUB132PDYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMSUB132PDZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMSUB132PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMSUB132PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMSUB132PSYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMSUB132PSZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMSUB213PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMSUB213PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMSUB213PDYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMSUB213PDZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMSUB213PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMSUB213PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMSUB213PSYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMSUB213PSZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMSUB231PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMSUB231PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMSUB231PDYr              $ymm0, $ymm1, $ymm2
-  $ymm0 = VFNMSUB231PDZ256r                    $ymm0, $ymm1, $ymm2                            
-  ; CHECK: $ymm0 = VFNMSUB231PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg
-  $ymm0 = VFNMSUB231PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg                 
-  ; CHECK: $ymm0 = VFNMSUB231PSYr              $ymm0, $ymm1, $ymm2                        
-  $ymm0 = VFNMSUB231PSZ256r                    $ymm0, $ymm1, $ymm2                                               
+  ; CHECK: $ymm0 = VFMADD132PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADD132PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD132PDYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADD132PDZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD132PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADD132PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD132PSYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADD132PSZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD213PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADD213PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD213PDYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADD213PDZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD213PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADD213PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD213PSYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADD213PSZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD231PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADD231PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD231PDYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADD231PDZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD231PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADD231PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADD231PSYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADD231PSZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB132PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADDSUB132PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB132PDYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADDSUB132PDZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB132PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADDSUB132PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB132PSYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADDSUB132PSZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB213PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADDSUB213PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB213PDYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADDSUB213PDZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB213PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADDSUB213PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB213PSYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADDSUB213PSZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB231PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADDSUB231PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB231PDYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADDSUB231PDZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB231PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMADDSUB231PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMADDSUB231PSYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMADDSUB231PSZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB132PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUB132PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB132PDYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUB132PDZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB132PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUB132PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB132PSYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUB132PSZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB213PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUB213PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB213PDYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUB213PDZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB213PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUB213PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB213PSYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUB213PSZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB231PDYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUB231PDZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB231PDYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUB231PDZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB231PSYm               $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUB231PSZ256m                     $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUB231PSYr               $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUB231PSZ256r                     $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD132PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUBADD132PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD132PDYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUBADD132PDZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD132PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUBADD132PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD132PSYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUBADD132PSZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD213PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUBADD213PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD213PDYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUBADD213PDZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD213PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUBADD213PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD213PSYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUBADD213PSZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD231PDYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUBADD231PDZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD231PDYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUBADD231PDZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD231PSYm            $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFMSUBADD231PSZ256m                  $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFMSUBADD231PSYr            $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFMSUBADD231PSZ256r                  $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD132PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMADD132PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD132PDYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMADD132PDZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD132PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMADD132PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD132PSYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMADD132PSZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD213PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMADD213PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD213PDYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMADD213PDZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD213PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMADD213PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD213PSYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMADD213PSZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD231PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMADD231PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD231PDYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMADD231PDZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD231PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMADD231PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMADD231PSYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMADD231PSZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB132PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMSUB132PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB132PDYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMSUB132PDZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB132PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMSUB132PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB132PSYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMSUB132PSZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB213PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMSUB213PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB213PDYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMSUB213PDZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB213PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMSUB213PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB213PSYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMSUB213PSZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB231PDYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMSUB231PDZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB231PDYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMSUB231PDZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB231PSYm              $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VFNMSUB231PSZ256m                    $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VFNMSUB231PSYr              $ymm0, $ymm1, $ymm2, implicit $mxcsr
+  $ymm0 = VFNMSUB231PSZ256r                    $ymm0, $ymm1, $ymm2, implicit $mxcsr
   ; CHECK: $ymm0 = VPSRADYri                   $ymm0, 7
   $ymm0 = VPSRADZ256ri                         $ymm0, 7                                       
   ; CHECK: $ymm0 = VPSRADYrm                   $ymm0, $rip, 1, $rax, 0, $noreg
@@ -811,50 +811,50 @@ body: |
   $ymm0 = VCVTDQ2PDZ256rm                      $rdi, 1, $noreg, 0, $noreg
   ; CHECK: $ymm0 = VCVTDQ2PDYrr                $xmm0
   $ymm0 = VCVTDQ2PDZ256rr                      $xmm0                                          
-  ; CHECK: $ymm0 = VCVTDQ2PSYrm                $rdi, 1, $noreg, 0, $noreg
-  $ymm0 = VCVTDQ2PSZ256rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm0 = VCVTDQ2PSYrr                $ymm0
-  $ymm0 = VCVTDQ2PSZ256rr                      $ymm0                                          
-  ; CHECK: $xmm0 = VCVTPD2DQYrm                $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTPD2DQZ256rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTPD2DQYrr                $ymm0
-  $xmm0 = VCVTPD2DQZ256rr                      $ymm0                                          
-  ; CHECK: $xmm0 = VCVTPD2PSYrm                $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTPD2PSZ256rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTPD2PSYrr                $ymm0
-  $xmm0 = VCVTPD2PSZ256rr                      $ymm0                                          
-  ; CHECK: $ymm0 = VCVTPS2DQYrm                $rdi, 1, $noreg, 0, $noreg
-  $ymm0 = VCVTPS2DQZ256rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm0 = VCVTPS2DQYrr                $ymm0  
-  $ymm0 = VCVTPS2DQZ256rr                      $ymm0                                          
-  ; CHECK: $ymm0 = VCVTPS2PDYrm                $rdi, 1, $noreg, 0, $noreg
-  $ymm0 = VCVTPS2PDZ256rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm0 = VCVTPS2PDYrr                $xmm0                                      
-  $ymm0 = VCVTPS2PDZ256rr                      $xmm0                                               
-  ; CHECK: VCVTPS2PHYmr                        $rdi, 1, $noreg, 0, $noreg, $ymm0, 0
-  VCVTPS2PHZ256mr                              $rdi, 1, $noreg, 0, $noreg, $ymm0, 0
-  ; CHECK: $xmm0 = VCVTPS2PHYrr                $ymm0, 0
-  $xmm0 = VCVTPS2PHZ256rr                      $ymm0, 0
-  ; CHECK: $ymm0 = VCVTPH2PSYrm                $rdi, 1, $noreg, 0, $noreg
-  $ymm0 = VCVTPH2PSZ256rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm0 = VCVTPH2PSYrr                $xmm0      
-  $ymm0 = VCVTPH2PSZ256rr                      $xmm0                                          
-  ; CHECK: $xmm0 = VCVTTPD2DQYrm               $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTTPD2DQZ256rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTTPD2DQYrr               $ymm0
-  $xmm0 = VCVTTPD2DQZ256rr                     $ymm0                                          
-  ; CHECK: $ymm0 = VCVTTPS2DQYrm               $rdi, 1, $noreg, 0, $noreg
-  $ymm0 = VCVTTPS2DQZ256rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm0 = VCVTTPS2DQYrr               $ymm0                                      
-  $ymm0 = VCVTTPS2DQZ256rr                     $ymm0                                               
-  ; CHECK: $ymm0 = VSQRTPDYm                   $rdi, 1, $noreg, 0, $noreg
-  $ymm0 = VSQRTPDZ256m                         $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm0 = VSQRTPDYr                   $ymm0
-  $ymm0 = VSQRTPDZ256r                         $ymm0                                          
-  ; CHECK: $ymm0 = VSQRTPSYm                   $rdi, 1, $noreg, 0, $noreg
-  $ymm0 = VSQRTPSZ256m                         $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm0 = VSQRTPSYr                   $ymm0                                      
-  $ymm0 = VSQRTPSZ256r                         $ymm0                                                 
+  ; CHECK: $ymm0 = VCVTDQ2PSYrm                $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VCVTDQ2PSZ256rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VCVTDQ2PSYrr                $ymm0, implicit $mxcsr
+  $ymm0 = VCVTDQ2PSZ256rr                      $ymm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPD2DQYrm                $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTPD2DQZ256rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPD2DQYrr                $ymm0, implicit $mxcsr
+  $xmm0 = VCVTPD2DQZ256rr                      $ymm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPD2PSYrm                $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTPD2PSZ256rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPD2PSYrr                $ymm0, implicit $mxcsr
+  $xmm0 = VCVTPD2PSZ256rr                      $ymm0, implicit $mxcsr
+  ; CHECK: $ymm0 = VCVTPS2DQYrm                $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VCVTPS2DQZ256rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VCVTPS2DQYrr                $ymm0, implicit $mxcsr
+  $ymm0 = VCVTPS2DQZ256rr                      $ymm0, implicit $mxcsr
+  ; CHECK: $ymm0 = VCVTPS2PDYrm                $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VCVTPS2PDZ256rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VCVTPS2PDYrr                $xmm0, implicit $mxcsr
+  $ymm0 = VCVTPS2PDZ256rr                      $xmm0, implicit $mxcsr
+  ; CHECK: VCVTPS2PHYmr                        $rdi, 1, $noreg, 0, $noreg, $ymm0, 0, implicit $mxcsr
+  VCVTPS2PHZ256mr                              $rdi, 1, $noreg, 0, $noreg, $ymm0, 0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPS2PHYrr                $ymm0, 0, implicit $mxcsr
+  $xmm0 = VCVTPS2PHZ256rr                      $ymm0, 0, implicit $mxcsr
+  ; CHECK: $ymm0 = VCVTPH2PSYrm                $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VCVTPH2PSZ256rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VCVTPH2PSYrr                $xmm0, implicit $mxcsr
+  $ymm0 = VCVTPH2PSZ256rr                      $xmm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTTPD2DQYrm               $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTTPD2DQZ256rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTTPD2DQYrr               $ymm0, implicit $mxcsr
+  $xmm0 = VCVTTPD2DQZ256rr                     $ymm0, implicit $mxcsr
+  ; CHECK: $ymm0 = VCVTTPS2DQYrm               $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VCVTTPS2DQZ256rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VCVTTPS2DQYrr               $ymm0, implicit $mxcsr
+  $ymm0 = VCVTTPS2DQZ256rr                     $ymm0, implicit $mxcsr
+  ; CHECK: $ymm0 = VSQRTPDYm                   $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VSQRTPDZ256m                         $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VSQRTPDYr                   $ymm0, implicit $mxcsr
+  $ymm0 = VSQRTPDZ256r                         $ymm0, implicit $mxcsr
+  ; CHECK: $ymm0 = VSQRTPSYm                   $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm0 = VSQRTPSZ256m                         $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm0 = VSQRTPSYr                   $ymm0, implicit $mxcsr
+  $ymm0 = VSQRTPSZ256r                         $ymm0, implicit $mxcsr
   ; CHECK: $ymm0 = VPALIGNRYrmi                $ymm0, $rdi, 1, $noreg, 0, $noreg, 1
   $ymm0 = VPALIGNRZ256rmi                      $ymm0, $rdi, 1, $noreg, 0, $noreg, 1
   ; CHECK: $ymm0 = VPALIGNRYrri                $ymm0, $ymm1, 1
@@ -889,14 +889,14 @@ body: |
   $ymm0 = VSHUFPSZ256rmi                       $ymm0, $rdi, 1, $noreg, 0, $noreg, -24
   ; CHECK: $ymm0 = VSHUFPSYrri                 $ymm0, $ymm1, -24
   $ymm0 = VSHUFPSZ256rri                       $ymm0, $ymm1, -24
-  ; CHECK: $ymm0 = VROUNDPDYm                  $rip, 1, $rax, 0, $noreg, 15
-  $ymm0 = VRNDSCALEPDZ256rmi                   $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $ymm0 = VROUNDPDYr                  $ymm0, 15
-  $ymm0 = VRNDSCALEPDZ256rri                   $ymm0, 15
-  ; CHECK: $ymm0 = VROUNDPSYm                  $rip, 1, $rax, 0, $noreg, 15
-  $ymm0 = VRNDSCALEPSZ256rmi                   $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $ymm0 = VROUNDPSYr                  $ymm0, 15
-  $ymm0 = VRNDSCALEPSZ256rri                   $ymm0, 15
+  ; CHECK: $ymm0 = VROUNDPDYm                  $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $ymm0 = VRNDSCALEPDZ256rmi                   $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $ymm0 = VROUNDPDYr                  $ymm0, 15, implicit $mxcsr
+  $ymm0 = VRNDSCALEPDZ256rri                   $ymm0, 15, implicit $mxcsr
+  ; CHECK: $ymm0 = VROUNDPSYm                  $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $ymm0 = VRNDSCALEPSZ256rmi                   $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $ymm0 = VROUNDPSYr                  $ymm0, 15, implicit $mxcsr
+  $ymm0 = VRNDSCALEPSZ256rri                   $ymm0, 15, implicit $mxcsr
   ; CHECK: $ymm0 = VPERM2F128rm                $ymm0, $rip, 1, $rax, 0, $noreg, 32
   $ymm0 = VSHUFF32X4Z256rmi                    $ymm0, $rip, 1, $rax, 0, $noreg, 228
   ; CHECK: $ymm0 = VPERM2F128rr                $ymm0, $ymm1, 32
@@ -1075,46 +1075,46 @@ body: |
   VMOVLPSZ128mr                                $rdi, 1, $noreg, 0, $noreg, $xmm0                             
   ; CHECK: $xmm0 = VMOVLPSrm                   $xmm0,  $rdi, 1, $noreg, 0, $noreg                
   $xmm0 = VMOVLPSZ128rm                        $xmm0,  $rdi, 1, $noreg, 0, $noreg                                               
-  ; CHECK: $xmm0 = VMAXCPDrm                   $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMAXCPDZ128rm                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMAXCPDrr                   $xmm0, $xmm1  
-  $xmm0 = VMAXCPDZ128rr                        $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VMAXCPSrm                   $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMAXCPSZ128rm                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMAXCPSrr                   $xmm0, $xmm1
-  $xmm0 = VMAXCPSZ128rr                        $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VMAXPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMAXPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMAXPDrr                    $xmm0, $xmm1
-  $xmm0 = VMAXPDZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VMAXPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMAXPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMAXPSrr                    $xmm0, $xmm1
-  $xmm0 = VMAXPSZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VMINCPDrm                   $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMINCPDZ128rm                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMINCPDrr                   $xmm0, $xmm1  
-  $xmm0 = VMINCPDZ128rr                        $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VMINCPSrm                   $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMINCPSZ128rm                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMINCPSrr                   $xmm0, $xmm1
-  $xmm0 = VMINCPSZ128rr                        $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VMINPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMINPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMINPDrr                    $xmm0, $xmm1
-  $xmm0 = VMINPDZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VMINPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMINPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMINPSrr                    $xmm0, $xmm1
-  $xmm0 = VMINPSZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VMULPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMULPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMULPDrr                    $xmm0, $xmm1
-  $xmm0 = VMULPDZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VMULPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMULPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMULPSrr                    $xmm0, $xmm1
-  $xmm0 = VMULPSZ128rr                         $xmm0, $xmm1                                        
+  ; CHECK: $xmm0 = VMAXCPDrm                   $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMAXCPDZ128rm                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXCPDrr                   $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMAXCPDZ128rr                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXCPSrm                   $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMAXCPSZ128rm                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXCPSrr                   $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMAXCPSZ128rr                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMAXPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXPDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMAXPDZ128rr                         $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMAXPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXPSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMAXPSZ128rr                         $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINCPDrm                   $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMINCPDZ128rm                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINCPDrr                   $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMINCPDZ128rr                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINCPSrm                   $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMINCPSZ128rm                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINCPSrr                   $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMINCPSZ128rr                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMINPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINPDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMINPDZ128rr                         $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMINPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINPSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMINPSZ128rr                         $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMULPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULPDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMULPDZ128rr                         $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMULPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULPSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMULPSZ128rr                         $xmm0, $xmm1, implicit $mxcsr
   ; CHECK: $xmm0 = VORPDrm                     $xmm0, $rip, 1, $rax, 0, $noreg
   $xmm0 = VORPDZ128rm                          $xmm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm0 = VORPDrr                     $xmm0, $xmm1
@@ -1295,14 +1295,14 @@ body: |
   $xmm0 = VPSUBWZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm0 = VPSUBWrr                    $xmm0, $xmm1                            
   $xmm0 = VPSUBWZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VADDPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VADDPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VADDPDrr                    $xmm0, $xmm1  
-  $xmm0 = VADDPDZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VADDPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VADDPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VADDPSrr                    $xmm0, $xmm1
-  $xmm0 = VADDPSZ128rr                         $xmm0, $xmm1                                        
+  ; CHECK: $xmm0 = VADDPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VADDPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VADDPDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VADDPDZ128rr                         $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VADDPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VADDPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VADDPSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VADDPSZ128rr                         $xmm0, $xmm1, implicit $mxcsr
   ; CHECK: $xmm0 = VANDNPDrm                   $xmm0, $rip, 1, $rax, 0, $noreg
   $xmm0 = VANDNPDZ128rm                        $xmm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm0 = VANDNPDrr                   $xmm0, $xmm1
@@ -1319,14 +1319,14 @@ body: |
   $xmm0 = VANDPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm0 = VANDPSrr                    $xmm0, $xmm1
   $xmm0 = VANDPSZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VDIVPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VDIVPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VDIVPDrr                    $xmm0, $xmm1
-  $xmm0 = VDIVPDZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VDIVPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VDIVPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VDIVPSrr                    $xmm0, $xmm1
-  $xmm0 = VDIVPSZ128rr                         $xmm0, $xmm1                                        
+  ; CHECK: $xmm0 = VDIVPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VDIVPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVPDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VDIVPDZ128rr                         $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VDIVPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVPSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VDIVPSZ128rr                         $xmm0, $xmm1, implicit $mxcsr
   ; CHECK: $xmm0 = VPXORrm                     $xmm0, $rip, 1, $rax, 0, $noreg
   $xmm0 = VPXORDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm0 = VPXORrr                     $xmm0, $xmm1
@@ -1335,14 +1335,14 @@ body: |
   $xmm0 = VPXORQZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm0 = VPXORrr                     $xmm0, $xmm1
   $xmm0 = VPXORQZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VSUBPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VSUBPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VSUBPDrr                    $xmm0, $xmm1
-  $xmm0 = VSUBPDZ128rr                         $xmm0, $xmm1                                        
-  ; CHECK: $xmm0 = VSUBPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VSUBPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VSUBPSrr                    $xmm0, $xmm1                  
-  $xmm0 = VSUBPSZ128rr                         $xmm0, $xmm1                                        
+  ; CHECK: $xmm0 = VSUBPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSUBPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBPDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VSUBPDZ128rr                         $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBPSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSUBPSZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBPSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VSUBPSZ128rr                         $xmm0, $xmm1, implicit $mxcsr
   ; CHECK: $xmm0 = VXORPDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
   $xmm0 = VXORPDZ128rm                         $xmm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm0 = VXORPDrr                    $xmm0, $xmm1
@@ -1423,150 +1423,150 @@ body: |
   $xmm0 = VUNPCKLPSZ128rm                      $xmm0, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm0 = VUNPCKLPSrr                 $xmm0, $xmm1                            
   $xmm0 = VUNPCKLPSZ128rr                      $xmm0, $xmm1                                                                                              
-  ; CHECK: $xmm0 = VFMADD132PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD132PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADD132PDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD132PDZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADD132PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD132PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADD132PSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD132PSZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADD213PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD213PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADD213PDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD213PDZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADD213PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD213PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADD213PSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD213PSZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADD231PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD231PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADD231PDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD231PDZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADD231PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD231PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADD231PSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD231PSZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADDSUB132PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADDSUB132PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADDSUB132PDr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADDSUB132PDZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADDSUB132PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADDSUB132PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADDSUB132PSr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADDSUB132PSZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADDSUB213PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADDSUB213PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADDSUB213PDr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADDSUB213PDZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADDSUB213PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADDSUB213PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADDSUB213PSr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADDSUB213PSZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADDSUB231PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADDSUB231PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADDSUB231PDr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADDSUB231PDZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMADDSUB231PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADDSUB231PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMADDSUB231PSr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADDSUB231PSZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUB132PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB132PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUB132PDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB132PDZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUB132PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB132PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUB132PSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB132PSZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUB213PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB213PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUB213PDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB213PDZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUB213PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB213PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUB213PSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB213PSZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUB231PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB231PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUB231PDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB231PDZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUB231PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB231PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUB231PSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB231PSZ128r                     $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUBADD132PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUBADD132PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUBADD132PDr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUBADD132PDZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUBADD132PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUBADD132PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUBADD132PSr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUBADD132PSZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUBADD213PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUBADD213PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUBADD213PDr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUBADD213PDZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUBADD213PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUBADD213PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUBADD213PSr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUBADD213PSZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUBADD231PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUBADD231PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUBADD231PDr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUBADD231PDZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFMSUBADD231PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUBADD231PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFMSUBADD231PSr             $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUBADD231PSZ128r                  $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMADD132PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD132PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMADD132PDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD132PDZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMADD132PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD132PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMADD132PSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD132PSZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMADD213PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD213PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMADD213PDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD213PDZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMADD213PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD213PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMADD213PSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD213PSZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMADD231PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD231PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMADD231PDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD231PDZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMADD231PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD231PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMADD231PSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD231PSZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMSUB132PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB132PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMSUB132PDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB132PDZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMSUB132PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB132PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMSUB132PSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB132PSZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMSUB213PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB213PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMSUB213PDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB213PDZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMSUB213PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB213PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMSUB213PSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB213PSZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMSUB231PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB231PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMSUB231PDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB231PDZ128r                    $xmm0, $xmm1, $xmm2                                 
-  ; CHECK: $xmm0 = VFNMSUB231PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB231PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                      
-  ; CHECK: $xmm0 = VFNMSUB231PSr               $xmm0, $xmm1, $xmm2                     
-  $xmm0 = VFNMSUB231PSZ128r                    $xmm0, $xmm1, $xmm2                                               
+  ; CHECK: $xmm0 = VFMADD132PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD132PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132PDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD132PDZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD132PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132PSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD132PSZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD213PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213PDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD213PDZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD213PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213PSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD213PSZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD231PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231PDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD231PDZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD231PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231PSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD231PSZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB132PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADDSUB132PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB132PDr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADDSUB132PDZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB132PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADDSUB132PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB132PSr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADDSUB132PSZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB213PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADDSUB213PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB213PDr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADDSUB213PDZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB213PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADDSUB213PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB213PSr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADDSUB213PSZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB231PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADDSUB231PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB231PDr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADDSUB231PDZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB231PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADDSUB231PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADDSUB231PSr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADDSUB231PSZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB132PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132PDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB132PDZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB132PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132PSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB132PSZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB213PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213PDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB213PDZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB213PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213PSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB213PSZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231PDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB231PDZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231PDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB231PDZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231PSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB231PSZ128m                     $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231PSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB231PSZ128r                     $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD132PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUBADD132PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD132PDr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUBADD132PDZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD132PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUBADD132PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD132PSr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUBADD132PSZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD213PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUBADD213PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD213PDr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUBADD213PDZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD213PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUBADD213PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD213PSr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUBADD213PSZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD231PDm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUBADD231PDZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD231PDr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUBADD231PDZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD231PSm             $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUBADD231PSZ128m                  $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUBADD231PSr             $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUBADD231PSZ128r                  $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD132PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132PDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD132PDZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD132PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132PSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD132PSZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD213PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213PDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD213PDZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD213PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213PSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD213PSZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD231PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231PDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD231PDZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD231PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231PSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD231PSZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB132PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132PDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB132PDZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB132PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132PSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB132PSZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB213PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213PDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB213PDZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB213PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213PSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB213PSZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231PDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB231PDZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231PDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB231PDZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231PSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB231PSZ128m                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231PSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB231PSZ128r                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
   ; CHECK: $xmm0 = VPSLLDri                    $xmm0, 7
   $xmm0 = VPSLLDZ128ri                         $xmm0, 7                                            
   ; CHECK: $xmm0 = VPSLLDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
@@ -1653,50 +1653,50 @@ body: |
   $xmm0 = VPERMILPSZ128rm                      $xmm0, $rdi, 1, $noreg, 0, $noreg                             
   ; CHECK: $xmm0 = VPERMILPSrr                 $xmm0, $xmm1                            
   $xmm0 = VPERMILPSZ128rr                      $xmm0, $xmm1                                               
-  ; CHECK: $xmm0 = VCVTPH2PSrm                 $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTPH2PSZ128rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTPH2PSrr                 $xmm0
-  $xmm0 = VCVTPH2PSZ128rr                      $xmm0                                               
+  ; CHECK: $xmm0 = VCVTPH2PSrm                 $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTPH2PSZ128rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPH2PSrr                 $xmm0, implicit $mxcsr
+  $xmm0 = VCVTPH2PSZ128rr                      $xmm0, implicit $mxcsr
   ; CHECK: $xmm0 = VCVTDQ2PDrm                 $rdi, 1, $noreg, 0, $noreg
   $xmm0 = VCVTDQ2PDZ128rm                      $rdi, 1, $noreg, 0, $noreg
   ; CHECK: $xmm0 = VCVTDQ2PDrr                 $xmm0     
   $xmm0 = VCVTDQ2PDZ128rr                      $xmm0                                               
-  ; CHECK: $xmm0 = VCVTDQ2PSrm                 $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTDQ2PSZ128rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTDQ2PSrr                 $xmm0   
-  $xmm0 = VCVTDQ2PSZ128rr                      $xmm0                                               
-  ; CHECK: $xmm0 = VCVTPD2DQrm                 $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTPD2DQZ128rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTPD2DQrr                 $xmm0   
-  $xmm0 = VCVTPD2DQZ128rr                      $xmm0                                               
-  ; CHECK: $xmm0 = VCVTPD2PSrm                 $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTPD2PSZ128rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTPD2PSrr                 $xmm0   
-  $xmm0 = VCVTPD2PSZ128rr                      $xmm0                                               
-  ; CHECK: $xmm0 = VCVTPS2DQrm                 $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTPS2DQZ128rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTPS2DQrr                 $xmm0   
-  $xmm0 = VCVTPS2DQZ128rr                      $xmm0                                               
-  ; CHECK: $xmm0 = VCVTPS2PDrm                 $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTPS2PDZ128rm                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTPS2PDrr                 $xmm0
-  $xmm0 = VCVTPS2PDZ128rr                      $xmm0                                               
-  ; CHECK: $xmm0 = VCVTTPD2DQrm                $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTTPD2DQZ128rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTTPD2DQrr                $xmm0  
-  $xmm0 = VCVTTPD2DQZ128rr                     $xmm0                                               
-  ; CHECK: $xmm0 = VCVTTPS2DQrm                $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTTPS2DQZ128rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTTPS2DQrr                $xmm0                                   
-  $xmm0 = VCVTTPS2DQZ128rr                     $xmm0                                               
-  ; CHECK: $xmm0 = VSQRTPDm                    $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VSQRTPDZ128m                         $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VSQRTPDr                    $xmm0
-  $xmm0 = VSQRTPDZ128r                         $xmm0                                               
-  ; CHECK: $xmm0 = VSQRTPSm                    $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VSQRTPSZ128m                         $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VSQRTPSr                    $xmm0                                   
-  $xmm0 = VSQRTPSZ128r                         $xmm0                                               
+  ; CHECK: $xmm0 = VCVTDQ2PSrm                 $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTDQ2PSZ128rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTDQ2PSrr                 $xmm0, implicit $mxcsr
+  $xmm0 = VCVTDQ2PSZ128rr                      $xmm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPD2DQrm                 $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTPD2DQZ128rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPD2DQrr                 $xmm0, implicit $mxcsr
+  $xmm0 = VCVTPD2DQZ128rr                      $xmm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPD2PSrm                 $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTPD2PSZ128rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPD2PSrr                 $xmm0, implicit $mxcsr
+  $xmm0 = VCVTPD2PSZ128rr                      $xmm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPS2DQrm                 $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTPS2DQZ128rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPS2DQrr                 $xmm0, implicit $mxcsr
+  $xmm0 = VCVTPS2DQZ128rr                      $xmm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPS2PDrm                 $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTPS2PDZ128rm                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPS2PDrr                 $xmm0, implicit $mxcsr
+  $xmm0 = VCVTPS2PDZ128rr                      $xmm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTTPD2DQrm                $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTTPD2DQZ128rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTTPD2DQrr                $xmm0, implicit $mxcsr
+  $xmm0 = VCVTTPD2DQZ128rr                     $xmm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTTPS2DQrm                $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTTPS2DQZ128rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTTPS2DQrr                $xmm0, implicit $mxcsr
+  $xmm0 = VCVTTPS2DQZ128rr                     $xmm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTPDm                    $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSQRTPDZ128m                         $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTPDr                    $xmm0, implicit $mxcsr
+  $xmm0 = VSQRTPDZ128r                         $xmm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTPSm                    $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSQRTPSZ128m                         $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTPSr                    $xmm0, implicit $mxcsr
+  $xmm0 = VSQRTPSZ128r                         $xmm0, implicit $mxcsr
   ; CHECK: $xmm0 = VMOVDDUPrm                  $rdi, 1, $noreg, 0, $noreg     
   $xmm0 = VMOVDDUPZ128rm                       $rdi, 1, $noreg, 0, $noreg                                    
   ; CHECK: $xmm0 = VMOVDDUPrr                  $xmm0    
@@ -1763,10 +1763,10 @@ body: |
   $xmm0 = VBROADCASTI32X2Z128m                 $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm0 = VPBROADCASTQrr              $xmm0
   $xmm0 = VBROADCASTI32X2Z128r                 $xmm0
-  ; CHECK: $xmm0 = VCVTPS2PHrr                 $xmm0, 2
-  $xmm0 = VCVTPS2PHZ128rr                      $xmm0, 2                                            
-  ; CHECK: VCVTPS2PHmr                         $rdi, 1, $noreg, 0, $noreg, $xmm0, 2
-  VCVTPS2PHZ128mr                              $rdi, 1, $noreg, 0, $noreg, $xmm0, 2
+  ; CHECK: $xmm0 = VCVTPS2PHrr                 $xmm0, 2, implicit $mxcsr
+  $xmm0 = VCVTPS2PHZ128rr                      $xmm0, 2, implicit $mxcsr
+  ; CHECK: VCVTPS2PHmr                         $rdi, 1, $noreg, 0, $noreg, $xmm0, 2, implicit $mxcsr
+  VCVTPS2PHZ128mr                              $rdi, 1, $noreg, 0, $noreg, $xmm0, 2, implicit $mxcsr
   ; CHECK: $xmm0 = VPABSBrm                    $rip, 1, $rax, 0, $noreg
   $xmm0 = VPABSBZ128rm                         $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm0 = VPABSBrr                    $xmm0
@@ -1791,14 +1791,14 @@ body: |
   $xmm0 = VALIGNQZ128rmi                       $xmm0, $rip, 1, $rax, 0, $noreg, 1
   ; CHECK: $xmm0 = VPALIGNRrri                 $xmm0, $xmm1, 8
   $xmm0 = VALIGNQZ128rri                       $xmm0, $xmm1, 1
-  ; CHECK: $xmm0 = VROUNDPDm                   $rip, 1, $rax, 0, $noreg, 15
-  $xmm0 = VRNDSCALEPDZ128rmi                   $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm0 = VROUNDPDr                   $xmm0, 15
-  $xmm0 = VRNDSCALEPDZ128rri                   $xmm0, 15
-  ; CHECK: $xmm0 = VROUNDPSm                   $rip, 1, $rax, 0, $noreg, 15
-  $xmm0 = VRNDSCALEPSZ128rmi                   $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm0 = VROUNDPSr                   $xmm0, 15
-  $xmm0 = VRNDSCALEPSZ128rri                   $xmm0, 15
+  ; CHECK: $xmm0 = VROUNDPDm                   $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALEPDZ128rmi                   $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VROUNDPDr                   $xmm0, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALEPDZ128rri                   $xmm0, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VROUNDPSm                   $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALEPSZ128rmi                   $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VROUNDPSr                   $xmm0, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALEPSZ128rri                   $xmm0, 15, implicit $mxcsr
 
       RET 0, $zmm0, $zmm1
 ...
@@ -1810,310 +1810,310 @@ name: evex_scalar_to_vex_test
 body: |
   bb.0:
 
-  ; CHECK: $xmm0 = VADDSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VADDSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VADDSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VADDSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VADDSDrr                    $xmm0, $xmm1  
-  $xmm0 = VADDSDZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VADDSDrr_Int                $xmm0, $xmm1
-  $xmm0 = VADDSDZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VADDSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VADDSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VADDSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VADDSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VADDSSrr                    $xmm0, $xmm1
-  $xmm0 = VADDSSZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VADDSSrr_Int                $xmm0, $xmm1
-  $xmm0 = VADDSSZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VDIVSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VDIVSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VDIVSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VDIVSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VDIVSDrr                    $xmm0, $xmm1  
-  $xmm0 = VDIVSDZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VDIVSDrr_Int                $xmm0, $xmm1
-  $xmm0 = VDIVSDZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VDIVSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VDIVSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VDIVSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VDIVSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VDIVSSrr                    $xmm0, $xmm1
-  $xmm0 = VDIVSSZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VDIVSSrr_Int                $xmm0, $xmm1
-  $xmm0 = VDIVSSZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMAXCSDrm                   $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMAXCSDZrm                           $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMAXCSDrr                   $xmm0, $xmm1
-  $xmm0 = VMAXCSDZrr                           $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMAXCSSrm                   $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMAXCSSZrm                           $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMAXCSSrr                   $xmm0, $xmm1
-  $xmm0 = VMAXCSSZrr                           $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMAXSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMAXSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMAXSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMAXSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMAXSDrr                    $xmm0, $xmm1
-  $xmm0 = VMAXSDZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMAXSDrr_Int                $xmm0, $xmm1
-  $xmm0 = VMAXSDZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMAXSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMAXSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMAXSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMAXSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMAXSSrr                    $xmm0, $xmm1
-  $xmm0 = VMAXSSZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMAXSSrr_Int                $xmm0, $xmm1
-  $xmm0 = VMAXSSZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMINCSDrm                   $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMINCSDZrm                           $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMINCSDrr                   $xmm0, $xmm1
-  $xmm0 = VMINCSDZrr                           $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMINCSSrm                   $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMINCSSZrm                           $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMINCSSrr                   $xmm0, $xmm1
-  $xmm0 = VMINCSSZrr                           $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMINSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMINSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMINSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMINSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMINSDrr                    $xmm0, $xmm1
-  $xmm0 = VMINSDZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMINSDrr_Int                $xmm0, $xmm1
-  $xmm0 = VMINSDZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMINSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMINSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMINSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMINSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMINSSrr                    $xmm0, $xmm1
-  $xmm0 = VMINSSZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMINSSrr_Int                $xmm0, $xmm1
-  $xmm0 = VMINSSZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMULSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMULSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMULSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMULSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMULSDrr                    $xmm0, $xmm1
-  $xmm0 = VMULSDZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMULSDrr_Int                $xmm0, $xmm1
-  $xmm0 = VMULSDZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMULSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMULSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMULSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VMULSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VMULSSrr                    $xmm0, $xmm1  
-  $xmm0 = VMULSSZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VMULSSrr_Int                $xmm0, $xmm1
-  $xmm0 = VMULSSZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VSUBSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VSUBSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VSUBSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VSUBSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VSUBSDrr                    $xmm0, $xmm1  
-  $xmm0 = VSUBSDZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VSUBSDrr_Int                $xmm0, $xmm1
-  $xmm0 = VSUBSDZrr_Int                        $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VSUBSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VSUBSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VSUBSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg
-  $xmm0 = VSUBSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm0 = VSUBSSrr                    $xmm0, $xmm1
-  $xmm0 = VSUBSSZrr                            $xmm0, $xmm1                                            
-  ; CHECK: $xmm0 = VSUBSSrr_Int                $xmm0, $xmm1                                               
-  $xmm0 = VSUBSSZrr_Int                        $xmm0, $xmm1                                               
-  ; CHECK: $xmm0 = VFMADD132SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD132SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD132SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD132SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD132SDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD132SDZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD132SDr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD132SDZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD132SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD132SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD132SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD132SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD132SSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD132SSZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD132SSr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD132SSZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD213SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD213SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD213SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD213SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD213SDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD213SDZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD213SDr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD213SDZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD213SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD213SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD213SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD213SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD213SSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD213SSZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD213SSr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD213SSZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD231SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD231SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD231SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD231SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD231SDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD231SDZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD231SDr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD231SDZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD231SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD231SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD231SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMADD231SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMADD231SSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD231SSZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMADD231SSr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMADD231SSZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB132SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB132SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB132SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB132SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB132SDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB132SDZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB132SDr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB132SDZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB132SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB132SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB132SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB132SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB132SSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB132SSZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB132SSr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB132SSZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB213SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB213SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB213SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB213SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB213SDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB213SDZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB213SDr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB213SDZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB213SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB213SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB213SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB213SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB213SSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB213SSZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB213SSr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB213SSZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB231SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB231SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB231SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB231SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB231SDr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB231SDZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB231SDr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB231SDZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB231SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB231SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB231SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFMSUB231SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFMSUB231SSr                $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB231SSZr                        $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFMSUB231SSr_Int            $xmm0, $xmm1, $xmm2
-  $xmm0 = VFMSUB231SSZr_Int                    $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD132SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD132SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD132SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD132SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD132SDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD132SDZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD132SDr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD132SDZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD132SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD132SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD132SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD132SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD132SSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD132SSZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD132SSr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD132SSZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD213SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD213SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD213SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD213SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD213SDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD213SDZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD213SDr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD213SDZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD213SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD213SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD213SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD213SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD213SSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD213SSZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD213SSr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD213SSZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD231SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD231SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD231SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD231SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD231SDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD231SDZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD231SDr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD231SDZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD231SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD231SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD231SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMADD231SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMADD231SSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD231SSZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMADD231SSr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMADD231SSZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB132SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB132SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB132SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB132SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB132SDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB132SDZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB132SDr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB132SDZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB132SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB132SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB132SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB132SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB132SSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB132SSZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB132SSr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB132SSZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB213SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB213SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB213SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB213SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB213SDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB213SDZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB213SDr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB213SDZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB213SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB213SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB213SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB213SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB213SSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB213SSZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB213SSr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB213SSZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB231SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB231SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB231SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB231SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB231SDr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB231SDZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB231SDr_Int           $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB231SDZr_Int                   $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB231SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB231SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB231SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg
-  $xmm0 = VFNMSUB231SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm0 = VFNMSUB231SSr               $xmm0, $xmm1, $xmm2
-  $xmm0 = VFNMSUB231SSZr                       $xmm0, $xmm1, $xmm2                                     
-  ; CHECK: $xmm0 = VFNMSUB231SSr_Int           $xmm0, $xmm1, $xmm2                                               
-  $xmm0 = VFNMSUB231SSZr_Int                   $xmm0, $xmm1, $xmm2                                               
+  ; CHECK: $xmm0 = VADDSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VADDSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VADDSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VADDSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VADDSDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VADDSDZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VADDSDrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VADDSDZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VADDSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VADDSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VADDSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VADDSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VADDSSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VADDSSZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VADDSSrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VADDSSZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VDIVSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VDIVSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVSDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VDIVSDZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVSDrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VDIVSDZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VDIVSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VDIVSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVSSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VDIVSSZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VDIVSSrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VDIVSSZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXCSDrm                   $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMAXCSDZrm                           $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXCSDrr                   $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMAXCSDZrr                           $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXCSSrm                   $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMAXCSSZrm                           $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXCSSrr                   $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMAXCSSZrr                           $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMAXSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMAXSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXSDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMAXSDZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXSDrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMAXSDZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMAXSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMAXSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXSSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMAXSSZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMAXSSrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMAXSSZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINCSDrm                   $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMINCSDZrm                           $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINCSDrr                   $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMINCSDZrr                           $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINCSSrm                   $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMINCSSZrm                           $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINCSSrr                   $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMINCSSZrr                           $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMINSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMINSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINSDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMINSDZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINSDrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMINSDZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMINSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMINSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINSSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMINSSZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMINSSrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMINSSZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMULSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMULSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULSDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMULSDZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULSDrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMULSDZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMULSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VMULSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULSSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMULSSZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VMULSSrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VMULSSZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBSDrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSUBSDZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBSDrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSUBSDZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBSDrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VSUBSDZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBSDrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VSUBSDZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBSSrm                    $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSUBSSZrm                            $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBSSrm_Int                $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSUBSSZrm_Int                        $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBSSrr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VSUBSSZrr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VSUBSSrr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VSUBSSZrr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD132SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD132SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132SDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD132SDZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132SDr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD132SDZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD132SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD132SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132SSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD132SSZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD132SSr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD132SSZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD213SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD213SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213SDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD213SDZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213SDr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD213SDZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD213SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD213SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213SSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD213SSZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD213SSr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD213SSZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD231SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD231SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231SDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD231SDZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231SDr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD231SDZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD231SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMADD231SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231SSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD231SSZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMADD231SSr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMADD231SSZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB132SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB132SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132SDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB132SDZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132SDr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB132SDZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB132SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB132SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132SSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB132SSZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB132SSr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB132SSZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB213SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB213SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213SDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB213SDZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213SDr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB213SDZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB213SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB213SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213SSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB213SSZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB213SSr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB213SSZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231SDm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB231SDZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231SDm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB231SDZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231SDr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB231SDZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231SDr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB231SDZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231SSm                $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB231SSZm                        $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231SSm_Int            $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFMSUB231SSZm_Int                    $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231SSr                $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB231SSZr                        $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFMSUB231SSr_Int            $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFMSUB231SSZr_Int                    $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD132SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD132SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132SDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD132SDZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132SDr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD132SDZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD132SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD132SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132SSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD132SSZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD132SSr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD132SSZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD213SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD213SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213SDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD213SDZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213SDr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD213SDZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD213SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD213SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213SSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD213SSZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD213SSr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD213SSZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD231SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD231SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231SDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD231SDZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231SDr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD231SDZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD231SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMADD231SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231SSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD231SSZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMADD231SSr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMADD231SSZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB132SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB132SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132SDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB132SDZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132SDr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB132SDZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB132SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB132SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132SSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB132SSZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB132SSr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB132SSZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB213SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB213SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213SDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB213SDZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213SDr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB213SDZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB213SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB213SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213SSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB213SSZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB213SSr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB213SSZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231SDm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB231SDZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231SDm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB231SDZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231SDr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB231SDZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231SDr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB231SDZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231SSm               $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB231SSZm                       $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231SSm_Int           $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VFNMSUB231SSZm_Int                   $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231SSr               $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB231SSZr                       $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm0 = VFNMSUB231SSr_Int           $xmm0, $xmm1, $xmm2, implicit $mxcsr
+  $xmm0 = VFNMSUB231SSZr_Int                   $xmm0, $xmm1, $xmm2, implicit $mxcsr
   ; CHECK: VPEXTRBmr                           $rdi, 1, $noreg, 0, $noreg, $xmm0, 3       
   VPEXTRBZmr                                   $rdi, 1, $noreg, 0, $noreg, $xmm0, 3                              
   ; CHECK: $eax = VPEXTRBrr                    $xmm0, 1    
@@ -2148,34 +2148,34 @@ body: |
   $xmm0 = VPINSRWZrm                           $xmm0, $rsi, 1, $noreg, 0, $noreg, 3                              
   ; CHECK: $xmm0 = VPINSRWrr                   $xmm0, $edi, 5                                               
   $xmm0 = VPINSRWZrr                           $xmm0, $edi, 5                                              
-  ; CHECK: $xmm0 = VSQRTSDm                    $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VSQRTSDZm                            $xmm0, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VSQRTSDm_Int                $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VSQRTSDZm_Int                        $xmm0, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VSQRTSDr                    $xmm0, $noreg 
-  $xmm0 = VSQRTSDZr                            $xmm0, $noreg                                                
-  ; CHECK: $xmm0 = VSQRTSDr_Int                $xmm0, $noreg
-  $xmm0 = VSQRTSDZr_Int                        $xmm0, $noreg                                                
-  ; CHECK: $xmm0 = VSQRTSSm                    $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VSQRTSSZm                            $xmm0, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VSQRTSSm_Int                $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VSQRTSSZm_Int                        $xmm0, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VSQRTSSr                    $xmm0, $xmm1
-  $xmm0 = VSQRTSSZr                            $xmm0, $xmm1
-  ; CHECK: $xmm0 = VSQRTSSr_Int                $xmm0, $xmm1
-  $xmm0 = VSQRTSSZr_Int                        $xmm0, $xmm1
-  ; CHECK: $rdi = VCVTSD2SI64rr_Int            $xmm0
-  $rdi = VCVTSD2SI64Zrr_Int                    $xmm0                                                   
-  ; CHECK: $edi = VCVTSD2SIrr_Int              $xmm0
-  $edi = VCVTSD2SIZrr_Int                      $xmm0                                                   
-  ; CHECK: $xmm0 = VCVTSD2SSrm                 $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTSD2SSZrm                         $xmm0, $rdi, 1, $noreg, 0, $noreg                                 
-  ; CHECK: $xmm0 = VCVTSD2SSrm_Int             $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTSD2SSZrm_Int                     $xmm0, $rdi, 1, $noreg, 0, $noreg                                 
-  ; CHECK: $xmm0 = VCVTSD2SSrr                 $xmm0, $xmm1
-  $xmm0 = VCVTSD2SSZrr                         $xmm0, $xmm1
-  ; CHECK: $xmm0 = VCVTSD2SSrr_Int             $xmm0, $xmm1
-  $xmm0 = VCVTSD2SSZrr_Int                     $xmm0, $xmm1
+  ; CHECK: $xmm0 = VSQRTSDm                    $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSQRTSDZm                            $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTSDm_Int                $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSQRTSDZm_Int                        $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTSDr                    $xmm0, $noreg, implicit $mxcsr
+  $xmm0 = VSQRTSDZr                            $xmm0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTSDr_Int                $xmm0, $noreg, implicit $mxcsr
+  $xmm0 = VSQRTSDZr_Int                        $xmm0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTSSm                    $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSQRTSSZm                            $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTSSm_Int                $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VSQRTSSZm_Int                        $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTSSr                    $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VSQRTSSZr                            $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VSQRTSSr_Int                $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VSQRTSSZr_Int                        $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $rdi = VCVTSD2SI64rr_Int            $xmm0, implicit $mxcsr
+  $rdi = VCVTSD2SI64Zrr_Int                    $xmm0, implicit $mxcsr
+  ; CHECK: $edi = VCVTSD2SIrr_Int              $xmm0, implicit $mxcsr
+  $edi = VCVTSD2SIZrr_Int                      $xmm0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSD2SSrm                 $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTSD2SSZrm                         $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSD2SSrm_Int             $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTSD2SSZrm_Int                     $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSD2SSrr                 $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VCVTSD2SSZrr                         $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSD2SSrr_Int             $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VCVTSD2SSZrr_Int                     $xmm0, $xmm1, implicit $mxcsr
   ; CHECK: $xmm0 = VCVTSI2SDrm                 $xmm0, $rdi, 1, $noreg, 0, $noreg
   $xmm0 = VCVTSI2SDZrm                         $xmm0, $rdi, 1, $noreg, 0, $noreg                                 
   ; CHECK: $xmm0 = VCVTSI2SDrm_Int             $xmm0, $rdi, 1, $noreg, 0, $noreg
@@ -2184,78 +2184,78 @@ body: |
   $xmm0 = VCVTSI2SDZrr                         $xmm0, $edi
   ; CHECK: $xmm0 = VCVTSI2SDrr_Int             $xmm0, $edi
   $xmm0 = VCVTSI2SDZrr_Int                     $xmm0, $edi
-  ; CHECK: $xmm0 = VCVTSI2SSrm                 $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTSI2SSZrm                         $xmm0, $rdi, 1, $noreg, 0, $noreg                                 
-  ; CHECK: $xmm0 = VCVTSI2SSrm_Int             $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTSI2SSZrm_Int                     $xmm0, $rdi, 1, $noreg, 0, $noreg                                 
-  ; CHECK: $xmm0 = VCVTSI2SSrr                 $xmm0, $edi
-  $xmm0 = VCVTSI2SSZrr                         $xmm0, $edi
-  ; CHECK: $xmm0 = VCVTSI2SSrr_Int             $xmm0, $edi
-  $xmm0 = VCVTSI2SSZrr_Int                     $xmm0, $edi
-  ; CHECK: $xmm0 = VCVTSI642SDrm               $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTSI642SDZrm                       $xmm0, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTSI642SDrm_Int           $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTSI642SDZrm_Int                   $xmm0, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTSI642SDrr               $xmm0, $rdi
-  $xmm0 = VCVTSI642SDZrr                       $xmm0, $rdi
-  ; CHECK: $xmm0 = VCVTSI642SDrr_Int           $xmm0, $rdi
-  $xmm0 = VCVTSI642SDZrr_Int                   $xmm0, $rdi
-  ; CHECK: $xmm0 = VCVTSI642SSrm               $xmm0, $rdi, 1, $noreg, 0, $noreg 
-  $xmm0 = VCVTSI642SSZrm                       $xmm0, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTSI642SSrm_Int           $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTSI642SSZrm_Int                   $xmm0, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm0 = VCVTSI642SSrr               $xmm0, $rdi
-  $xmm0 = VCVTSI642SSZrr                       $xmm0, $rdi
-  ; CHECK: $xmm0 = VCVTSI642SSrr_Int           $xmm0, $rdi
-  $xmm0 = VCVTSI642SSZrr_Int                   $xmm0, $rdi
-  ; CHECK: $xmm0 = VCVTSS2SDrm                 $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTSS2SDZrm                         $xmm0, $rdi, 1, $noreg, 0, $noreg                                 
-  ; CHECK: $xmm0 = VCVTSS2SDrm_Int             $xmm0, $rdi, 1, $noreg, 0, $noreg
-  $xmm0 = VCVTSS2SDZrm_Int                     $xmm0, $rdi, 1, $noreg, 0, $noreg                                 
-  ; CHECK: $xmm0 = VCVTSS2SDrr                 $xmm0, $xmm1
-  $xmm0 = VCVTSS2SDZrr                         $xmm0, $xmm1
-  ; CHECK: $xmm0 = VCVTSS2SDrr_Int             $xmm0, $xmm1
-  $xmm0 = VCVTSS2SDZrr_Int                     $xmm0, $xmm1
-  ; CHECK: $rdi = VCVTSS2SI64rm_Int            $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTSS2SI64Zrm_Int                    $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTSS2SI64rr_Int            $xmm0
-  $rdi = VCVTSS2SI64Zrr_Int                    $xmm0                                                   
-  ; CHECK: $edi = VCVTSS2SIrm_Int              $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTSS2SIZrm_Int                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTSS2SIrr_Int              $xmm0
-  $edi = VCVTSS2SIZrr_Int                      $xmm0                                                   
-  ; CHECK: $rdi = VCVTTSD2SI64rm               $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTTSD2SI64Zrm                       $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTTSD2SI64rm_Int           $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTTSD2SI64Zrm_Int                   $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTTSD2SI64rr               $xmm0
-  $rdi = VCVTTSD2SI64Zrr                       $xmm0                                                   
-  ; CHECK: $rdi = VCVTTSD2SI64rr_Int           $xmm0
-  $rdi = VCVTTSD2SI64Zrr_Int                   $xmm0                                                   
-  ; CHECK: $edi = VCVTTSD2SIrm                 $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTTSD2SIZrm                         $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTTSD2SIrm_Int             $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTTSD2SIZrm_Int                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTTSD2SIrr                 $xmm0
-  $edi = VCVTTSD2SIZrr                         $xmm0                                                   
-  ; CHECK: $edi = VCVTTSD2SIrr_Int             $xmm0
-  $edi = VCVTTSD2SIZrr_Int                     $xmm0                                                   
-  ; CHECK: $rdi = VCVTTSS2SI64rm               $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTTSS2SI64Zrm                       $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTTSS2SI64rm_Int           $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTTSS2SI64Zrm_Int                   $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTTSS2SI64rr               $xmm0
-  $rdi = VCVTTSS2SI64Zrr                       $xmm0                                                   
-  ; CHECK: $rdi = VCVTTSS2SI64rr_Int           $xmm0
-  $rdi = VCVTTSS2SI64Zrr_Int                   $xmm0                                                   
-  ; CHECK: $edi = VCVTTSS2SIrm                 $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTTSS2SIZrm                         $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTTSS2SIrm_Int             $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTTSS2SIZrm_Int                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTTSS2SIrr                 $xmm0
-  $edi = VCVTTSS2SIZrr                         $xmm0                                                   
-  ; CHECK: $edi = VCVTTSS2SIrr_Int             $xmm0                                               
-  $edi = VCVTTSS2SIZrr_Int                     $xmm0                                                   
+  ; CHECK: $xmm0 = VCVTSI2SSrm                 $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTSI2SSZrm                         $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI2SSrm_Int             $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTSI2SSZrm_Int                     $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI2SSrr                 $xmm0, $edi, implicit $mxcsr
+  $xmm0 = VCVTSI2SSZrr                         $xmm0, $edi, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI2SSrr_Int             $xmm0, $edi, implicit $mxcsr
+  $xmm0 = VCVTSI2SSZrr_Int                     $xmm0, $edi, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI642SDrm               $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTSI642SDZrm                       $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI642SDrm_Int           $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTSI642SDZrm_Int                   $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI642SDrr               $xmm0, $rdi, implicit $mxcsr
+  $xmm0 = VCVTSI642SDZrr                       $xmm0, $rdi, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI642SDrr_Int           $xmm0, $rdi, implicit $mxcsr
+  $xmm0 = VCVTSI642SDZrr_Int                   $xmm0, $rdi, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI642SSrm               $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTSI642SSZrm                       $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI642SSrm_Int           $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTSI642SSZrm_Int                   $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI642SSrr               $xmm0, $rdi, implicit $mxcsr
+  $xmm0 = VCVTSI642SSZrr                       $xmm0, $rdi, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSI642SSrr_Int           $xmm0, $rdi, implicit $mxcsr
+  $xmm0 = VCVTSI642SSZrr_Int                   $xmm0, $rdi, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSS2SDrm                 $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTSS2SDZrm                         $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSS2SDrm_Int             $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm0 = VCVTSS2SDZrm_Int                     $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSS2SDrr                 $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VCVTSS2SDZrr                         $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTSS2SDrr_Int             $xmm0, $xmm1, implicit $mxcsr
+  $xmm0 = VCVTSS2SDZrr_Int                     $xmm0, $xmm1, implicit $mxcsr
+  ; CHECK: $rdi = VCVTSS2SI64rm_Int            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTSS2SI64Zrm_Int                    $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTSS2SI64rr_Int            $xmm0, implicit $mxcsr
+  $rdi = VCVTSS2SI64Zrr_Int                    $xmm0, implicit $mxcsr
+  ; CHECK: $edi = VCVTSS2SIrm_Int              $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTSS2SIZrm_Int                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTSS2SIrr_Int              $xmm0, implicit $mxcsr
+  $edi = VCVTSS2SIZrr_Int                      $xmm0, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSD2SI64rm               $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTTSD2SI64Zrm                       $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSD2SI64rm_Int           $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTTSD2SI64Zrm_Int                   $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSD2SI64rr               $xmm0, implicit $mxcsr
+  $rdi = VCVTTSD2SI64Zrr                       $xmm0, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSD2SI64rr_Int           $xmm0, implicit $mxcsr
+  $rdi = VCVTTSD2SI64Zrr_Int                   $xmm0, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSD2SIrm                 $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTTSD2SIZrm                         $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSD2SIrm_Int             $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTTSD2SIZrm_Int                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSD2SIrr                 $xmm0, implicit $mxcsr
+  $edi = VCVTTSD2SIZrr                         $xmm0, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSD2SIrr_Int             $xmm0, implicit $mxcsr
+  $edi = VCVTTSD2SIZrr_Int                     $xmm0, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSS2SI64rm               $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTTSS2SI64Zrm                       $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSS2SI64rm_Int           $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTTSS2SI64Zrm_Int                   $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSS2SI64rr               $xmm0, implicit $mxcsr
+  $rdi = VCVTTSS2SI64Zrr                       $xmm0, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSS2SI64rr_Int           $xmm0, implicit $mxcsr
+  $rdi = VCVTTSS2SI64Zrr_Int                   $xmm0, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSS2SIrm                 $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTTSS2SIZrm                         $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSS2SIrm_Int             $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTTSS2SIZrm_Int                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSS2SIrr                 $xmm0, implicit $mxcsr
+  $edi = VCVTTSS2SIZrr                         $xmm0, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSS2SIrr_Int             $xmm0, implicit $mxcsr
+  $edi = VCVTTSS2SIZrr_Int                     $xmm0, implicit $mxcsr
   ; CHECK: $xmm0 = VMOV64toSDrr                $rdi    
   $xmm0 = VMOV64toSDZrr                        $rdi                                                    
   ; CHECK: $xmm0 = VMOVDI2SSrr                 $eax
@@ -2354,22 +2354,22 @@ body: |
   $xmm0 = VINSERTPSZrm                         $xmm0, $rdi, 1, $noreg, 0, $noreg, 1
   ; CHECK: $xmm0 = VINSERTPSrr                 $xmm0, $xmm0, 1
   $xmm0 = VINSERTPSZrr                         $xmm0, $xmm0, 1
-  ; CHECK: $xmm0 = VROUNDSDm                   $xmm0, $rip, 1, $rax, 0, $noreg, 15
-  $xmm0 = VRNDSCALESDZm                        $xmm0, $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm0 = VROUNDSDr                   $xmm0, $xmm1, 15
-  $xmm0 = VRNDSCALESDZr                        $xmm0, $xmm1, 15
-  ; CHECK: $xmm0 = VROUNDSSm                   $xmm0, $rip, 1, $rax, 0, $noreg, 15
-  $xmm0 = VRNDSCALESSZm                        $xmm0, $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm0 = VROUNDSSr                   $xmm0, $xmm1, 15
-  $xmm0 = VRNDSCALESSZr                        $xmm0, $xmm1, 15
-  ; CHECK: $xmm0 = VROUNDSDm_Int               $xmm0, $rip, 1, $rax, 0, $noreg, 15
-  $xmm0 = VRNDSCALESDZm_Int                    $xmm0, $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm0 = VROUNDSDr_Int               $xmm0, $xmm1, 15
-  $xmm0 = VRNDSCALESDZr_Int                    $xmm0, $xmm1, 15
-  ; CHECK: $xmm0 = VROUNDSSm_Int               $xmm0, $rip, 1, $rax, 0, $noreg, 15
-  $xmm0 = VRNDSCALESSZm_Int                    $xmm0, $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm0 = VROUNDSSr_Int               $xmm0, $xmm1, 15
-  $xmm0 = VRNDSCALESSZr_Int                    $xmm0, $xmm1, 15
+  ; CHECK: $xmm0 = VROUNDSDm                   $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALESDZm                        $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VROUNDSDr                   $xmm0, $xmm1, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALESDZr                        $xmm0, $xmm1, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VROUNDSSm                   $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALESSZm                        $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VROUNDSSr                   $xmm0, $xmm1, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALESSZr                        $xmm0, $xmm1, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VROUNDSDm_Int               $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALESDZm_Int                    $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VROUNDSDr_Int               $xmm0, $xmm1, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALESDZr_Int                    $xmm0, $xmm1, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VROUNDSSm_Int               $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALESSZm_Int                    $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VROUNDSSr_Int               $xmm0, $xmm1, 15, implicit $mxcsr
+  $xmm0 = VRNDSCALESSZr_Int                    $xmm0, $xmm1, 15, implicit $mxcsr
 
   RET 0, $zmm0, $zmm1                          
 ...
@@ -2530,14 +2530,14 @@ body: |
   $ymm16 = VPADDWZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm16 = VPADDWZ256rr               $ymm16, $ymm1
   $ymm16 = VPADDWZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VMULPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VMULPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VMULPDZ256rr               $ymm16, $ymm1
-  $ymm16 = VMULPDZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VMULPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VMULPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VMULPSZ256rr               $ymm16, $ymm1
-  $ymm16 = VMULPSZ256rr                        $ymm16, $ymm1                                 
+  ; CHECK: $ymm16 = VMULPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VMULPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VMULPDZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VMULPDZ256rr                        $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VMULPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VMULPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VMULPSZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VMULPSZ256rr                        $ymm16, $ymm1, implicit $mxcsr
   ; CHECK: $ymm16 = VORPDZ256rm                $ymm16, $rip, 1, $rax, 0, $noreg
   $ymm16 = VORPDZ256rm                         $ymm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm16 = VORPDZ256rr                $ymm16, $ymm1
@@ -2678,14 +2678,14 @@ body: |
   $ymm16 = VPXORQZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm16 = VPXORQZ256rr               $ymm16, $ymm1  
   $ymm16 = VPXORQZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VADDPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VADDPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VADDPDZ256rr               $ymm16, $ymm1
-  $ymm16 = VADDPDZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VADDPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VADDPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VADDPSZ256rr               $ymm16, $ymm1
-  $ymm16 = VADDPSZ256rr                        $ymm16, $ymm1                                 
+  ; CHECK: $ymm16 = VADDPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VADDPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VADDPDZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VADDPDZ256rr                        $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VADDPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VADDPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VADDPSZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VADDPSZ256rr                        $ymm16, $ymm1, implicit $mxcsr
   ; CHECK: $ymm16 = VANDNPDZ256rm              $ymm16, $rip, 1, $rax, 0, $noreg
   $ymm16 = VANDNPDZ256rm                       $ymm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm16 = VANDNPDZ256rr              $ymm16, $ymm1
@@ -2702,46 +2702,46 @@ body: |
   $ymm16 = VANDPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm16 = VANDPSZ256rr               $ymm16, $ymm1
   $ymm16 = VANDPSZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VDIVPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VDIVPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VDIVPDZ256rr               $ymm16, $ymm1  
-  $ymm16 = VDIVPDZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VDIVPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VDIVPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VDIVPSZ256rr               $ymm16, $ymm1
-  $ymm16 = VDIVPSZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VMAXCPDZ256rm              $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VMAXCPDZ256rm                       $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VMAXCPDZ256rr              $ymm16, $ymm1
-  $ymm16 = VMAXCPDZ256rr                       $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VMAXCPSZ256rm              $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VMAXCPSZ256rm                       $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VMAXCPSZ256rr              $ymm16, $ymm1
-  $ymm16 = VMAXCPSZ256rr                       $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VMAXPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VMAXPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VMAXPDZ256rr               $ymm16, $ymm1
-  $ymm16 = VMAXPDZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VMAXPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VMAXPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VMAXPSZ256rr               $ymm16, $ymm1
-  $ymm16 = VMAXPSZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VMINCPDZ256rm              $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VMINCPDZ256rm                       $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VMINCPDZ256rr              $ymm16, $ymm1
-  $ymm16 = VMINCPDZ256rr                       $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VMINCPSZ256rm              $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VMINCPSZ256rm                       $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VMINCPSZ256rr              $ymm16, $ymm1
-  $ymm16 = VMINCPSZ256rr                       $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VMINPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VMINPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VMINPDZ256rr               $ymm16, $ymm1
-  $ymm16 = VMINPDZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VMINPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
-  $ymm16 = VMINPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VMINPSZ256rr               $ymm16, $ymm1
-  $ymm16 = VMINPSZ256rr                        $ymm16, $ymm1                                 
+  ; CHECK: $ymm16 = VDIVPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VDIVPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VDIVPDZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VDIVPDZ256rr                        $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VDIVPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VDIVPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VDIVPSZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VDIVPSZ256rr                        $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VMAXCPDZ256rm              $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VMAXCPDZ256rm                       $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VMAXCPDZ256rr              $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VMAXCPDZ256rr                       $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VMAXCPSZ256rm              $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VMAXCPSZ256rm                       $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VMAXCPSZ256rr              $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VMAXCPSZ256rr                       $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VMAXPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VMAXPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VMAXPDZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VMAXPDZ256rr                        $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VMAXPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VMAXPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VMAXPSZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VMAXPSZ256rr                        $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VMINCPDZ256rm              $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VMINCPDZ256rm                       $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VMINCPDZ256rr              $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VMINCPDZ256rr                       $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VMINCPSZ256rm              $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VMINCPSZ256rm                       $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VMINCPSZ256rr              $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VMINCPSZ256rr                       $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VMINPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VMINPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VMINPDZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VMINPDZ256rr                        $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VMINPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VMINPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VMINPSZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VMINPSZ256rr                        $ymm16, $ymm1, implicit $mxcsr
   ; CHECK: $ymm16 = VXORPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
   $ymm16 = VXORPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm16 = VXORPDZ256rr               $ymm16, $ymm1
@@ -2782,14 +2782,14 @@ body: |
   $ymm16 = VUNPCKLPSZ256rm                     $ymm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm16 = VUNPCKLPSZ256rr            $ymm16, $ymm1
   $ymm16 = VUNPCKLPSZ256rr                     $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VSUBPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg 
-  $ymm16 = VSUBPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VSUBPDZ256rr               $ymm16, $ymm1 
-  $ymm16 = VSUBPDZ256rr                        $ymm16, $ymm1                                 
-  ; CHECK: $ymm16 = VSUBPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg 
-  $ymm16 = VSUBPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $ymm16 = VSUBPSZ256rr               $ymm16, $ymm1   
-  $ymm16 = VSUBPSZ256rr                        $ymm16, $ymm1                                                
+  ; CHECK: $ymm16 = VSUBPDZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VSUBPDZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VSUBPDZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VSUBPDZ256rr                        $ymm16, $ymm1, implicit $mxcsr
+  ; CHECK: $ymm16 = VSUBPSZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $ymm16 = VSUBPSZ256rm                        $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VSUBPSZ256rr               $ymm16, $ymm1, implicit $mxcsr
+  $ymm16 = VSUBPSZ256rr                        $ymm16, $ymm1, implicit $mxcsr
   ; CHECK: $ymm16 = VPUNPCKHBWZ256rm           $ymm16, $rip, 1, $rax, 0, $noreg
   $ymm16 = VPUNPCKHBWZ256rm                    $ymm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm16 = VPUNPCKHBWZ256rr           $ymm16, $ymm1
@@ -2822,150 +2822,150 @@ body: |
   $ymm16 = VPUNPCKLWDZ256rm                    $ymm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $ymm16 = VPUNPCKLWDZ256rr           $ymm16, $ymm1   
   $ymm16 = VPUNPCKLWDZ256rr                    $ymm16, $ymm1                                                
-  ; CHECK: $ymm16 = VFMADD132PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADD132PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADD132PDZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADD132PDZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADD132PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADD132PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADD132PSZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADD132PSZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADD213PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADD213PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADD213PDZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADD213PDZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADD213PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADD213PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADD213PSZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADD213PSZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADD231PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADD231PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADD231PDZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADD231PDZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADD231PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADD231PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADD231PSZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADD231PSZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADDSUB132PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADDSUB132PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADDSUB132PDZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADDSUB132PDZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADDSUB132PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADDSUB132PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADDSUB132PSZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADDSUB132PSZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADDSUB213PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADDSUB213PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADDSUB213PDZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADDSUB213PDZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADDSUB213PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADDSUB213PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADDSUB213PSZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADDSUB213PSZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADDSUB231PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADDSUB231PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADDSUB231PDZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADDSUB231PDZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMADDSUB231PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMADDSUB231PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMADDSUB231PSZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMADDSUB231PSZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUB132PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUB132PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUB132PDZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUB132PDZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUB132PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUB132PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUB132PSZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUB132PSZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUB213PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUB213PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUB213PDZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUB213PDZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUB213PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUB213PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUB213PSZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUB213PSZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUB231PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUB231PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUB231PDZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUB231PDZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUB231PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUB231PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUB231PSZ256r           $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUB231PSZ256r                    $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUBADD132PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUBADD132PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUBADD132PDZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUBADD132PDZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUBADD132PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUBADD132PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUBADD132PSZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUBADD132PSZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUBADD213PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUBADD213PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUBADD213PDZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUBADD213PDZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUBADD213PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUBADD213PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUBADD213PSZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUBADD213PSZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUBADD231PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUBADD231PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUBADD231PDZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUBADD231PDZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFMSUBADD231PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFMSUBADD231PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFMSUBADD231PSZ256r        $ymm16, $ymm1, $ymm2
-  $ymm16 = VFMSUBADD231PSZ256r                 $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMADD132PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMADD132PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMADD132PDZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMADD132PDZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMADD132PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMADD132PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMADD132PSZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMADD132PSZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMADD213PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMADD213PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMADD213PDZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMADD213PDZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMADD213PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMADD213PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMADD213PSZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMADD213PSZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMADD231PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMADD231PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMADD231PDZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMADD231PDZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMADD231PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMADD231PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMADD231PSZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMADD231PSZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMSUB132PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMSUB132PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMSUB132PDZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMSUB132PDZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMSUB132PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMSUB132PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMSUB132PSZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMSUB132PSZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMSUB213PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMSUB213PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMSUB213PDZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMSUB213PDZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMSUB213PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMSUB213PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMSUB213PSZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMSUB213PSZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMSUB231PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMSUB231PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMSUB231PDZ256r          $ymm16, $ymm1, $ymm2
-  $ymm16 = VFNMSUB231PDZ256r                   $ymm16, $ymm1, $ymm2                          
-  ; CHECK: $ymm16 = VFNMSUB231PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg
-  $ymm16 = VFNMSUB231PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg              
-  ; CHECK: $ymm16 = VFNMSUB231PSZ256r          $ymm16, $ymm1, $ymm2  
-  $ymm16 = VFNMSUB231PSZ256r                   $ymm16, $ymm1, $ymm2                                              
+  ; CHECK: $ymm16 = VFMADD132PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADD132PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD132PDZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADD132PDZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD132PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADD132PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD132PSZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADD132PSZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD213PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADD213PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD213PDZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADD213PDZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD213PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADD213PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD213PSZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADD213PSZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD231PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADD231PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD231PDZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADD231PDZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD231PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADD231PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADD231PSZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADD231PSZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB132PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADDSUB132PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB132PDZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADDSUB132PDZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB132PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADDSUB132PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB132PSZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADDSUB132PSZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB213PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADDSUB213PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB213PDZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADDSUB213PDZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB213PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADDSUB213PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB213PSZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADDSUB213PSZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB231PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADDSUB231PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB231PDZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADDSUB231PDZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB231PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMADDSUB231PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMADDSUB231PSZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMADDSUB231PSZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB132PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUB132PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB132PDZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUB132PDZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB132PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUB132PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB132PSZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUB132PSZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB213PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUB213PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB213PDZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUB213PDZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB213PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUB213PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB213PSZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUB213PSZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB231PDZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUB231PDZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB231PDZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUB231PDZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB231PSZ256m           $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUB231PSZ256m                    $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUB231PSZ256r           $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUB231PSZ256r                    $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD132PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUBADD132PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD132PDZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUBADD132PDZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD132PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUBADD132PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD132PSZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUBADD132PSZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD213PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUBADD213PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD213PDZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUBADD213PDZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD213PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUBADD213PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD213PSZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUBADD213PSZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD231PDZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUBADD231PDZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD231PDZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUBADD231PDZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD231PSZ256m        $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFMSUBADD231PSZ256m                 $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFMSUBADD231PSZ256r        $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFMSUBADD231PSZ256r                 $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD132PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMADD132PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD132PDZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMADD132PDZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD132PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMADD132PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD132PSZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMADD132PSZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD213PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMADD213PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD213PDZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMADD213PDZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD213PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMADD213PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD213PSZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMADD213PSZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD231PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMADD231PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD231PDZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMADD231PDZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD231PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMADD231PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMADD231PSZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMADD231PSZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB132PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMSUB132PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB132PDZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMSUB132PDZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB132PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMSUB132PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB132PSZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMSUB132PSZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB213PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMSUB213PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB213PDZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMSUB213PDZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB213PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMSUB213PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB213PSZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMSUB213PSZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB231PDZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMSUB231PDZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB231PDZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMSUB231PDZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB231PSZ256m          $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VFNMSUB231PSZ256m                   $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VFNMSUB231PSZ256r          $ymm16, $ymm1, $ymm2, implicit $mxcsr
+  $ymm16 = VFNMSUB231PSZ256r                   $ymm16, $ymm1, $ymm2, implicit $mxcsr
   ; CHECK: $ymm16 = VPSRADZ256ri               $ymm16, 7
   $ymm16 = VPSRADZ256ri                        $ymm16, 7                                     
   ; CHECK: $ymm16 = VPSRADZ256rm               $ymm16, $rip, 1, $rax, 0, $noreg
@@ -3173,51 +3173,51 @@ body: |
   ; CHECK: $ymm16 = VCVTDQ2PDZ256rm            $rdi, 1, $noreg, 0, $noreg
   $ymm16 = VCVTDQ2PDZ256rm                     $rdi, 1, $noreg, 0, $noreg
   ; CHECK: $ymm16 = VCVTDQ2PDZ256rr            $xmm0
-  $ymm16 = VCVTDQ2PDZ256rr                     $xmm0                                         
-  ; CHECK: $ymm16 = VCVTDQ2PSZ256rm            $rdi, 1, $noreg, 0, $noreg
-  $ymm16 = VCVTDQ2PSZ256rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm16 = VCVTDQ2PSZ256rr            $ymm16
-  $ymm16 = VCVTDQ2PSZ256rr                     $ymm16                                        
-  ; CHECK: $xmm16 = VCVTPD2DQZ256rm            $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTPD2DQZ256rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTPD2DQZ256rr            $ymm16
-  $xmm16 = VCVTPD2DQZ256rr                     $ymm16                                        
-  ; CHECK: $xmm16 = VCVTPD2PSZ256rm            $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTPD2PSZ256rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTPD2PSZ256rr            $ymm16
-  $xmm16 = VCVTPD2PSZ256rr                     $ymm16                                        
-  ; CHECK: $ymm16 = VCVTPS2DQZ256rm            $rdi, 1, $noreg, 0, $noreg
-  $ymm16 = VCVTPS2DQZ256rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm16 = VCVTPS2DQZ256rr            $ymm16  
-  $ymm16 = VCVTPS2DQZ256rr                     $ymm16                                        
-  ; CHECK: $ymm16 = VCVTPS2PDZ256rm            $rdi, 1, $noreg, 0, $noreg
-  $ymm16 = VCVTPS2PDZ256rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm16 = VCVTPS2PDZ256rr            $xmm0
-  $ymm16 = VCVTPS2PDZ256rr                     $xmm0                                               
-  ; CHECK: VCVTPS2PHZ256mr                     $rdi, 1, $noreg, 0, $noreg, $ymm16, 0
-  VCVTPS2PHZ256mr                              $rdi, 1, $noreg, 0, $noreg, $ymm16, 0
-  ; CHECK: $xmm0 = VCVTPS2PHZ256rr             $ymm16, 0
-  $xmm0 = VCVTPS2PHZ256rr                      $ymm16, 0
-  ; CHECK: $ymm16 = VCVTPH2PSZ256rm            $rdi, 1, $noreg, 0, $noreg
-  $ymm16 = VCVTPH2PSZ256rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm16 = VCVTPH2PSZ256rr            $xmm16
-  $ymm16 = VCVTPH2PSZ256rr                     $xmm16
-  ; CHECK: $xmm16 = VCVTTPD2DQZ256rm           $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTTPD2DQZ256rm                    $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTTPD2DQZ256rr           $ymm16
-  $xmm16 = VCVTTPD2DQZ256rr                    $ymm16                                        
-  ; CHECK: $ymm16 = VCVTTPS2DQZ256rm           $rdi, 1, $noreg, 0, $noreg
-  $ymm16 = VCVTTPS2DQZ256rm                    $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm16 = VCVTTPS2DQZ256rr           $ymm16  
-  $ymm16 = VCVTTPS2DQZ256rr                    $ymm16                                               
-  ; CHECK: $ymm16 = VSQRTPDZ256m               $rdi, 1, $noreg, 0, $noreg
-  $ymm16 = VSQRTPDZ256m                        $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm16 = VSQRTPDZ256r               $ymm16
-  $ymm16 = VSQRTPDZ256r                        $ymm16                                        
-  ; CHECK: $ymm16 = VSQRTPSZ256m               $rdi, 1, $noreg, 0, $noreg
-  $ymm16 = VSQRTPSZ256m                        $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $ymm16 = VSQRTPSZ256r               $ymm16    
-  $ymm16 = VSQRTPSZ256r                        $ymm16                                                 
+  $ymm16 = VCVTDQ2PDZ256rr                     $xmm0, implicit $mxcsr
+  ; CHECK: $ymm16 = VCVTDQ2PSZ256rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VCVTDQ2PSZ256rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VCVTDQ2PSZ256rr            $ymm16, implicit $mxcsr
+  $ymm16 = VCVTDQ2PSZ256rr                     $ymm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPD2DQZ256rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTPD2DQZ256rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPD2DQZ256rr            $ymm16, implicit $mxcsr
+  $xmm16 = VCVTPD2DQZ256rr                     $ymm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPD2PSZ256rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTPD2PSZ256rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPD2PSZ256rr            $ymm16, implicit $mxcsr
+  $xmm16 = VCVTPD2PSZ256rr                     $ymm16, implicit $mxcsr
+  ; CHECK: $ymm16 = VCVTPS2DQZ256rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VCVTPS2DQZ256rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VCVTPS2DQZ256rr            $ymm16, implicit $mxcsr
+  $ymm16 = VCVTPS2DQZ256rr                     $ymm16, implicit $mxcsr
+  ; CHECK: $ymm16 = VCVTPS2PDZ256rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VCVTPS2PDZ256rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VCVTPS2PDZ256rr            $xmm0, implicit $mxcsr
+  $ymm16 = VCVTPS2PDZ256rr                     $xmm0, implicit $mxcsr
+  ; CHECK: VCVTPS2PHZ256mr                     $rdi, 1, $noreg, 0, $noreg, $ymm16, 0, implicit $mxcsr
+  VCVTPS2PHZ256mr                              $rdi, 1, $noreg, 0, $noreg, $ymm16, 0, implicit $mxcsr
+  ; CHECK: $xmm0 = VCVTPS2PHZ256rr             $ymm16, 0, implicit $mxcsr
+  $xmm0 = VCVTPS2PHZ256rr                      $ymm16, 0, implicit $mxcsr
+  ; CHECK: $ymm16 = VCVTPH2PSZ256rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VCVTPH2PSZ256rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VCVTPH2PSZ256rr            $xmm16, implicit $mxcsr
+  $ymm16 = VCVTPH2PSZ256rr                     $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTTPD2DQZ256rm           $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTTPD2DQZ256rm                    $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTTPD2DQZ256rr           $ymm16, implicit $mxcsr
+  $xmm16 = VCVTTPD2DQZ256rr                    $ymm16, implicit $mxcsr
+  ; CHECK: $ymm16 = VCVTTPS2DQZ256rm           $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VCVTTPS2DQZ256rm                    $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VCVTTPS2DQZ256rr           $ymm16, implicit $mxcsr
+  $ymm16 = VCVTTPS2DQZ256rr                    $ymm16, implicit $mxcsr
+  ; CHECK: $ymm16 = VSQRTPDZ256m               $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VSQRTPDZ256m                        $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VSQRTPDZ256r               $ymm16, implicit $mxcsr
+  $ymm16 = VSQRTPDZ256r                        $ymm16, implicit $mxcsr
+  ; CHECK: $ymm16 = VSQRTPSZ256m               $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $ymm16 = VSQRTPSZ256m                        $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $ymm16 = VSQRTPSZ256r               $ymm16, implicit $mxcsr
+  $ymm16 = VSQRTPSZ256r                        $ymm16, implicit $mxcsr
   ; CHECK: $ymm16 = VPALIGNRZ256rmi            $ymm16, $rdi, 1, $noreg, 0, $noreg, 1
   $ymm16 = VPALIGNRZ256rmi                     $ymm16, $rdi, 1, $noreg, 0, $noreg, 1
   ; CHECK: $ymm16 = VPALIGNRZ256rri            $ymm16, $ymm1, 1
@@ -3252,22 +3252,22 @@ body: |
   $ymm16 = VSHUFPSZ256rmi                      $ymm16, $rip, 1, $rax, 0, $noreg, -24
   ; CHECK: $ymm16 = VSHUFPSZ256rri             $ymm16, $ymm1, -24
   $ymm16 = VSHUFPSZ256rri                      $ymm16, $ymm1, -24
-  ; CHECK: $ymm16 = VRNDSCALEPDZ256rmi         $rip, 1, $rax, 0, $noreg, 15
-  $ymm16 = VRNDSCALEPDZ256rmi                  $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $ymm16 = VRNDSCALEPDZ256rri         $ymm16, 15
-  $ymm16 = VRNDSCALEPDZ256rri                  $ymm16, 15
-  ; CHECK: $ymm16 = VRNDSCALEPSZ256rmi         $rip, 1, $rax, 0, $noreg, 15
-  $ymm16 = VRNDSCALEPSZ256rmi                  $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $ymm16 = VRNDSCALEPSZ256rri         $ymm16, 15
-  $ymm16 = VRNDSCALEPSZ256rri                  $ymm16, 15
-  ; CHECK: $ymm0 = VRNDSCALEPDZ256rmi          $rip, 1, $rax, 0, $noreg, 31
-  $ymm0 = VRNDSCALEPDZ256rmi                   $rip, 1, $rax, 0, $noreg, 31
-  ; CHECK: $ymm0 = VRNDSCALEPDZ256rri          $ymm0, 31
-  $ymm0 = VRNDSCALEPDZ256rri                   $ymm0, 31
-  ; CHECK: $ymm0 = VRNDSCALEPSZ256rmi          $rip, 1, $rax, 0, $noreg, 31
-  $ymm0 = VRNDSCALEPSZ256rmi                   $rip, 1, $rax, 0, $noreg, 31
-  ; CHECK: $ymm0 = VRNDSCALEPSZ256rri          $ymm0, 31
-  $ymm0 = VRNDSCALEPSZ256rri                   $ymm0, 31
+  ; CHECK: $ymm16 = VRNDSCALEPDZ256rmi         $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $ymm16 = VRNDSCALEPDZ256rmi                  $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $ymm16 = VRNDSCALEPDZ256rri         $ymm16, 15, implicit $mxcsr
+  $ymm16 = VRNDSCALEPDZ256rri                  $ymm16, 15, implicit $mxcsr
+  ; CHECK: $ymm16 = VRNDSCALEPSZ256rmi         $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $ymm16 = VRNDSCALEPSZ256rmi                  $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $ymm16 = VRNDSCALEPSZ256rri         $ymm16, 15, implicit $mxcsr
+  $ymm16 = VRNDSCALEPSZ256rri                  $ymm16, 15, implicit $mxcsr
+  ; CHECK: $ymm0 = VRNDSCALEPDZ256rmi          $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  $ymm0 = VRNDSCALEPDZ256rmi                   $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $ymm0 = VRNDSCALEPDZ256rri          $ymm0, 31, implicit $mxcsr
+  $ymm0 = VRNDSCALEPDZ256rri                   $ymm0, 31, implicit $mxcsr
+  ; CHECK: $ymm0 = VRNDSCALEPSZ256rmi          $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  $ymm0 = VRNDSCALEPSZ256rmi                   $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $ymm0 = VRNDSCALEPSZ256rri          $ymm0, 31, implicit $mxcsr
+  $ymm0 = VRNDSCALEPSZ256rri                   $ymm0, 31, implicit $mxcsr
   ; CHECK: $ymm16 = VSHUFF32X4Z256rmi          $ymm16, $rip, 1, $rax, 0, $noreg, 228
   $ymm16 = VSHUFF32X4Z256rmi                   $ymm16, $rip, 1, $rax, 0, $noreg, 228
   ; CHECK: $ymm16 = VSHUFF32X4Z256rri          $ymm16, $ymm1, 228
@@ -3446,46 +3446,46 @@ body: |
   VMOVLPSZ128mr                                $rdi, 1, $noreg, 0, $noreg, $xmm16                                  
   ; CHECK: $xmm16 = VMOVLPSZ128rm              $xmm16,  $rdi, 1, $noreg, 0, $noreg  
   $xmm16 = VMOVLPSZ128rm                       $xmm16,  $rdi, 1, $noreg, 0, $noreg                                               
-  ; CHECK: $xmm16 = VMAXCPDZ128rm              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMAXCPDZ128rm                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMAXCPDZ128rr              $xmm16, $xmm1  
-  $xmm16 = VMAXCPDZ128rr                       $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VMAXCPSZ128rm              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMAXCPSZ128rm                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMAXCPSZ128rr              $xmm16, $xmm1
-  $xmm16 = VMAXCPSZ128rr                       $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VMAXPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMAXPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMAXPDZ128rr               $xmm16, $xmm1
-  $xmm16 = VMAXPDZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VMAXPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMAXPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMAXPSZ128rr               $xmm16, $xmm1
-  $xmm16 = VMAXPSZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VMINCPDZ128rm              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMINCPDZ128rm                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMINCPDZ128rr              $xmm16, $xmm1  
-  $xmm16 = VMINCPDZ128rr                       $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VMINCPSZ128rm              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMINCPSZ128rm                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMINCPSZ128rr              $xmm16, $xmm1
-  $xmm16 = VMINCPSZ128rr                       $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VMINPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMINPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMINPDZ128rr               $xmm16, $xmm1
-  $xmm16 = VMINPDZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VMINPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMINPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMINPSZ128rr               $xmm16, $xmm1
-  $xmm16 = VMINPSZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VMULPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMULPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMULPDZ128rr               $xmm16, $xmm1
-  $xmm16 = VMULPDZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VMULPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMULPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMULPSZ128rr               $xmm16, $xmm1
-  $xmm16 = VMULPSZ128rr                        $xmm16, $xmm1                                             
+  ; CHECK: $xmm16 = VMAXCPDZ128rm              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMAXCPDZ128rm                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXCPDZ128rr              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMAXCPDZ128rr                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXCPSZ128rm              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMAXCPSZ128rm                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXCPSZ128rr              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMAXCPSZ128rr                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMAXPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXPDZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMAXPDZ128rr                        $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMAXPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXPSZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMAXPSZ128rr                        $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINCPDZ128rm              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMINCPDZ128rm                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINCPDZ128rr              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMINCPDZ128rr                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINCPSZ128rm              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMINCPSZ128rm                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINCPSZ128rr              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMINCPSZ128rr                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMINPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINPDZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMINPDZ128rr                        $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMINPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINPSZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMINPSZ128rr                        $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMULPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULPDZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMULPDZ128rr                        $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMULPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULPSZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMULPSZ128rr                        $xmm16, $xmm1, implicit $mxcsr
   ; CHECK: $xmm16 = VORPDZ128rm                $xmm16, $rip, 1, $rax, 0, $noreg
   $xmm16 = VORPDZ128rm                         $xmm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm16 = VORPDZ128rr                $xmm16, $xmm1
@@ -3666,14 +3666,14 @@ body: |
   $xmm16 = VPSUBWZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm16 = VPSUBWZ128rr               $xmm16, $xmm1                            
   $xmm16 = VPSUBWZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VADDPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VADDPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VADDPDZ128rr               $xmm16, $xmm1  
-  $xmm16 = VADDPDZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VADDPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VADDPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VADDPSZ128rr               $xmm16, $xmm1
-  $xmm16 = VADDPSZ128rr                        $xmm16, $xmm1                                             
+  ; CHECK: $xmm16 = VADDPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VADDPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VADDPDZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VADDPDZ128rr                        $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VADDPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VADDPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VADDPSZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VADDPSZ128rr                        $xmm16, $xmm1, implicit $mxcsr
   ; CHECK: $xmm16 = VANDNPDZ128rm              $xmm16, $rip, 1, $rax, 0, $noreg
   $xmm16 = VANDNPDZ128rm                       $xmm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm16 = VANDNPDZ128rr              $xmm16, $xmm1
@@ -3690,14 +3690,14 @@ body: |
   $xmm16 = VANDPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm16 = VANDPSZ128rr               $xmm16, $xmm1
   $xmm16 = VANDPSZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VDIVPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VDIVPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VDIVPDZ128rr               $xmm16, $xmm1
-  $xmm16 = VDIVPDZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VDIVPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VDIVPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VDIVPSZ128rr               $xmm16, $xmm1
-  $xmm16 = VDIVPSZ128rr                        $xmm16, $xmm1                                             
+  ; CHECK: $xmm16 = VDIVPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VDIVPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVPDZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VDIVPDZ128rr                        $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VDIVPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVPSZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VDIVPSZ128rr                        $xmm16, $xmm1, implicit $mxcsr
   ; CHECK: $xmm16 = VPXORDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
   $xmm16 = VPXORDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm16 = VPXORDZ128rr               $xmm16, $xmm1
@@ -3706,14 +3706,14 @@ body: |
   $xmm16 = VPXORQZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm16 = VPXORQZ128rr               $xmm16, $xmm1
   $xmm16 = VPXORQZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VSUBPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VSUBPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VSUBPDZ128rr               $xmm16, $xmm1
-  $xmm16 = VSUBPDZ128rr                        $xmm16, $xmm1                                             
-  ; CHECK: $xmm16 = VSUBPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VSUBPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VSUBPSZ128rr               $xmm16, $xmm1                  
-  $xmm16 = VSUBPSZ128rr                        $xmm16, $xmm1                                             
+  ; CHECK: $xmm16 = VSUBPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSUBPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBPDZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VSUBPDZ128rr                        $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBPSZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSUBPSZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBPSZ128rr               $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VSUBPSZ128rr                        $xmm16, $xmm1, implicit $mxcsr
   ; CHECK: $xmm16 = VXORPDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
   $xmm16 = VXORPDZ128rm                        $xmm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm16 = VXORPDZ128rr               $xmm16, $xmm1
@@ -3794,150 +3794,150 @@ body: |
   $xmm16 = VUNPCKLPSZ128rm                     $xmm16, $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm16 = VUNPCKLPSZ128rr            $xmm16, $xmm1                                               
   $xmm16 = VUNPCKLPSZ128rr                     $xmm16, $xmm1                                                             
-  ; CHECK: $xmm16 = VFMADD132PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD132PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADD132PDZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD132PDZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADD132PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD132PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADD132PSZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD132PSZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADD213PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD213PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADD213PDZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD213PDZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADD213PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD213PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADD213PSZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD213PSZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADD231PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD231PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADD231PDZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD231PDZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADD231PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD231PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADD231PSZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD231PSZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADDSUB132PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADDSUB132PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADDSUB132PDZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADDSUB132PDZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADDSUB132PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADDSUB132PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADDSUB132PSZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADDSUB132PSZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADDSUB213PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADDSUB213PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADDSUB213PDZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADDSUB213PDZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADDSUB213PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADDSUB213PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADDSUB213PSZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADDSUB213PSZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADDSUB231PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADDSUB231PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADDSUB231PDZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADDSUB231PDZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMADDSUB231PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADDSUB231PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMADDSUB231PSZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADDSUB231PSZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUB132PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB132PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUB132PDZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB132PDZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUB132PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB132PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUB132PSZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB132PSZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUB213PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB213PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUB213PDZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB213PDZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUB213PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB213PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUB213PSZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB213PSZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUB231PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB231PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUB231PDZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB231PDZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUB231PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB231PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUB231PSZ128r           $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB231PSZ128r                    $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUBADD132PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUBADD132PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUBADD132PDZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUBADD132PDZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUBADD132PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUBADD132PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUBADD132PSZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUBADD132PSZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUBADD213PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUBADD213PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUBADD213PDZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUBADD213PDZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUBADD213PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUBADD213PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUBADD213PSZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUBADD213PSZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUBADD231PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUBADD231PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUBADD231PDZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUBADD231PDZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFMSUBADD231PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUBADD231PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFMSUBADD231PSZ128r        $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUBADD231PSZ128r                 $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMADD132PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD132PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMADD132PDZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD132PDZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMADD132PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD132PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMADD132PSZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD132PSZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMADD213PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD213PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMADD213PDZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD213PDZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMADD213PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD213PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMADD213PSZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD213PSZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMADD231PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD231PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMADD231PDZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD231PDZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMADD231PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD231PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMADD231PSZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD231PSZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMSUB132PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB132PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMSUB132PDZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB132PDZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMSUB132PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB132PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMSUB132PSZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB132PSZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMSUB213PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB213PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMSUB213PDZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB213PDZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMSUB213PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB213PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMSUB213PSZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB213PSZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMSUB231PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB231PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMSUB231PDZ128r          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB231PDZ128r                   $xmm16, $xmm1, $xmm2                                      
-  ; CHECK: $xmm16 = VFNMSUB231PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB231PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                          
-  ; CHECK: $xmm16 = VFNMSUB231PSZ128r          $xmm16, $xmm1, $xmm2 
-  $xmm16 = VFNMSUB231PSZ128r                   $xmm16, $xmm1, $xmm2                                               
+  ; CHECK: $xmm16 = VFMADD132PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD132PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132PDZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD132PDZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD132PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132PSZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD132PSZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD213PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213PDZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD213PDZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD213PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213PSZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD213PSZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD231PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231PDZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD231PDZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD231PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231PSZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD231PSZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB132PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADDSUB132PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB132PDZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADDSUB132PDZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB132PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADDSUB132PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB132PSZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADDSUB132PSZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB213PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADDSUB213PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB213PDZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADDSUB213PDZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB213PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADDSUB213PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB213PSZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADDSUB213PSZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB231PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADDSUB231PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB231PDZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADDSUB231PDZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB231PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADDSUB231PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADDSUB231PSZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADDSUB231PSZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB132PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132PDZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB132PDZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB132PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132PSZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB132PSZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB213PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213PDZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB213PDZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB213PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213PSZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB213PSZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231PDZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB231PDZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231PDZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB231PDZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231PSZ128m           $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB231PSZ128m                    $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231PSZ128r           $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB231PSZ128r                    $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD132PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUBADD132PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD132PDZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUBADD132PDZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD132PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUBADD132PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD132PSZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUBADD132PSZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD213PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUBADD213PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD213PDZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUBADD213PDZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD213PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUBADD213PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD213PSZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUBADD213PSZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD231PDZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUBADD231PDZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD231PDZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUBADD231PDZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD231PSZ128m        $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUBADD231PSZ128m                 $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUBADD231PSZ128r        $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUBADD231PSZ128r                 $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD132PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132PDZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD132PDZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD132PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132PSZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD132PSZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD213PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213PDZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD213PDZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD213PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213PSZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD213PSZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD231PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231PDZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD231PDZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD231PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231PSZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD231PSZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB132PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132PDZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB132PDZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB132PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132PSZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB132PSZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB213PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213PDZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB213PDZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB213PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213PSZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB213PSZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231PDZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB231PDZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231PDZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB231PDZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231PSZ128m          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB231PSZ128m                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231PSZ128r          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB231PSZ128r                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
   ; CHECK: $xmm16 = VPSLLDZ128ri               $xmm16, 7  
   $xmm16 = VPSLLDZ128ri                        $xmm16, 7                                                 
   ; CHECK: $xmm16 = VPSLLDZ128rm               $xmm16, $rip, 1, $rax, 0, $noreg
@@ -4024,50 +4024,50 @@ body: |
   $xmm16 = VPERMILPSZ128rm                     $xmm16, $rdi, 1, $noreg, 0, $noreg                                  
   ; CHECK: $xmm16 = VPERMILPSZ128rr            $xmm16, $xmm1
   $xmm16 = VPERMILPSZ128rr                     $xmm16, $xmm1                                               
-  ; CHECK: $xmm16 = VCVTPH2PSZ128rm            $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTPH2PSZ128rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTPH2PSZ128rr            $xmm16
-  $xmm16 = VCVTPH2PSZ128rr                     $xmm16                                                    
+  ; CHECK: $xmm16 = VCVTPH2PSZ128rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTPH2PSZ128rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPH2PSZ128rr            $xmm16, implicit $mxcsr
+  $xmm16 = VCVTPH2PSZ128rr                     $xmm16, implicit $mxcsr
   ; CHECK: $xmm16 = VCVTDQ2PDZ128rm            $rdi, 1, $noreg, 0, $noreg
   $xmm16 = VCVTDQ2PDZ128rm                     $rdi, 1, $noreg, 0, $noreg
   ; CHECK: $xmm16 = VCVTDQ2PDZ128rr            $xmm16     
   $xmm16 = VCVTDQ2PDZ128rr                     $xmm16                                                    
-  ; CHECK: $xmm16 = VCVTDQ2PSZ128rm            $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTDQ2PSZ128rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTDQ2PSZ128rr            $xmm16   
-  $xmm16 = VCVTDQ2PSZ128rr                     $xmm16                                                    
-  ; CHECK: $xmm16 = VCVTPD2DQZ128rm            $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTPD2DQZ128rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTPD2DQZ128rr            $xmm16   
-  $xmm16 = VCVTPD2DQZ128rr                     $xmm16                                                    
-  ; CHECK: $xmm16 = VCVTPD2PSZ128rm            $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTPD2PSZ128rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTPD2PSZ128rr            $xmm16   
-  $xmm16 = VCVTPD2PSZ128rr                     $xmm16                                                    
-  ; CHECK: $xmm16 = VCVTPS2DQZ128rm            $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTPS2DQZ128rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTPS2DQZ128rr            $xmm16   
-  $xmm16 = VCVTPS2DQZ128rr                     $xmm16                                                    
-  ; CHECK: $xmm16 = VCVTPS2PDZ128rm            $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTPS2PDZ128rm                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTPS2PDZ128rr            $xmm16
-  $xmm16 = VCVTPS2PDZ128rr                     $xmm16                                                    
-  ; CHECK: $xmm16 = VCVTTPD2DQZ128rm           $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTTPD2DQZ128rm                    $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTTPD2DQZ128rr           $xmm16  
-  $xmm16 = VCVTTPD2DQZ128rr                    $xmm16                                                    
-  ; CHECK: $xmm16 = VCVTTPS2DQZ128rm           $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTTPS2DQZ128rm                    $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTTPS2DQZ128rr           $xmm16
-  $xmm16 = VCVTTPS2DQZ128rr                    $xmm16                                                    
-  ; CHECK: $xmm16 = VSQRTPDZ128m               $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VSQRTPDZ128m                        $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VSQRTPDZ128r               $xmm16
-  $xmm16 = VSQRTPDZ128r                        $xmm16                                                    
-  ; CHECK: $xmm16 = VSQRTPSZ128m               $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VSQRTPSZ128m                        $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VSQRTPSZ128r               $xmm16  
-  $xmm16 = VSQRTPSZ128r                        $xmm16                                                    
+  ; CHECK: $xmm16 = VCVTDQ2PSZ128rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTDQ2PSZ128rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTDQ2PSZ128rr            $xmm16, implicit $mxcsr
+  $xmm16 = VCVTDQ2PSZ128rr                     $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPD2DQZ128rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTPD2DQZ128rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPD2DQZ128rr            $xmm16, implicit $mxcsr
+  $xmm16 = VCVTPD2DQZ128rr                     $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPD2PSZ128rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTPD2PSZ128rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPD2PSZ128rr            $xmm16, implicit $mxcsr
+  $xmm16 = VCVTPD2PSZ128rr                     $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPS2DQZ128rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTPS2DQZ128rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPS2DQZ128rr            $xmm16, implicit $mxcsr
+  $xmm16 = VCVTPS2DQZ128rr                     $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPS2PDZ128rm            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTPS2PDZ128rm                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTPS2PDZ128rr            $xmm16, implicit $mxcsr
+  $xmm16 = VCVTPS2PDZ128rr                     $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTTPD2DQZ128rm           $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTTPD2DQZ128rm                    $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTTPD2DQZ128rr           $xmm16, implicit $mxcsr
+  $xmm16 = VCVTTPD2DQZ128rr                    $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTTPS2DQZ128rm           $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTTPS2DQZ128rm                    $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTTPS2DQZ128rr           $xmm16, implicit $mxcsr
+  $xmm16 = VCVTTPS2DQZ128rr                    $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTPDZ128m               $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSQRTPDZ128m                        $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTPDZ128r               $xmm16, implicit $mxcsr
+  $xmm16 = VSQRTPDZ128r                        $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTPSZ128m               $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSQRTPSZ128m                        $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTPSZ128r               $xmm16, implicit $mxcsr
+  $xmm16 = VSQRTPSZ128r                        $xmm16, implicit $mxcsr
   ; CHECK: $xmm16 = VMOVDDUPZ128rm             $rdi, 1, $noreg, 0, $noreg     
   $xmm16 = VMOVDDUPZ128rm                      $rdi, 1, $noreg, 0, $noreg                                          
   ; CHECK: $xmm16 = VMOVDDUPZ128rr             $xmm16    
@@ -4134,10 +4134,10 @@ body: |
   $xmm16 = VBROADCASTI32X2Z128m                $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm16 = VBROADCASTI32X2Z128r       $xmm0
   $xmm16 = VBROADCASTI32X2Z128r                $xmm0
-  ; CHECK: $xmm16 = VCVTPS2PHZ128rr            $xmm16, 2
-  $xmm16 = VCVTPS2PHZ128rr                     $xmm16, 2                                                 
-  ; CHECK: VCVTPS2PHZ128mr                     $rdi, 1, $noreg, 0, $noreg, $xmm16, 2
-  VCVTPS2PHZ128mr                              $rdi, 1, $noreg, 0, $noreg, $xmm16, 2
+  ; CHECK: $xmm16 = VCVTPS2PHZ128rr            $xmm16, 2, implicit $mxcsr
+  $xmm16 = VCVTPS2PHZ128rr                     $xmm16, 2, implicit $mxcsr
+  ; CHECK: VCVTPS2PHZ128mr                     $rdi, 1, $noreg, 0, $noreg, $xmm16, 2, implicit $mxcsr
+  VCVTPS2PHZ128mr                              $rdi, 1, $noreg, 0, $noreg, $xmm16, 2, implicit $mxcsr
   ; CHECK: $xmm16 = VPABSBZ128rm               $rip, 1, $rax, 0, $noreg
   $xmm16 = VPABSBZ128rm                        $rip, 1, $rax, 0, $noreg
   ; CHECK: $xmm16 = VPABSBZ128rr               $xmm16
@@ -4162,22 +4162,22 @@ body: |
   $xmm16 = VINSERTPSZrm                        $xmm16, $rdi, 1, $noreg, 0, $noreg, 1
   ; CHECK: $xmm16 = VINSERTPSZrr               $xmm16, $xmm16, 1
   $xmm16 = VINSERTPSZrr                        $xmm16, $xmm16, 1
-  ; CHECK: $xmm16 = VRNDSCALEPDZ128rmi         $rip, 1, $rax, 0, $noreg, 15
-  $xmm16 = VRNDSCALEPDZ128rmi                  $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm16 = VRNDSCALEPDZ128rri         $xmm16, 15
-  $xmm16 = VRNDSCALEPDZ128rri                  $xmm16, 15
-  ; CHECK: $xmm16 = VRNDSCALEPSZ128rmi         $rip, 1, $rax, 0, $noreg, 15
-  $xmm16 = VRNDSCALEPSZ128rmi                  $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm16 = VRNDSCALEPSZ128rri         $xmm16, 15
-  $xmm16 = VRNDSCALEPSZ128rri                  $xmm16, 15
-  ; CHECK: $xmm0 = VRNDSCALEPDZ128rmi          $rip, 1, $rax, 0, $noreg, 31
-  $xmm0 = VRNDSCALEPDZ128rmi                   $rip, 1, $rax, 0, $noreg, 31
-  ; CHECK: $xmm0 = VRNDSCALEPDZ128rri          $xmm0, 31
-  $xmm0 = VRNDSCALEPDZ128rri                   $xmm0, 31
-  ; CHECK: $xmm0 = VRNDSCALEPSZ128rmi          $rip, 1, $rax, 0, $noreg, 31
-  $xmm0 = VRNDSCALEPSZ128rmi                   $rip, 1, $rax, 0, $noreg, 31
-  ; CHECK: $xmm0 = VRNDSCALEPSZ128rri          $xmm0, 31
-  $xmm0 = VRNDSCALEPSZ128rri                   $xmm0, 31
+  ; CHECK: $xmm16 = VRNDSCALEPDZ128rmi         $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALEPDZ128rmi                  $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALEPDZ128rri         $xmm16, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALEPDZ128rri                  $xmm16, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALEPSZ128rmi         $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALEPSZ128rmi                  $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALEPSZ128rri         $xmm16, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALEPSZ128rri                  $xmm16, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALEPDZ128rmi          $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALEPDZ128rmi                   $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALEPDZ128rri          $xmm0, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALEPDZ128rri                   $xmm0, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALEPSZ128rmi          $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALEPSZ128rmi                   $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALEPSZ128rri          $xmm0, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALEPSZ128rri                   $xmm0, 31, implicit $mxcsr
     
       RET 0, $zmm0, $zmm1
 ...
@@ -4188,310 +4188,310 @@ body: |
 name: evex_scalar_to_evex_test
 body: |
   bb.0:
-  ; CHECK: $xmm16 = VADDSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VADDSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VADDSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VADDSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VADDSDZrr                  $xmm16, $xmm1  
-  $xmm16 = VADDSDZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VADDSDZrr_Int              $xmm16, $xmm1
-  $xmm16 = VADDSDZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VADDSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VADDSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VADDSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VADDSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VADDSSZrr                  $xmm16, $xmm1
-  $xmm16 = VADDSSZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VADDSSZrr_Int              $xmm16, $xmm1
-  $xmm16 = VADDSSZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VDIVSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VDIVSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VDIVSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VDIVSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VDIVSDZrr                  $xmm16, $xmm1  
-  $xmm16 = VDIVSDZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VDIVSDZrr_Int              $xmm16, $xmm1
-  $xmm16 = VDIVSDZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VDIVSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VDIVSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VDIVSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VDIVSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VDIVSSZrr                  $xmm16, $xmm1
-  $xmm16 = VDIVSSZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VDIVSSZrr_Int              $xmm16, $xmm1
-  $xmm16 = VDIVSSZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMAXCSDZrm                 $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMAXCSDZrm                          $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMAXCSDZrr                 $xmm16, $xmm1
-  $xmm16 = VMAXCSDZrr                          $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMAXCSSZrm                 $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMAXCSSZrm                          $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMAXCSSZrr                 $xmm16, $xmm1
-  $xmm16 = VMAXCSSZrr                          $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMAXSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMAXSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMAXSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMAXSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMAXSDZrr                  $xmm16, $xmm1
-  $xmm16 = VMAXSDZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMAXSDZrr_Int              $xmm16, $xmm1
-  $xmm16 = VMAXSDZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMAXSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMAXSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMAXSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMAXSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMAXSSZrr                  $xmm16, $xmm1
-  $xmm16 = VMAXSSZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMAXSSZrr_Int              $xmm16, $xmm1
-  $xmm16 = VMAXSSZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMINCSDZrm                 $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMINCSDZrm                          $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMINCSDZrr                 $xmm16, $xmm1
-  $xmm16 = VMINCSDZrr                          $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMINCSSZrm                 $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMINCSSZrm                          $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMINCSSZrr                 $xmm16, $xmm1
-  $xmm16 = VMINCSSZrr                          $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMINSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMINSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMINSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMINSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMINSDZrr                  $xmm16, $xmm1
-  $xmm16 = VMINSDZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMINSDZrr_Int              $xmm16, $xmm1
-  $xmm16 = VMINSDZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMINSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMINSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMINSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMINSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMINSSZrr                  $xmm16, $xmm1
-  $xmm16 = VMINSSZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMINSSZrr_Int              $xmm16, $xmm1
-  $xmm16 = VMINSSZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMULSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMULSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMULSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMULSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMULSDZrr                  $xmm16, $xmm1
-  $xmm16 = VMULSDZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMULSDZrr_Int              $xmm16, $xmm1
-  $xmm16 = VMULSDZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMULSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMULSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMULSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMULSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMULSSZrr                  $xmm16, $xmm1  
-  $xmm16 = VMULSSZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VMULSSZrr_Int              $xmm16, $xmm1
-  $xmm16 = VMULSSZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VSUBSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VSUBSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VSUBSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VSUBSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VSUBSDZrr                  $xmm16, $xmm1  
-  $xmm16 = VSUBSDZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VSUBSDZrr_Int              $xmm16, $xmm1
-  $xmm16 = VSUBSDZrr_Int                       $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VSUBSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VSUBSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VSUBSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg
-  $xmm16 = VSUBSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VSUBSSZrr                  $xmm16, $xmm1
-  $xmm16 = VSUBSSZrr                           $xmm16, $xmm1                                              
-  ; CHECK: $xmm16 = VSUBSSZrr_Int              $xmm16, $xmm1
-  $xmm16 = VSUBSSZrr_Int                       $xmm16, $xmm1                                               
-  ; CHECK: $xmm16 = VFMADD132SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD132SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD132SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD132SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD132SDZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD132SDZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD132SDZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD132SDZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD132SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD132SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD132SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD132SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD132SSZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD132SSZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD132SSZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD132SSZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD213SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD213SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD213SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD213SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD213SDZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD213SDZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD213SDZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD213SDZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD213SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD213SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD213SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD213SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD213SSZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD213SSZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD213SSZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD213SSZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD231SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD231SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD231SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD231SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD231SDZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD231SDZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD231SDZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD231SDZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD231SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD231SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD231SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMADD231SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMADD231SSZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD231SSZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMADD231SSZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMADD231SSZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB132SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB132SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB132SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB132SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB132SDZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB132SDZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB132SDZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB132SDZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB132SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB132SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB132SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB132SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB132SSZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB132SSZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB132SSZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB132SSZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB213SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB213SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB213SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB213SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB213SDZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB213SDZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB213SDZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB213SDZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB213SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB213SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB213SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB213SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB213SSZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB213SSZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB213SSZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB213SSZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB231SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB231SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB231SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB231SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB231SDZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB231SDZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB231SDZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB231SDZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB231SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB231SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB231SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFMSUB231SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFMSUB231SSZr              $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB231SSZr                       $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFMSUB231SSZr_Int          $xmm16, $xmm1, $xmm2
-  $xmm16 = VFMSUB231SSZr_Int                   $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD132SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD132SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD132SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD132SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD132SDZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD132SDZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD132SDZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD132SDZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD132SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD132SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD132SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD132SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD132SSZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD132SSZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD132SSZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD132SSZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD213SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD213SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD213SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD213SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD213SDZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD213SDZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD213SDZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD213SDZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD213SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD213SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD213SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD213SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD213SSZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD213SSZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD213SSZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD213SSZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD231SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD231SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD231SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD231SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD231SDZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD231SDZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD231SDZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD231SDZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD231SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD231SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD231SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMADD231SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMADD231SSZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD231SSZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMADD231SSZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMADD231SSZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB132SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB132SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB132SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB132SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB132SDZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB132SDZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB132SDZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB132SDZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB132SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB132SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB132SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB132SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB132SSZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB132SSZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB132SSZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB132SSZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB213SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB213SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB213SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB213SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB213SDZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB213SDZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB213SDZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB213SDZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB213SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB213SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB213SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB213SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB213SSZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB213SSZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB213SSZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB213SSZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB231SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB231SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB231SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB231SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB231SDZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB231SDZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB231SDZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB231SDZr_Int                  $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB231SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB231SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB231SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
-  $xmm16 = VFNMSUB231SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg                           
-  ; CHECK: $xmm16 = VFNMSUB231SSZr             $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB231SSZr                      $xmm16, $xmm1, $xmm2                                       
-  ; CHECK: $xmm16 = VFNMSUB231SSZr_Int         $xmm16, $xmm1, $xmm2
-  $xmm16 = VFNMSUB231SSZr_Int                  $xmm16, $xmm1, $xmm2                                               
+  ; CHECK: $xmm16 = VADDSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VADDSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VADDSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VADDSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VADDSDZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VADDSDZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VADDSDZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VADDSDZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VADDSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VADDSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VADDSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VADDSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VADDSSZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VADDSSZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VADDSSZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VADDSSZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VDIVSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VDIVSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVSDZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VDIVSDZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVSDZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VDIVSDZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VDIVSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VDIVSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVSSZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VDIVSSZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VDIVSSZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VDIVSSZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXCSDZrm                 $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMAXCSDZrm                          $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXCSDZrr                 $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMAXCSDZrr                          $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXCSSZrm                 $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMAXCSSZrm                          $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXCSSZrr                 $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMAXCSSZrr                          $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMAXSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMAXSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXSDZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMAXSDZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXSDZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMAXSDZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMAXSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMAXSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXSSZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMAXSSZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMAXSSZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMAXSSZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINCSDZrm                 $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMINCSDZrm                          $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINCSDZrr                 $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMINCSDZrr                          $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINCSSZrm                 $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMINCSSZrm                          $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINCSSZrr                 $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMINCSSZrr                          $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMINSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMINSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINSDZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMINSDZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINSDZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMINSDZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMINSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMINSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINSSZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMINSSZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMINSSZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMINSSZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMULSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMULSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULSDZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMULSDZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULSDZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMULSDZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMULSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VMULSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULSSZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMULSSZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VMULSSZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VMULSSZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBSDZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSUBSDZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBSDZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSUBSDZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBSDZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VSUBSDZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBSDZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VSUBSDZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBSSZrm                  $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSUBSSZrm                           $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBSSZrm_Int              $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSUBSSZrm_Int                       $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBSSZrr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VSUBSSZrr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VSUBSSZrr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VSUBSSZrr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD132SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD132SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132SDZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD132SDZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132SDZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD132SDZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD132SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD132SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132SSZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD132SSZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD132SSZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD132SSZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD213SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD213SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213SDZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD213SDZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213SDZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD213SDZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD213SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD213SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213SSZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD213SSZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD213SSZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD213SSZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD231SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD231SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231SDZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD231SDZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231SDZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD231SDZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD231SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMADD231SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231SSZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD231SSZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMADD231SSZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMADD231SSZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB132SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB132SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132SDZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB132SDZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132SDZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB132SDZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB132SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB132SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132SSZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB132SSZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB132SSZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB132SSZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB213SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB213SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213SDZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB213SDZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213SDZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB213SDZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB213SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB213SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213SSZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB213SSZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB213SSZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB213SSZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231SDZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB231SDZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231SDZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB231SDZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231SDZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB231SDZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231SDZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB231SDZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231SSZm              $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB231SSZm                       $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231SSZm_Int          $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFMSUB231SSZm_Int                   $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231SSZr              $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB231SSZr                       $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFMSUB231SSZr_Int          $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFMSUB231SSZr_Int                   $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD132SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD132SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132SDZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD132SDZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132SDZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD132SDZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD132SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD132SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132SSZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD132SSZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD132SSZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD132SSZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD213SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD213SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213SDZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD213SDZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213SDZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD213SDZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD213SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD213SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213SSZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD213SSZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD213SSZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD213SSZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD231SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD231SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231SDZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD231SDZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231SDZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD231SDZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD231SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMADD231SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231SSZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD231SSZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMADD231SSZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMADD231SSZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB132SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB132SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132SDZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB132SDZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132SDZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB132SDZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB132SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB132SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132SSZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB132SSZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB132SSZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB132SSZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB213SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB213SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213SDZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB213SDZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213SDZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB213SDZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB213SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB213SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213SSZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB213SSZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB213SSZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB213SSZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231SDZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB231SDZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231SDZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB231SDZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231SDZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB231SDZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231SDZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB231SDZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231SSZm             $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB231SSZm                      $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231SSZm_Int         $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VFNMSUB231SSZm_Int                  $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231SSZr             $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB231SSZr                      $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  ; CHECK: $xmm16 = VFNMSUB231SSZr_Int         $xmm16, $xmm1, $xmm2, implicit $mxcsr
+  $xmm16 = VFNMSUB231SSZr_Int                  $xmm16, $xmm1, $xmm2, implicit $mxcsr
   ; CHECK: VPEXTRBZmr                          $rdi, 1, $noreg, 0, $noreg, $xmm16, 3       
   VPEXTRBZmr                                   $rdi, 1, $noreg, 0, $noreg, $xmm16, 3                                
   ; CHECK: $eax = VPEXTRBZrr                   $xmm16, 1    
@@ -4526,38 +4526,38 @@ body: |
   $xmm16 = VPINSRWZrm                          $xmm16, $rsi, 1, $noreg, 0, $noreg, 3                                
   ; CHECK: $xmm16 = VPINSRWZrr                 $xmm16, $edi, 5
   $xmm16 = VPINSRWZrr                          $xmm16, $edi, 5                                               
-  ; CHECK: $xmm16 = VSQRTSDZm                  $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VSQRTSDZm                           $xmm16, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VSQRTSDZm_Int              $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VSQRTSDZm_Int                       $xmm16, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VSQRTSDZr                  $xmm16, $xmm1 
-  $xmm16 = VSQRTSDZr                           $xmm16, $xmm1                                                  
-  ; CHECK: $xmm16 = VSQRTSDZr_Int              $xmm16, $xmm1
-  $xmm16 = VSQRTSDZr_Int                       $xmm16, $xmm1                                                  
-  ; CHECK: $xmm16 = VSQRTSSZm                  $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VSQRTSSZm                           $xmm16, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VSQRTSSZm_Int              $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VSQRTSSZm_Int                       $xmm16, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VSQRTSSZr                  $xmm16, $xmm1
-  $xmm16 = VSQRTSSZr                           $xmm16, $xmm1                                                  
-  ; CHECK: $xmm16 = VSQRTSSZr_Int              $xmm16, $xmm1
-  $xmm16 = VSQRTSSZr_Int                       $xmm16, $xmm1                                                  
-  ; CHECK: $rdi = VCVTSD2SI64rm_Int            $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTSD2SI64Zrm_Int                    $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTSD2SI64Zrr_Int           $xmm16
-  $rdi = VCVTSD2SI64Zrr_Int                    $xmm16                                                     
-  ; CHECK: $edi = VCVTSD2SIrm_Int              $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTSD2SIZrm_Int                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTSD2SIZrr_Int             $xmm16
-  $edi = VCVTSD2SIZrr_Int                      $xmm16                                                     
-  ; CHECK: $xmm16 = VCVTSD2SSZrm               $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTSD2SSZrm                        $xmm16, $rdi, 1, $noreg, 0, $noreg                                   
-  ; CHECK: $xmm16 = VCVTSD2SSZrm_Int           $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTSD2SSZrm_Int                    $xmm16, $rdi, 1, $noreg, 0, $noreg                                   
-  ; CHECK: $xmm16 = VCVTSD2SSZrr               $xmm16, $noreg
-  $xmm16 = VCVTSD2SSZrr                        $xmm16, $noreg                                                  
-  ; CHECK: $xmm16 = VCVTSD2SSZrr_Int           $xmm16, $noreg
-  $xmm16 = VCVTSD2SSZrr_Int                    $xmm16, $noreg                                                  
+  ; CHECK: $xmm16 = VSQRTSDZm                  $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSQRTSDZm                           $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTSDZm_Int              $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSQRTSDZm_Int                       $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTSDZr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VSQRTSDZr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTSDZr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VSQRTSDZr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTSSZm                  $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSQRTSSZm                           $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTSSZm_Int              $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VSQRTSSZm_Int                       $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTSSZr                  $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VSQRTSSZr                           $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $xmm16 = VSQRTSSZr_Int              $xmm16, $xmm1, implicit $mxcsr
+  $xmm16 = VSQRTSSZr_Int                       $xmm16, $xmm1, implicit $mxcsr
+  ; CHECK: $rdi = VCVTSD2SI64rm_Int            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTSD2SI64Zrm_Int                    $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTSD2SI64Zrr_Int           $xmm16, implicit $mxcsr
+  $rdi = VCVTSD2SI64Zrr_Int                    $xmm16, implicit $mxcsr
+  ; CHECK: $edi = VCVTSD2SIrm_Int              $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTSD2SIZrm_Int                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTSD2SIZrr_Int             $xmm16, implicit $mxcsr
+  $edi = VCVTSD2SIZrr_Int                      $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSD2SSZrm               $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSD2SSZrm                        $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSD2SSZrm_Int           $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSD2SSZrm_Int                    $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSD2SSZrr               $xmm16, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSD2SSZrr                        $xmm16, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSD2SSZrr_Int           $xmm16, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSD2SSZrr_Int                    $xmm16, $noreg, implicit $mxcsr
   ; CHECK: $xmm16 = VCVTSI2SDZrm               $xmm16, $rdi, 1, $noreg, 0, $noreg
   $xmm16 = VCVTSI2SDZrm                        $xmm16, $rdi, 1, $noreg, 0, $noreg                                   
   ; CHECK: $xmm16 = VCVTSI2SDZrm_Int           $xmm16, $rdi, 1, $noreg, 0, $noreg
@@ -4566,78 +4566,78 @@ body: |
   $xmm16 = VCVTSI2SDZrr                        $xmm16, $noreg                                                  
   ; CHECK: $xmm16 = VCVTSI2SDZrr_Int           $xmm16, $noreg
   $xmm16 = VCVTSI2SDZrr_Int                    $xmm16, $noreg                                                  
-  ; CHECK: $xmm16 = VCVTSI2SSZrm               $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTSI2SSZrm                        $xmm16, $rdi, 1, $noreg, 0, $noreg                                   
-  ; CHECK: $xmm16 = VCVTSI2SSZrm_Int           $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTSI2SSZrm_Int                    $xmm16, $rdi, 1, $noreg, 0, $noreg                                   
-  ; CHECK: $xmm16 = VCVTSI2SSZrr               $xmm16, $noreg
-  $xmm16 = VCVTSI2SSZrr                        $xmm16, $noreg                                                  
-  ; CHECK: $xmm16 = VCVTSI2SSZrr_Int           $xmm16, $noreg
-  $xmm16 = VCVTSI2SSZrr_Int                    $xmm16, $noreg                                                  
-  ; CHECK: $xmm16 = VCVTSI642SDZrm             $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTSI642SDZrm                      $xmm16, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTSI642SDZrm_Int         $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTSI642SDZrm_Int                  $xmm16, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTSI642SDZrr             $xmm16, $noreg
-  $xmm16 = VCVTSI642SDZrr                      $xmm16, $noreg
-  ; CHECK: $xmm16 = VCVTSI642SDZrr_Int         $xmm16, $noreg
-  $xmm16 = VCVTSI642SDZrr_Int                  $xmm16, $noreg
-  ; CHECK: $xmm16 = VCVTSI642SSZrm             $xmm16, $rdi, 1, $noreg, 0, $noreg 
-  $xmm16 = VCVTSI642SSZrm                      $xmm16, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTSI642SSZrm_Int         $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTSI642SSZrm_Int                  $xmm16, $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $xmm16 = VCVTSI642SSZrr             $xmm16, $noreg 
-  $xmm16 = VCVTSI642SSZrr                      $xmm16, $noreg
-  ; CHECK: $xmm16 = VCVTSI642SSZrr_Int         $xmm16, $noreg
-  $xmm16 = VCVTSI642SSZrr_Int                  $xmm16, $noreg
-  ; CHECK: $xmm16 = VCVTSS2SDZrm               $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTSS2SDZrm                        $xmm16, $rdi, 1, $noreg, 0, $noreg                                   
-  ; CHECK: $xmm16 = VCVTSS2SDZrm_Int           $xmm16, $rdi, 1, $noreg, 0, $noreg
-  $xmm16 = VCVTSS2SDZrm_Int                    $xmm16, $rdi, 1, $noreg, 0, $noreg                                   
-  ; CHECK: $xmm16 = VCVTSS2SDZrr               $xmm16, $noreg
-  $xmm16 = VCVTSS2SDZrr                        $xmm16, $noreg                                                  
-  ; CHECK: $xmm16 = VCVTSS2SDZrr_Int           $xmm16, $noreg
-  $xmm16 = VCVTSS2SDZrr_Int                    $xmm16, $noreg                                                  
-  ; CHECK: $rdi = VCVTSS2SI64rm_Int            $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTSS2SI64Zrm_Int                    $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTSS2SI64Zrr_Int           $xmm16
-  $rdi = VCVTSS2SI64Zrr_Int                    $xmm16                                                     
-  ; CHECK: $edi = VCVTSS2SIrm_Int              $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTSS2SIZrm_Int                      $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTSS2SIZrr_Int             $xmm16
-  $edi = VCVTSS2SIZrr_Int                      $xmm16                                                     
-  ; CHECK: $rdi = VCVTTSD2SI64rm               $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTTSD2SI64Zrm                       $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTTSD2SI64rm_Int           $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTTSD2SI64Zrm_Int                   $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTTSD2SI64Zrr              $xmm16
-  $rdi = VCVTTSD2SI64Zrr                       $xmm16                                                     
-  ; CHECK: $rdi = VCVTTSD2SI64Zrr_Int          $xmm16
-  $rdi = VCVTTSD2SI64Zrr_Int                   $xmm16                                                     
-  ; CHECK: $edi = VCVTTSD2SIrm                 $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTTSD2SIZrm                         $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTTSD2SIrm_Int             $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTTSD2SIZrm_Int                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTTSD2SIZrr                $xmm16
-  $edi = VCVTTSD2SIZrr                         $xmm16                                                     
-  ; CHECK: $edi = VCVTTSD2SIZrr_Int            $xmm16
-  $edi = VCVTTSD2SIZrr_Int                     $xmm16                                                     
-  ; CHECK: $rdi = VCVTTSS2SI64rm               $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTTSS2SI64Zrm                       $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTTSS2SI64rm_Int           $rdi, 1, $noreg, 0, $noreg
-  $rdi = VCVTTSS2SI64Zrm_Int                   $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $rdi = VCVTTSS2SI64Zrr              $xmm16
-  $rdi = VCVTTSS2SI64Zrr                       $xmm16                                                     
-  ; CHECK: $rdi = VCVTTSS2SI64Zrr_Int          $xmm16
-  $rdi = VCVTTSS2SI64Zrr_Int                   $xmm16                                                     
-  ; CHECK: $edi = VCVTTSS2SIrm                 $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTTSS2SIZrm                         $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTTSS2SIrm_Int             $rdi, 1, $noreg, 0, $noreg
-  $edi = VCVTTSS2SIZrm_Int                     $rdi, 1, $noreg, 0, $noreg
-  ; CHECK: $edi = VCVTTSS2SIZrr                $xmm16
-  $edi = VCVTTSS2SIZrr                         $xmm16                                                     
-  ; CHECK: $edi = VCVTTSS2SIZrr_Int            $xmm16  
-  $edi = VCVTTSS2SIZrr_Int                     $xmm16                                                     
+  ; CHECK: $xmm16 = VCVTSI2SSZrm               $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI2SSZrm                        $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI2SSZrm_Int           $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI2SSZrm_Int                    $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI2SSZrr               $xmm16, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI2SSZrr                        $xmm16, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI2SSZrr_Int           $xmm16, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI2SSZrr_Int                    $xmm16, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI642SDZrm             $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI642SDZrm                      $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI642SDZrm_Int         $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI642SDZrm_Int                  $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI642SDZrr             $xmm16, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI642SDZrr                      $xmm16, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI642SDZrr_Int         $xmm16, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI642SDZrr_Int                  $xmm16, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI642SSZrm             $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI642SSZrm                      $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI642SSZrm_Int         $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI642SSZrm_Int                  $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI642SSZrr             $xmm16, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI642SSZrr                      $xmm16, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSI642SSZrr_Int         $xmm16, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSI642SSZrr_Int                  $xmm16, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSS2SDZrm               $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSS2SDZrm                        $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSS2SDZrm_Int           $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSS2SDZrm_Int                    $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSS2SDZrr               $xmm16, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSS2SDZrr                        $xmm16, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSS2SDZrr_Int           $xmm16, $noreg, implicit $mxcsr
+  $xmm16 = VCVTSS2SDZrr_Int                    $xmm16, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTSS2SI64rm_Int            $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTSS2SI64Zrm_Int                    $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTSS2SI64Zrr_Int           $xmm16, implicit $mxcsr
+  $rdi = VCVTSS2SI64Zrr_Int                    $xmm16, implicit $mxcsr
+  ; CHECK: $edi = VCVTSS2SIrm_Int              $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTSS2SIZrm_Int                      $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTSS2SIZrr_Int             $xmm16, implicit $mxcsr
+  $edi = VCVTSS2SIZrr_Int                      $xmm16, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSD2SI64rm               $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTTSD2SI64Zrm                       $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSD2SI64rm_Int           $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTTSD2SI64Zrm_Int                   $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSD2SI64Zrr              $xmm16, implicit $mxcsr
+  $rdi = VCVTTSD2SI64Zrr                       $xmm16, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSD2SI64Zrr_Int          $xmm16, implicit $mxcsr
+  $rdi = VCVTTSD2SI64Zrr_Int                   $xmm16, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSD2SIrm                 $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTTSD2SIZrm                         $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSD2SIrm_Int             $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTTSD2SIZrm_Int                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSD2SIZrr                $xmm16, implicit $mxcsr
+  $edi = VCVTTSD2SIZrr                         $xmm16, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSD2SIZrr_Int            $xmm16, implicit $mxcsr
+  $edi = VCVTTSD2SIZrr_Int                     $xmm16, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSS2SI64rm               $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTTSS2SI64Zrm                       $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSS2SI64rm_Int           $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $rdi = VCVTTSS2SI64Zrm_Int                   $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSS2SI64Zrr              $xmm16, implicit $mxcsr
+  $rdi = VCVTTSS2SI64Zrr                       $xmm16, implicit $mxcsr
+  ; CHECK: $rdi = VCVTTSS2SI64Zrr_Int          $xmm16, implicit $mxcsr
+  $rdi = VCVTTSS2SI64Zrr_Int                   $xmm16, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSS2SIrm                 $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTTSS2SIZrm                         $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSS2SIrm_Int             $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  $edi = VCVTTSS2SIZrm_Int                     $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSS2SIZrr                $xmm16, implicit $mxcsr
+  $edi = VCVTTSS2SIZrr                         $xmm16, implicit $mxcsr
+  ; CHECK: $edi = VCVTTSS2SIZrr_Int            $xmm16, implicit $mxcsr
+  $edi = VCVTTSS2SIZrr_Int                     $xmm16, implicit $mxcsr
   ; CHECK: $xmm16 = VMOV64toSDZrr              $rdi    
   $xmm16 = VMOV64toSDZrr                       $rdi                                                       
   ; CHECK: $xmm16 = VMOVDI2SSZrr               $eax
@@ -4728,38 +4728,38 @@ body: |
   VUCOMISSZrm                                  $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr
   ; CHECK: VUCOMISSZrr                         $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr
   VUCOMISSZrr                                  $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr
-  ; CHECK: $xmm16 = VRNDSCALESDZm              $xmm16, $rip, 1, $rax, 0, $noreg, 15
-  $xmm16 = VRNDSCALESDZm                       $xmm16, $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm16 = VRNDSCALESDZr              $xmm16, $xmm1, 15
-  $xmm16 = VRNDSCALESDZr                       $xmm16, $xmm1, 15
-  ; CHECK: $xmm16 = VRNDSCALESSZm              $xmm16, $rip, 1, $rax, 0, $noreg, 15
-  $xmm16 = VRNDSCALESSZm                       $xmm16, $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm16 = VRNDSCALESSZr              $xmm16, $xmm1, 15
-  $xmm16 = VRNDSCALESSZr                       $xmm16, $xmm1, 15
-  ; CHECK: $xmm16 = VRNDSCALESDZm_Int          $xmm16, $rip, 1, $rax, 0, $noreg, 15
-  $xmm16 = VRNDSCALESDZm_Int                   $xmm16, $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm16 = VRNDSCALESDZr_Int          $xmm16, $xmm1, 15
-  $xmm16 = VRNDSCALESDZr_Int                   $xmm16, $xmm1, 15
-  ; CHECK: $xmm16 = VRNDSCALESSZm_Int          $xmm16, $rip, 1, $rax, 0, $noreg, 15
-  $xmm16 = VRNDSCALESSZm_Int                   $xmm16, $rip, 1, $rax, 0, $noreg, 15
-  ; CHECK: $xmm16 = VRNDSCALESSZr_Int          $xmm16, $xmm1, 15
-  $xmm16 = VRNDSCALESSZr_Int                   $xmm16, $xmm1, 15
-  ; CHECK: $xmm0 = VRNDSCALESDZm               $xmm0, $rip, 1, $rax, 0, $noreg, 31
-  $xmm0 = VRNDSCALESDZm                        $xmm0, $rip, 1, $rax, 0, $noreg, 31
-  ; CHECK: $xmm0 = VRNDSCALESDZr               $xmm0, $xmm1, 31
-  $xmm0 = VRNDSCALESDZr                        $xmm0, $xmm1, 31
-  ; CHECK: $xmm0 = VRNDSCALESSZm               $xmm0, $rip, 1, $rax, 0, $noreg, 31
-  $xmm0 = VRNDSCALESSZm                        $xmm0, $rip, 1, $rax, 0, $noreg, 31
-  ; CHECK: $xmm0 = VRNDSCALESSZr               $xmm0, $xmm1, 31
-  $xmm0 = VRNDSCALESSZr                        $xmm0, $xmm1, 31
-  ; CHECK: $xmm0 = VRNDSCALESDZm_Int           $xmm0, $rip, 1, $rax, 0, $noreg, 31
-  $xmm0 = VRNDSCALESDZm_Int                    $xmm0, $rip, 1, $rax, 0, $noreg, 31
-  ; CHECK: $xmm0 = VRNDSCALESDZr_Int           $xmm0, $xmm1, 31
-  $xmm0 = VRNDSCALESDZr_Int                    $xmm0, $xmm1, 31
-  ; CHECK: $xmm0 = VRNDSCALESSZm_Int           $xmm0, $rip, 1, $rax, 0, $noreg, 31
-  $xmm0 = VRNDSCALESSZm_Int                    $xmm0, $rip, 1, $rax, 0, $noreg, 31
-  ; CHECK: $xmm0 = VRNDSCALESSZr_Int           $xmm0, $xmm1, 31
-  $xmm0 = VRNDSCALESSZr_Int                    $xmm0, $xmm1, 31
+  ; CHECK: $xmm16 = VRNDSCALESDZm              $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESDZm                       $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESDZr              $xmm16, $xmm1, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESDZr                       $xmm16, $xmm1, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESSZm              $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESSZm                       $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESSZr              $xmm16, $xmm1, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESSZr                       $xmm16, $xmm1, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESDZm_Int          $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESDZm_Int                   $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESDZr_Int          $xmm16, $xmm1, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESDZr_Int                   $xmm16, $xmm1, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESSZm_Int          $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESSZm_Int                   $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESSZr_Int          $xmm16, $xmm1, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESSZr_Int                   $xmm16, $xmm1, 15, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESDZm               $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESDZm                        $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESDZr               $xmm0, $xmm1, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESDZr                        $xmm0, $xmm1, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESSZm               $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESSZm                        $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESSZr               $xmm0, $xmm1, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESSZr                        $xmm0, $xmm1, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESDZm_Int           $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESDZm_Int                    $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESDZr_Int           $xmm0, $xmm1, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESDZr_Int                    $xmm0, $xmm1, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESSZm_Int           $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESSZm_Int                    $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESSZr_Int           $xmm0, $xmm1, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESSZr_Int                    $xmm0, $xmm1, 31, implicit $mxcsr
   
       RET 0, $zmm0, $zmm1
 ...


        


More information about the llvm-commits mailing list