[llvm] r358046 - [X86] Move the 2 byte VEX optimization for MOV instructions back to the X86AsmParser::processInstruction where it used to be. Block when {vex3} prefix is present.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 9 22:43:20 PDT 2019
Author: ctopper
Date: Tue Apr 9 22:43:20 2019
New Revision: 358046
URL: http://llvm.org/viewvc/llvm-project?rev=358046&view=rev
Log:
[X86] Move the 2 byte VEX optimization for MOV instructions back to the X86AsmParser::processInstruction where it used to be. Block when {vex3} prefix is present.
Years ago I moved this to an InstAlias using VR128H/VR128L. But now that we support {vex3} pseudo prefix, we need to block the optimization when it is set to match gas behavior.
Modified:
llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86RegisterInfo.td
llvm/trunk/test/MC/X86/x86_64-avx-encoding.s
Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=358046&r1=358045&r2=358046&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original)
+++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Tue Apr 9 22:43:20 2019
@@ -2830,7 +2830,69 @@ bool X86AsmParser::ParseInstruction(Pars
}
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
- return false;
+ const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::VMOVZPQILo2PQIrr:
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
+ // the registers is extended, but other isn't.
+ if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
+ MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
+ MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
+ return false;
+
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ Inst.setOpcode(NewOpc);
+ return true;
+ }
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
+ // the registers is extended, but other isn't.
+ if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
+ MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
+ MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
+ return false;
+
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ Inst.setOpcode(NewOpc);
+ return true;
+ }
+ }
}
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=358046&r1=358045&r2=358046&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Apr 9 22:43:20 2019
@@ -350,13 +350,6 @@ let Predicates = [UseSSE2] in {
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
}
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- (VMOVSSrr_REV VR128L:$dst, VR128:$src1, VR128H:$src2), 0>;
-def : InstAlias<"vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- (VMOVSDrr_REV VR128L:$dst, VR128:$src1, VR128H:$src2), 0>;
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//
@@ -503,25 +496,6 @@ let SchedRW = [SchedWriteFMoveLS.YMM.RR]
} // SchedRW
} // Predicate
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovaps\t{$src, $dst|$dst, $src}",
- (VMOVAPSrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovapd\t{$src, $dst|$dst, $src}",
- (VMOVAPDrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovups\t{$src, $dst|$dst, $src}",
- (VMOVUPSrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovupd\t{$src, $dst|$dst, $src}",
- (VMOVUPDrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovaps\t{$src, $dst|$dst, $src}",
- (VMOVAPSYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovapd\t{$src, $dst|$dst, $src}",
- (VMOVAPDYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovups\t{$src, $dst|$dst, $src}",
- (VMOVUPSYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovupd\t{$src, $dst|$dst, $src}",
- (VMOVUPDYrr_REV VR256L:$dst, VR256H:$src), 0>;
-
// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
(VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
@@ -3444,17 +3418,6 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (ou
} // ExeDomain = SSEPackedInt
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovdqa\t{$src, $dst|$dst, $src}",
- (VMOVDQArr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovdqa\t{$src, $dst|$dst, $src}",
- (VMOVDQAYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovdqu\t{$src, $dst|$dst, $src}",
- (VMOVDQUrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovdqu\t{$src, $dst|$dst, $src}",
- (VMOVDQUYrr_REV VR256L:$dst, VR256H:$src), 0>;
-
// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
(VMOVDQArr_REV VR128:$dst, VR128:$src), 0>;
@@ -4423,11 +4386,6 @@ def MOVPQI2QIrr : S2I<0xD6, MRMDestReg,
"movq\t{$src, $dst|$dst, $src}", []>;
}
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovq\t{$src, $dst|$dst, $src}",
- (VMOVPQI2QIrr VR128L:$dst, VR128H:$src), 0>;
-
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
(VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=358046&r1=358045&r2=358046&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Tue Apr 9 22:43:20 2019
@@ -552,17 +552,6 @@ def VR128 : RegisterClass<"X86", [v4f32,
def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 15)>;
-// Special classes that help the assembly parser choose some alternate
-// instructions to favor 2-byte VEX encodings.
-def VR128L : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
- 128, (sequence "XMM%u", 0, 7)>;
-def VR128H : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
- 128, (sequence "XMM%u", 8, 15)>;
-def VR256L : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
- 256, (sequence "YMM%u", 0, 7)>;
-def VR256H : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
- 256, (sequence "YMM%u", 8, 15)>;
-
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
let CopyCost = -1; // Don't allow copying of status registers.
Modified: llvm/trunk/test/MC/X86/x86_64-avx-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86_64-avx-encoding.s?rev=358046&r1=358045&r2=358046&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86_64-avx-encoding.s (original)
+++ llvm/trunk/test/MC/X86/x86_64-avx-encoding.s Tue Apr 9 22:43:20 2019
@@ -4190,62 +4190,166 @@ _foo2:
// CHECK: encoding: [0xc4,0x02,0x3d,0x91,0x14,0x4f]
vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
+// CHECK: vmovq %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x7a,0x7e,0xc0]
+ vmovq %xmm0, %xmm8
+
+// CHECK: vmovq %xmm0, %xmm8
+// CHECK: encoding: [0xc4,0x61,0x7a,0x7e,0xc0]
+ {vex3} vmovq %xmm0, %xmm8
+
+// CHECK: vmovq %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0x79,0xd6,0xc0]
+ vmovq %xmm8, %xmm0
+
+// CHECK: vmovq %xmm8, %xmm0
+// CHECK: encoding: [0xc4,0xc1,0x7a,0x7e,0xc0]
+ {vex3} vmovq %xmm8, %xmm0
+
+// CHECK: vmovdqa %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x79,0x6f,0xc0]
+ vmovdqa %xmm0, %xmm8
+
+// CHECK: vmovdqa %xmm0, %xmm8
+// CHECK: encoding: [0xc4,0x61,0x79,0x6f,0xc0]
+ {vex3} vmovdqa %xmm0, %xmm8
+
+// CHECK: vmovdqa %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0x79,0x7f,0xc0]
+ vmovdqa %xmm8, %xmm0
+
+// CHECK: vmovdqa %xmm8, %xmm0
+// CHECK: encoding: [0xc4,0xc1,0x79,0x6f,0xc0]
+ {vex3} vmovdqa %xmm8, %xmm0
+
+// CHECK: vmovdqu %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x7a,0x6f,0xc0]
+ vmovdqu %xmm0, %xmm8
+
+// CHECK: vmovdqu %xmm0, %xmm8
+// CHECK: encoding: [0xc4,0x61,0x7a,0x6f,0xc0]
+ {vex3} vmovdqu %xmm0, %xmm8
+
+// CHECK: vmovdqu %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0x7a,0x7f,0xc0]
+ vmovdqu %xmm8, %xmm0
+
+// CHECK: vmovdqu %xmm8, %xmm0
+// CHECK: encoding: [0xc4,0xc1,0x7a,0x6f,0xc0]
+ {vex3} vmovdqu %xmm8, %xmm0
+
// CHECK: vmovaps %xmm0, %xmm8
// CHECK: encoding: [0xc5,0x78,0x28,0xc0]
vmovaps %xmm0, %xmm8
+// CHECK: vmovaps %xmm0, %xmm8
+// CHECK: encoding: [0xc4,0x61,0x78,0x28,0xc0]
+ {vex3} vmovaps %xmm0, %xmm8
+
// CHECK: vmovaps %xmm8, %xmm0
// CHECK: encoding: [0xc5,0x78,0x29,0xc0]
vmovaps %xmm8, %xmm0
+// CHECK: vmovaps %xmm8, %xmm0
+// CHECK: encoding: [0xc4,0xc1,0x78,0x28,0xc0]
+ {vex3} vmovaps %xmm8, %xmm0
+
// CHECK: vmovaps %ymm0, %ymm8
// CHECK: encoding: [0xc5,0x7c,0x28,0xc0]
vmovaps %ymm0, %ymm8
+// CHECK: vmovaps %ymm0, %ymm8
+// CHECK: encoding: [0xc4,0x61,0x7c,0x28,0xc0]
+ {vex3} vmovaps %ymm0, %ymm8
+
// CHECK: vmovaps %ymm8, %ymm0
// CHECK: encoding: [0xc5,0x7c,0x29,0xc0]
vmovaps %ymm8, %ymm0
+// CHECK: vmovaps %ymm8, %ymm0
+// CHECK: encoding: [0xc4,0xc1,0x7c,0x28,0xc0]
+ {vex3} vmovaps %ymm8, %ymm0
+
// CHECK: vmovups %xmm0, %xmm8
// CHECK: encoding: [0xc5,0x78,0x10,0xc0]
vmovups %xmm0, %xmm8
+// CHECK: vmovups %xmm0, %xmm8
+// CHECK: encoding: [0xc4,0x61,0x78,0x10,0xc0]
+ {vex3} vmovups %xmm0, %xmm8
+
// CHECK: vmovups %xmm8, %xmm0
// CHECK: encoding: [0xc5,0x78,0x11,0xc0]
vmovups %xmm8, %xmm0
+// CHECK: vmovups %xmm8, %xmm0
+// CHECK: encoding: [0xc4,0xc1,0x78,0x10,0xc0]
+ {vex3} vmovups %xmm8, %xmm0
+
// CHECK: vmovups %ymm0, %ymm8
// CHECK: encoding: [0xc5,0x7c,0x10,0xc0]
vmovups %ymm0, %ymm8
+// CHECK: vmovups %ymm0, %ymm8
+// CHECK: encoding: [0xc4,0x61,0x7c,0x10,0xc0]
+ {vex3} vmovups %ymm0, %ymm8
+
// CHECK: vmovups %ymm8, %ymm0
// CHECK: encoding: [0xc5,0x7c,0x11,0xc0]
vmovups %ymm8, %ymm0
+// CHECK: vmovups %ymm8, %ymm0
+// CHECK: encoding: [0xc4,0xc1,0x7c,0x10,0xc0]
+ {vex3} vmovups %ymm8, %ymm0
+
// CHECK: vmovss %xmm0, %xmm0, %xmm8
// CHECK: encoding: [0xc5,0x7a,0x10,0xc0]
vmovss %xmm0, %xmm0, %xmm8
+// CHECK: vmovss %xmm0, %xmm0, %xmm8
+// CHECK: encoding: [0xc4,0x61,0x7a,0x10,0xc0]
+ {vex3} vmovss %xmm0, %xmm0, %xmm8
+
// CHECK: vmovss %xmm0, %xmm8, %xmm0
// CHECK: encoding: [0xc5,0xba,0x10,0xc0]
vmovss %xmm0, %xmm8, %xmm0
+// CHECK: vmovss %xmm0, %xmm8, %xmm0
+// CHECK: encoding: [0xc4,0xe1,0x3a,0x10,0xc0]
+ {vex3} vmovss %xmm0, %xmm8, %xmm0
+
// CHECK: vmovss %xmm8, %xmm0, %xmm0
// CHECK: encoding: [0xc5,0x7a,0x11,0xc0]
vmovss %xmm8, %xmm0, %xmm0
+// CHECK: vmovss %xmm8, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xc1,0x7a,0x10,0xc0]
+ {vex3} vmovss %xmm8, %xmm0, %xmm0
+
// CHECK: vmovsd %xmm0, %xmm0, %xmm8
// CHECK: encoding: [0xc5,0x7b,0x10,0xc0]
vmovsd %xmm0, %xmm0, %xmm8
+// CHECK: vmovsd %xmm0, %xmm0, %xmm8
+// CHECK: encoding: [0xc4,0x61,0x7b,0x10,0xc0]
+ {vex3} vmovsd %xmm0, %xmm0, %xmm8
+
// CHECK: vmovsd %xmm0, %xmm8, %xmm0
// CHECK: encoding: [0xc5,0xbb,0x10,0xc0]
vmovsd %xmm0, %xmm8, %xmm0
+// CHECK: vmovsd %xmm0, %xmm8, %xmm0
+// CHECK: encoding: [0xc4,0xe1,0x3b,0x10,0xc0]
+ {vex3} vmovsd %xmm0, %xmm8, %xmm0
+
// CHECK: vmovsd %xmm8, %xmm0, %xmm0
// CHECK: encoding: [0xc5,0x7b,0x11,0xc0]
vmovsd %xmm8, %xmm0, %xmm0
+// CHECK: vmovsd %xmm8, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xc1,0x7b,0x10,0xc0]
+ {vex3} vmovsd %xmm8, %xmm0, %xmm0
+
// CHECK: vpmaddwd %xmm3, %xmm2, %xmm1
// CHECK: encoding: [0xc5,0xe9,0xf5,0xcb]
vpmaddwd %xmm3, %xmm2, %xmm1
More information about the llvm-commits
mailing list