[llvm] r283539 - [X86][SSE] Update register class during MOVSD/MOVSS - BLENDPD/BLENDPS commutation
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 7 04:18:39 PDT 2016
Author: rksimon
Date: Fri Oct 7 06:18:38 2016
New Revision: 283539
URL: http://llvm.org/viewvc/llvm-project?rev=283539&view=rev
Log:
[X86][SSE] Update register class during MOVSD/MOVSS - BLENDPD/BLENDPS commutation
MOVSD/MOVSS take a 128-bit register and an FR64/FR32 register input, respectively; the commutation code wasn't taking this into account, leading to machine verification errors.
This patch inserts a virtual-register COPY machine instruction (into a VR128 register) to ensure that the register classes are correct after commuting to BLENDPD/BLENDPS.
Fix for PR30607
Differential Revision: https://reviews.llvm.org/D25280
Added:
llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=283539&r1=283538&r2=283539&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Oct 7 06:18:38 2016
@@ -3566,8 +3566,19 @@ MachineInstr *X86InstrInfo::commuteInstr
case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
}
+
+ // MOVSD/MOVSS's 2nd operand is a FR64/FR32 reg class - we need to copy
+ // this over to a VR128 class like the 1st operand to use a BLENDPD/BLENDPS.
+ auto &MRI = MI.getParent()->getParent()->getRegInfo();
+ auto VR128RC = MRI.getRegClass(MI.getOperand(1).getReg());
+ unsigned VR128 = MRI.createVirtualRegister(VR128RC);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY),
+ VR128)
+ .addReg(MI.getOperand(2).getReg());
+
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
+ WorkingMI.getOperand(2).setReg(VR128);
WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
Added: llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll?rev=283539&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll (added)
+++ llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll Fri Oct 7 06:18:38 2016
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
+
+; PR30607
+
+define <2 x double> @insert_f64(double %a0, <2 x double> %a1) {
+; SSE2-LABEL: insert_f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: insert_f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: insert_f64:
+; AVX: # BB#0:
+; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: insert_f64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT: retq
+ %1 = insertelement <2 x double> %a1, double %a0, i32 0
+ ret <2 x double> %1
+}
+
+define <4 x float> @insert_f32(float %a0, <4 x float> %a1) {
+; SSE2-LABEL: insert_f32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: insert_f32:
+; SSE41: # BB#0:
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: insert_f32:
+; AVX: # BB#0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: insert_f32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: retq
+ %1 = insertelement <4 x float> %a1, float %a0, i32 0
+ ret <4 x float> %1
+}
More information about the llvm-commits mailing list