[PATCH] D25280: [X86][SSE] Update register class during MOVSD/MOVSS - BLENDPD/BLENDPS commutation

Wed Oct 5 07:22:15 PDT 2016

RKSimon created this revision.
RKSimon added reviewers: qcolombet, eli.friedman, uabelho, andreadb, spatel.
RKSimon added a subscriber: llvm-commits.
RKSimon set the repository for this revision to rL LLVM.

MOVSD/MOVSS take a 128-bit register and a FR32/FR64 register input; the commutation code wasn't taking this into account, leading to verification errors.

This patch inserts a virtual-register COPY machine instruction to ensure that the register classes are correct.

Fix for PR30607


Repository:
  rL LLVM

https://reviews.llvm.org/D25280

Files:
  lib/Target/X86/X86InstrInfo.cpp
  test/CodeGen/X86/coalesce_commute_movsd.ll


Index: test/CodeGen/X86/coalesce_commute_movsd.ll
===================================================================

--- test/CodeGen/X86/coalesce_commute_movsd.ll
+++ test/CodeGen/X86/coalesce_commute_movsd.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -verify-coalescing -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
+
+; PR30607
+
+define <2 x double> @insert_f64(double %a0, <2 x double> %a1) {
+; SSE2-LABEL: insert_f64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: insert_f64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: insert_f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: insert_f64:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT:    retq
+ %1 = insertelement <2 x double> %a1, double %a0, i32 0
+ ret <2 x double> %1
+}
+
+define <4 x float> @insert_f32(float %a0, <4 x float> %a1) {
+; SSE2-LABEL: insert_f32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: insert_f32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: insert_f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: insert_f32:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT:    retq
+ %1 = insertelement <4 x float> %a1, float %a0, i32 0
+ ret <4 x float> %1
+}
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -3566,8 +3566,19 @@
     case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
     case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
     }
+
+    // MOVSD/MOVSS's 2nd operand is a FR64/FR32 reg class - we need to copy
+    // this over to a VR128 class like the 1st operand to use a BLENDPD/BLENDPS.
+    auto &MRI = MI.getParent()->getParent()->getRegInfo();
+    auto VR128RC = MRI.getRegClass(MI.getOperand(1).getReg());
+    unsigned VR128 = MRI.createVirtualRegister(VR128RC);
+    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY),
+            VR128)
+        .addReg(MI.getOperand(2).getReg());
+
     auto &WorkingMI = cloneIfNew(MI);
     WorkingMI.setDesc(get(Opc));
+    WorkingMI.getOperand(2).setReg(VR128);
     WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D25280.73645.patch
Type: text/x-patch
Size: 3559 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161005/6e65784a/attachment.bin>