[llvm] r336731 - [X86] Teach X86InstrInfo::commuteInstructionImpl to use MOVSD/MOVSS for BLEND under optsize when the immediate allows it.

Tue Jul 10 15:02:23 PDT 2018

Author: ctopper
Date: Tue Jul 10 15:02:23 2018
New Revision: 336731

URL: http://llvm.org/viewvc/llvm-project?rev=336731&view=rev
Log:
[X86] Teach X86InstrInfo::commuteInstructionImpl to use MOVSD/MOVSS for BLEND under optsize when the immediate allows it.

Isel currently emits movss/movsd a lot of the time and an accidental double commute turns it into a blend.

Ideally we'd select blend directly in isel under optspeed and not rely on the double commute to create blend.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
    llvm/trunk/test/CodeGen/X86/sse41.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=336731&r1=336730&r2=336731&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Jul 10 15:02:23 2018
@@ -1547,9 +1547,29 @@ MachineInstr *X86InstrInfo::commuteInstr
   }
   case X86::BLENDPDrri:
   case X86::BLENDPSrri:
-  case X86::PBLENDWrri:
   case X86::VBLENDPDrri:
   case X86::VBLENDPSrri:
+    // If we're optimizing for size, try to use MOVSD/MOVSS.
+    if (MI.getParent()->getParent()->getFunction().optForSize()) {
+      unsigned Mask, Opc;
+      switch (MI.getOpcode()) {
+      default: llvm_unreachable("Unreachable!");
+      case X86::BLENDPDrri:  Opc = X86::MOVSDrr;  Mask = 0x03; break;
+      case X86::BLENDPSrri:  Opc = X86::MOVSSrr;  Mask = 0x0F; break;
+      case X86::VBLENDPDrri: Opc = X86::VMOVSDrr; Mask = 0x03; break;
+      case X86::VBLENDPSrri: Opc = X86::VMOVSSrr; Mask = 0x0F; break;
+      }
+      if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
+        auto &WorkingMI = cloneIfNew(MI);
+        WorkingMI.setDesc(get(Opc));
+        WorkingMI.RemoveOperand(3);
+        return TargetInstrInfo::commuteInstructionImpl(WorkingMI,
+                                                       /*NewMI=*/false,
+                                                       OpIdx1, OpIdx2);
+      }
+    }
+    LLVM_FALLTHROUGH;
+  case X86::PBLENDWrri:
   case X86::VBLENDPDYrri:
   case X86::VBLENDPSYrri:
   case X86::VPBLENDDrri:

Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll?rev=336731&r1=336730&r2=336731&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll Tue Jul 10 15:02:23 2018
@@ -820,8 +820,8 @@ define <2 x double> @test_x86_sse2_cvtss
 ; X86-AVX1:       ## %bb.0:
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
 ; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
-; X86-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X86-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X86-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
+; X86-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
@@ -842,8 +842,8 @@ define <2 x double> @test_x86_sse2_cvtss
 ; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X64-AVX1:       ## %bb.0:
 ; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
-; X64-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X64-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X64-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
+; X64-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:

Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=336731&r1=336730&r2=336731&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Tue Jul 10 15:02:23 2018
@@ -394,7 +394,7 @@ define <4 x float> @insertps_or_blendps(
 ; X86-SSE:       ## %bb.0:
 ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04]
 ; X86-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT:    blendps $1, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x01]
+; X86-SSE-NEXT:    movss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x10,0xc1]
 ; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1,2,3]
 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -402,7 +402,7 @@ define <4 x float> @insertps_or_blendps(
 ; X86-AVX1:       ## %bb.0:
 ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04]
 ; X86-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
-; X86-AVX1-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
+; X86-AVX1-NEXT:    vmovss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x10,0xc1]
 ; X86-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1,2,3]
 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -416,13 +416,13 @@ define <4 x float> @insertps_or_blendps(
 ;
 ; X64-SSE-LABEL: insertps_or_blendps:
 ; X64-SSE:       ## %bb.0:
-; X64-SSE-NEXT:    blendps $1, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x01]
+; X64-SSE-NEXT:    movss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x10,0xc1]
 ; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1,2,3]
 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: insertps_or_blendps:
 ; X64-AVX1:       ## %bb.0:
-; X64-AVX1-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
+; X64-AVX1-NEXT:    vmovss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x10,0xc1]
 ; X64-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1,2,3]
 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
 ;