[llvm] r230499 - [X86][MMX] Reapply: Add MMX instructions to foldable tables

Bruno Cardoso Lopes bruno.cardoso at gmail.com
Wed Feb 25 07:14:02 PST 2015


Author: bruno
Date: Wed Feb 25 09:14:02 2015
New Revision: 230499

URL: http://llvm.org/viewvc/llvm-project?rev=230499&view=rev
Log:
[X86][MMX] Reapply: Add MMX instructions to foldable tables

Reapply r230248.

Teach the peephole optimizer to work with MMX instructions by adding
entries into the foldable tables. This covers folding opportunities not
handled during isel.
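
For readers unfamiliar with the fold tables: the idea is a simple mapping from a register-form opcode to its load-folding memory-form opcode, which the peephole optimizer consults when an instruction's input comes straight from a plain load. The sketch below is only illustrative — the opcode names, struct layout, and lookup helper are hypothetical stand-ins, not the actual X86InstrInfo API — but it shows the shape of the entries being added in this commit.

```cpp
#include <cstdint>

// Hypothetical opcode constants standing in for the real X86:: enumerators.
enum Opcode : unsigned { MMX_PADDBirr, MMX_PADDBirm, MMX_PSHUFWri, MMX_PSHUFWmi };

// Each entry pairs a register-form opcode with its memory-form equivalent,
// plus fold flags (alignment requirements and the like).
struct FoldEntry { unsigned RegOp, MemOp; uint16_t Flags; };

// Sketch of a register->memory fold table, analogous to the initializer
// lists added in the diff below.
static const FoldEntry FoldTable[] = {
    { MMX_PADDBirr, MMX_PADDBirm, 0 },
    { MMX_PSHUFWri, MMX_PSHUFWmi, 0 },
};

// When the peephole optimizer sees a register-form instruction whose source
// operand is defined by a simple load, it asks whether a memory-form opcode
// exists; if so, the load and the ALU op collapse into one load-op instruction.
inline bool canFoldLoad(unsigned RegOpcode, unsigned &MemOpcode) {
  for (const FoldEntry &E : FoldTable)
    if (E.RegOp == RegOpcode) { MemOpcode = E.MemOp; return true; }
  return false;
}
```

The test changes further down show the effect: a separate `movq` load feeding `pshufw` becomes a single `pshufw` with a memory operand.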

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll
    llvm/trunk/test/CodeGen/X86/vec_extract-mmx.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=230499&r1=230498&r2=230499&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Feb 25 09:14:02 2015
@@ -547,6 +547,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::UCOMISDrr,       X86::UCOMISDrm,           0 },
     { X86::UCOMISSrr,       X86::UCOMISSrm,           0 },
 
+    // MMX version of foldable instructions
+    { X86::MMX_CVTPD2PIirr,   X86::MMX_CVTPD2PIirm,   0 },
+    { X86::MMX_CVTPI2PDirr,   X86::MMX_CVTPI2PDirm,   0 },
+    { X86::MMX_CVTPS2PIirr,   X86::MMX_CVTPS2PIirm,   0 },
+    { X86::MMX_CVTTPD2PIirr,  X86::MMX_CVTTPD2PIirm,  0 },
+    { X86::MMX_CVTTPS2PIirr,  X86::MMX_CVTTPS2PIirm,  0 },
+    { X86::MMX_MOVD64to64rr,  X86::MMX_MOVQ64rm,      0 },
+    { X86::MMX_PABSBrr64,     X86::MMX_PABSBrm64,     0 },
+    { X86::MMX_PABSDrr64,     X86::MMX_PABSDrm64,     0 },
+    { X86::MMX_PABSWrr64,     X86::MMX_PABSWrm64,     0 },
+    { X86::MMX_PSHUFWri,      X86::MMX_PSHUFWmi,      0 },
+
     // AVX 128-bit versions of foldable instructions
     { X86::Int_VCOMISDrr,   X86::Int_VCOMISDrm,       0 },
     { X86::Int_VCOMISSrr,   X86::Int_VCOMISSrm,       0 },
@@ -1117,6 +1129,78 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::XORPDrr,         X86::XORPDrm,       TB_ALIGN_16 },
     { X86::XORPSrr,         X86::XORPSrm,       TB_ALIGN_16 },
 
+    // MMX version of foldable instructions
+    { X86::MMX_CVTPI2PSirr,   X86::MMX_CVTPI2PSirm,   0 },
+    { X86::MMX_PACKSSDWirr,   X86::MMX_PACKSSDWirm,   0 },
+    { X86::MMX_PACKSSWBirr,   X86::MMX_PACKSSWBirm,   0 },
+    { X86::MMX_PACKUSWBirr,   X86::MMX_PACKUSWBirm,   0 },
+    { X86::MMX_PADDBirr,      X86::MMX_PADDBirm,      0 },
+    { X86::MMX_PADDDirr,      X86::MMX_PADDDirm,      0 },
+    { X86::MMX_PADDQirr,      X86::MMX_PADDQirm,      0 },
+    { X86::MMX_PADDSBirr,     X86::MMX_PADDSBirm,     0 },
+    { X86::MMX_PADDSWirr,     X86::MMX_PADDSWirm,     0 },
+    { X86::MMX_PADDUSBirr,    X86::MMX_PADDUSBirm,    0 },
+    { X86::MMX_PADDUSWirr,    X86::MMX_PADDUSWirm,    0 },
+    { X86::MMX_PADDWirr,      X86::MMX_PADDWirm,      0 },
+    { X86::MMX_PALIGNR64irr,  X86::MMX_PALIGNR64irm,  0 },
+    { X86::MMX_PANDNirr,      X86::MMX_PANDNirm,      0 },
+    { X86::MMX_PANDirr,       X86::MMX_PANDirm,       0 },
+    { X86::MMX_PAVGBirr,      X86::MMX_PAVGBirm,      0 },
+    { X86::MMX_PAVGWirr,      X86::MMX_PAVGWirm,      0 },
+    { X86::MMX_PCMPEQBirr,    X86::MMX_PCMPEQBirm,    0 },
+    { X86::MMX_PCMPEQDirr,    X86::MMX_PCMPEQDirm,    0 },
+    { X86::MMX_PCMPEQWirr,    X86::MMX_PCMPEQWirm,    0 },
+    { X86::MMX_PCMPGTBirr,    X86::MMX_PCMPGTBirm,    0 },
+    { X86::MMX_PCMPGTDirr,    X86::MMX_PCMPGTDirm,    0 },
+    { X86::MMX_PCMPGTWirr,    X86::MMX_PCMPGTWirm,    0 },
+    { X86::MMX_PHADDSWrr64,   X86::MMX_PHADDSWrm64,   0 },
+    { X86::MMX_PHADDWrr64,    X86::MMX_PHADDWrm64,    0 },
+    { X86::MMX_PHADDrr64,     X86::MMX_PHADDrm64,     0 },
+    { X86::MMX_PHSUBDrr64,    X86::MMX_PHSUBDrm64,    0 },
+    { X86::MMX_PHSUBSWrr64,   X86::MMX_PHSUBSWrm64,   0 },
+    { X86::MMX_PHSUBWrr64,    X86::MMX_PHSUBWrm64,    0 },
+    { X86::MMX_PINSRWirri,    X86::MMX_PINSRWirmi,    0 },
+    { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
+    { X86::MMX_PMADDWDirr,    X86::MMX_PMADDWDirm,    0 },
+    { X86::MMX_PMAXSWirr,     X86::MMX_PMAXSWirm,     0 },
+    { X86::MMX_PMAXUBirr,     X86::MMX_PMAXUBirm,     0 },
+    { X86::MMX_PMINSWirr,     X86::MMX_PMINSWirm,     0 },
+    { X86::MMX_PMINUBirr,     X86::MMX_PMINUBirm,     0 },
+    { X86::MMX_PMULHRSWrr64,  X86::MMX_PMULHRSWrm64,  0 },
+    { X86::MMX_PMULHUWirr,    X86::MMX_PMULHUWirm,    0 },
+    { X86::MMX_PMULHWirr,     X86::MMX_PMULHWirm,     0 },
+    { X86::MMX_PMULLWirr,     X86::MMX_PMULLWirm,     0 },
+    { X86::MMX_PMULUDQirr,    X86::MMX_PMULUDQirm,    0 },
+    { X86::MMX_PORirr,        X86::MMX_PORirm,        0 },
+    { X86::MMX_PSADBWirr,     X86::MMX_PSADBWirm,     0 },
+    { X86::MMX_PSHUFBrr64,    X86::MMX_PSHUFBrm64,    0 },
+    { X86::MMX_PSIGNBrr64,    X86::MMX_PSIGNBrm64,    0 },
+    { X86::MMX_PSIGNDrr64,    X86::MMX_PSIGNDrm64,    0 },
+    { X86::MMX_PSIGNWrr64,    X86::MMX_PSIGNWrm64,    0 },
+    { X86::MMX_PSLLDrr,       X86::MMX_PSLLDrm,       0 },
+    { X86::MMX_PSLLQrr,       X86::MMX_PSLLQrm,       0 },
+    { X86::MMX_PSLLWrr,       X86::MMX_PSLLWrm,       0 },
+    { X86::MMX_PSRADrr,       X86::MMX_PSRADrm,       0 },
+    { X86::MMX_PSRAWrr,       X86::MMX_PSRAWrm,       0 },
+    { X86::MMX_PSRLDrr,       X86::MMX_PSRLDrm,       0 },
+    { X86::MMX_PSRLQrr,       X86::MMX_PSRLQrm,       0 },
+    { X86::MMX_PSRLWrr,       X86::MMX_PSRLWrm,       0 },
+    { X86::MMX_PSUBBirr,      X86::MMX_PSUBBirm,      0 },
+    { X86::MMX_PSUBDirr,      X86::MMX_PSUBDirm,      0 },
+    { X86::MMX_PSUBQirr,      X86::MMX_PSUBQirm,      0 },
+    { X86::MMX_PSUBSBirr,     X86::MMX_PSUBSBirm,     0 },
+    { X86::MMX_PSUBSWirr,     X86::MMX_PSUBSWirm,     0 },
+    { X86::MMX_PSUBUSBirr,    X86::MMX_PSUBUSBirm,    0 },
+    { X86::MMX_PSUBUSWirr,    X86::MMX_PSUBUSWirm,    0 },
+    { X86::MMX_PSUBWirr,      X86::MMX_PSUBWirm,      0 },
+    { X86::MMX_PUNPCKHBWirr,  X86::MMX_PUNPCKHBWirm,  0 },
+    { X86::MMX_PUNPCKHDQirr,  X86::MMX_PUNPCKHDQirm,  0 },
+    { X86::MMX_PUNPCKHWDirr,  X86::MMX_PUNPCKHWDirm,  0 },
+    { X86::MMX_PUNPCKLBWirr,  X86::MMX_PUNPCKLBWirm,  0 },
+    { X86::MMX_PUNPCKLDQirr,  X86::MMX_PUNPCKLDQirm,  0 },
+    { X86::MMX_PUNPCKLWDirr,  X86::MMX_PUNPCKLWDirm,  0 },
+    { X86::MMX_PXORirr,       X86::MMX_PXORirm,       0 },
+
     // AVX 128-bit versions of foldable instructions
     { X86::VCVTSD2SSrr,       X86::VCVTSD2SSrm,        0 },
     { X86::Int_VCVTSD2SSrr,   X86::Int_VCVTSD2SSrm,    0 },

Modified: llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll?rev=230499&r1=230498&r2=230499&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll Wed Feb 25 09:14:02 2015
@@ -135,3 +135,148 @@ entry:
   ret i64 %4
 }
 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)
+
+define i64 @tt0(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt0:
+; CHECK:       # BB#0:{{.*}} %entry
+; CHECK:    paddb (%[[REG3:[a-z]+]]), %mm0
+; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    emms
+; CHECK-NEXT:    retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare void @llvm.x86.mmx.emms()
+
+define i64 @tt1(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt1:
+; CHECK:       # BB#0:{{.*}} %entry
+; CHECK:    paddw (%[[REG3]]), %mm0
+; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    emms
+; CHECK-NEXT:    retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+
+define i64 @tt2(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt2:
+; CHECK:       # BB#0:{{.*}} %entry
+; CHECK:    paddd (%[[REG3]]), %mm0
+; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    emms
+; CHECK-NEXT:    retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+
+define i64 @tt3(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt3:
+; CHECK:       # BB#0:{{.*}} %entry
+; CHECK:    paddq (%[[REG3]]), %mm0
+; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    emms
+; CHECK-NEXT:    retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
+
+define i64 @tt4(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt4:
+; CHECK:       # BB#0:{{.*}} %entry
+; CHECK:    paddusb (%[[REG3]]), %mm0
+; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    emms
+; CHECK-NEXT:    retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
+
+define i64 @tt5(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt5:
+; CHECK:       # BB#0:{{.*}} %entry
+; CHECK:    paddusw (%[[REG3]]), %mm0
+; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    emms
+; CHECK-NEXT:    retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
+
+define i64 @tt6(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt6:
+; CHECK:       # BB#0:{{.*}} %entry
+; CHECK:    psrlw (%[[REG3]]), %mm0
+; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    emms
+; CHECK-NEXT:    retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx)
+
+define i64 @tt7(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt7:
+; CHECK:       # BB#0:{{.*}} %entry
+; CHECK:    psrld (%[[REG3]]), %mm0
+; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    emms
+; CHECK-NEXT:    retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx)
+
+define i64 @tt8(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt8:
+; CHECK:       # BB#0:{{.*}} %entry
+; CHECK:    psrlq (%[[REG3]]), %mm0
+; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    emms
+; CHECK-NEXT:    retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)

Modified: llvm/trunk/test/CodeGen/X86/vec_extract-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract-mmx.ll?rev=230499&r1=230498&r2=230499&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_extract-mmx.ll Wed Feb 25 09:14:02 2015
@@ -49,8 +49,7 @@ entry:
 define i32 @test2(i32* nocapture readonly %ptr) {
 ; CHECK-LABEL: test2:
 ; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    movq (%rdi), %mm0
-; CHECK-NEXT:    pshufw $232, %mm0, %mm0
+; CHECK-NEXT:    pshufw $232, (%rdi), %mm0
 ; CHECK-NEXT:    movd %mm0, %eax
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
