[llvm] r294846 - [X86][3DNow!] Enable commutation for PFADD/PFMUL/PFCMPEQ/PAVGUSB/PMULHRW
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 11 05:32:56 PST 2017
Author: rksimon
Date: Sat Feb 11 07:32:55 2017
New Revision: 294846
URL: http://llvm.org/viewvc/llvm-project?rev=294846&view=rev
Log:
[X86][3DNow!] Enable commutation for PFADD/PFMUL/PFCMPEQ/PAVGUSB/PMULHRW
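All commutations confirmed to give identical results. Note that PFMAX/PFMIN do not
give identical results when their operands are swapped, so they are left non-commutable.
PFSUB<->PFSUBR should be commutable as well (not done in this commit; see the FIXMEs in the test).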
Modified:
llvm/trunk/lib/Target/X86/X86Instr3DNow.td
llvm/trunk/test/CodeGen/X86/commute-3dnow.ll
Modified: llvm/trunk/lib/Target/X86/X86Instr3DNow.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr3DNow.td?rev=294846&r1=294845&r2=294846&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Instr3DNow.td (original)
+++ llvm/trunk/lib/Target/X86/X86Instr3DNow.td Sat Feb 11 07:32:55 2017
@@ -38,7 +38,9 @@ multiclass I3DNow_binop_rm<bits<8> opc,
def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
}
-multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, bit Commutable = 0,
+ string Ver = ""> {
+ let isCommutable = Commutable in
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
@@ -63,16 +65,16 @@ multiclass I3DNow_conv_rm_int<bits<8> op
(bitconvert (load_mmx addr:$src))))]>;
}
-defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", 1>;
defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
-defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
-defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", 1>;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", 1>;
defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
-defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", 1>;
defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
@@ -81,7 +83,7 @@ defm PFRSQRT : I3DNow_conv_rm_int<0x97,
defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
-defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", 1>;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
@@ -98,6 +100,6 @@ def PREFETCHW : I<0x0D, MRM1m, (outs), (
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
-defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
-defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", 0, "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", 0, "a">;
defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
Modified: llvm/trunk/test/CodeGen/X86/commute-3dnow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/commute-3dnow.ll?rev=294846&r1=294845&r2=294846&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/commute-3dnow.ll (original)
+++ llvm/trunk/test/CodeGen/X86/commute-3dnow.ll Sat Feb 11 07:32:55 2017
@@ -2,8 +2,6 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X64
-; FIXME - missed commutation opportunities.
-
define void @commute_m_pfadd(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfadd:
; X32: # BB#0:
@@ -11,19 +9,17 @@ define void @commute_m_pfadd(x86_mmx *%a
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movq (%edx), %mm0
-; X32-NEXT: movq (%ecx), %mm1
; X32-NEXT: pfadd (%eax), %mm0
-; X32-NEXT: pfadd %mm0, %mm1
-; X32-NEXT: movq %mm1, (%ecx)
+; X32-NEXT: pfadd (%ecx), %mm0
+; X32-NEXT: movq %mm0, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: commute_m_pfadd:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: movq (%rdx), %mm1
; X64-NEXT: pfadd (%rsi), %mm0
-; X64-NEXT: pfadd %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdx)
+; X64-NEXT: pfadd (%rdx), %mm0
+; X64-NEXT: movq %mm0, (%rdx)
; X64-NEXT: retq
%1 = load x86_mmx, x86_mmx* %a0
%2 = load x86_mmx, x86_mmx* %a1
@@ -35,6 +31,7 @@ define void @commute_m_pfadd(x86_mmx *%a
}
declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx)
+; FIXME - missed PFSUB commutation.
define void @commute_m_pfsub(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsub:
; X32: # BB#0:
@@ -66,6 +63,7 @@ define void @commute_m_pfsub(x86_mmx *%a
}
declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx)
+; FIXME - missed PFSUBR commutation.
define void @commute_m_pfsubr(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsubr:
; X32: # BB#0:
@@ -104,19 +102,17 @@ define void @commute_m_pfmul(x86_mmx *%a
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movq (%edx), %mm0
-; X32-NEXT: movq (%ecx), %mm1
; X32-NEXT: pfmul (%eax), %mm0
-; X32-NEXT: pfmul %mm0, %mm1
-; X32-NEXT: movq %mm1, (%ecx)
+; X32-NEXT: pfmul (%ecx), %mm0
+; X32-NEXT: movq %mm0, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: commute_m_pfmul:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: movq (%rdx), %mm1
; X64-NEXT: pfmul (%rsi), %mm0
-; X64-NEXT: pfmul %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdx)
+; X64-NEXT: pfmul (%rdx), %mm0
+; X64-NEXT: movq %mm0, (%rdx)
; X64-NEXT: retq
%1 = load x86_mmx, x86_mmx* %a0
%2 = load x86_mmx, x86_mmx* %a1
@@ -135,19 +131,17 @@ define void @commute_m_pfcmpeq(x86_mmx *
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movq (%edx), %mm0
-; X32-NEXT: movq (%ecx), %mm1
; X32-NEXT: pfcmpeq (%eax), %mm0
-; X32-NEXT: pfcmpeq %mm0, %mm1
-; X32-NEXT: movq %mm1, (%ecx)
+; X32-NEXT: pfcmpeq (%ecx), %mm0
+; X32-NEXT: movq %mm0, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: commute_m_pfcmpeq:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: movq (%rdx), %mm1
; X64-NEXT: pfcmpeq (%rsi), %mm0
-; X64-NEXT: pfcmpeq %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdx)
+; X64-NEXT: pfcmpeq (%rdx), %mm0
+; X64-NEXT: movq %mm0, (%rdx)
; X64-NEXT: retq
%1 = load x86_mmx, x86_mmx* %a0
%2 = load x86_mmx, x86_mmx* %a1
@@ -166,19 +160,17 @@ define void @commute_m_pavgusb(x86_mmx *
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movq (%edx), %mm0
-; X32-NEXT: movq (%ecx), %mm1
; X32-NEXT: pavgusb (%eax), %mm0
-; X32-NEXT: pavgusb %mm0, %mm1
-; X32-NEXT: movq %mm1, (%ecx)
+; X32-NEXT: pavgusb (%ecx), %mm0
+; X32-NEXT: movq %mm0, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: commute_m_pavgusb:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: movq (%rdx), %mm1
; X64-NEXT: pavgusb (%rsi), %mm0
-; X64-NEXT: pavgusb %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdx)
+; X64-NEXT: pavgusb (%rdx), %mm0
+; X64-NEXT: movq %mm0, (%rdx)
; X64-NEXT: retq
%1 = load x86_mmx, x86_mmx* %a0
%2 = load x86_mmx, x86_mmx* %a1
@@ -197,19 +189,17 @@ define void @commute_m_pmulhrw(x86_mmx *
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movq (%edx), %mm0
-; X32-NEXT: movq (%ecx), %mm1
; X32-NEXT: pmulhrw (%eax), %mm0
-; X32-NEXT: pmulhrw %mm0, %mm1
-; X32-NEXT: movq %mm1, (%ecx)
+; X32-NEXT: pmulhrw (%ecx), %mm0
+; X32-NEXT: movq %mm0, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: commute_m_pmulhrw:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: movq (%rdx), %mm1
; X64-NEXT: pmulhrw (%rsi), %mm0
-; X64-NEXT: pmulhrw %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdx)
+; X64-NEXT: pmulhrw (%rdx), %mm0
+; X64-NEXT: movq %mm0, (%rdx)
; X64-NEXT: retq
%1 = load x86_mmx, x86_mmx* %a0
%2 = load x86_mmx, x86_mmx* %a1
More information about the llvm-commits
mailing list