[llvm] r297667 - [X86][MMX] Fix folding of shift value loads to cover whole 64-bits
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 13 14:23:30 PDT 2017
Author: rksimon
Date: Mon Mar 13 16:23:29 2017
New Revision: 297667
URL: http://llvm.org/viewvc/llvm-project?rev=297667&view=rev
Log:
[X86][MMX] Fix folding of shift value loads to cover whole 64-bits
rL230225 made the assumption that only the lower 32-bits of an MMX register load are used as a shift value, when in fact the whole 64-bits are reloaded and treated as an i64 to determine the shift value.
This patch reverts rL230225 to ensure that the whole 64-bits of memory are folded, and ensures that the upper 32-bits are zeroed for cases where the shift value comes from a scalar source.
Found during fuzz testing.
Differential Revision: https://reviews.llvm.org/D30833
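For reference, a minimal IR sketch of the pattern that exposed the bug (function name and constant values are illustrative, loosely modeled on the test_psrlq_by_volatile_shift_amount case in mmx-fold-load.ll; only the intrinsic declaration is taken verbatim from that test). The shift amount starts life as an i32 on the stack, so folding that 32-bit slot directly as the 64-bit memory operand of psrlq would read 4 bytes of adjacent garbage, whereas loading it with movd zeroes the upper 32-bits:

; Illustrative sketch only - not part of the committed tests.
define i64 @shift_by_scalar_amount(i64 %x) nounwind {
entry:
  ; The shift amount is a 32-bit value spilled to the stack.
  %amt = alloca i32, align 4
  store volatile i32 1, i32* %amt, align 4
  %n = load volatile i32, i32* %amt, align 4
  ; At the IR level the upper 32-bits of the shift value are defined zero...
  %n64 = zext i32 %n to i64
  %amt.mmx = bitcast i64 %n64 to x86_mmx
  %v = bitcast i64 %x to x86_mmx
  ; ...so folding the i32 stack slot as a full 64-bit load here would be wrong.
  %r = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %v, x86_mmx %amt.mmx)
  %ret = bitcast x86_mmx %r to i64
  ret i64 %ret
}
declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)

With this patch the scalar shift amount is materialized with movd (zero-extending into the MMX register) instead of being folded as a 64-bit memory operand, as the updated CHECK lines below show.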
Modified:
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86InstrMMX.td
llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll
llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=297667&r1=297666&r2=297667&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Mon Mar 13 16:23:29 2017
@@ -27,8 +27,6 @@ def MMX_X86movw2d : SDNode<"X86ISD::MMX_
//===----------------------------------------------------------------------===//
def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
-def load_mvmmx : PatFrag<(ops node:$ptr),
- (x86mmx (MMX_X86movw2d (load node:$ptr)))>;
//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=297667&r1=297666&r2=297667&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Mon Mar 13 16:23:29 2017
@@ -479,13 +479,6 @@ defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3
int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psrl_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psrl_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLDrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psrl_q VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLQrm VR64:$src1, addr:$src2)>;
-
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
MMX_SHIFT_ITINS>;
@@ -496,13 +489,6 @@ defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3
int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psll_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psll_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLDrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psll_q VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLQrm VR64:$src1, addr:$src2)>;
-
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
MMX_SHIFT_ITINS>;
@@ -510,11 +496,6 @@ defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2
int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psra_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRAWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psra_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRADrm VR64:$src1, addr:$src2)>;
-
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
MMX_INTALU_ITINS>;
Modified: llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll?rev=297667&r1=297666&r2=297667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll Mon Mar 13 16:23:29 2017
@@ -34,9 +34,10 @@ define i64 @t1(i64 %x, i32 %n) nounwind
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movq 8(%ebp), %mm0
-; X86-NEXT: psllq 16(%ebp), %mm0
-; X86-NEXT: movq %mm0, (%esp)
+; X86-NEXT: movd 16(%ebp), %mm0
+; X86-NEXT: movq 8(%ebp), %mm1
+; X86-NEXT: psllq %mm0, %mm1
+; X86-NEXT: movq %mm1, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
@@ -64,10 +65,11 @@ define i64 @t2(i64 %x, i32 %n, i32 %w) n
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movd 20(%ebp), %mm0
-; X86-NEXT: psllq 16(%ebp), %mm0
-; X86-NEXT: por 8(%ebp), %mm0
-; X86-NEXT: movq %mm0, (%esp)
+; X86-NEXT: movd 16(%ebp), %mm0
+; X86-NEXT: movd 20(%ebp), %mm1
+; X86-NEXT: psllq %mm0, %mm1
+; X86-NEXT: por 8(%ebp), %mm1
+; X86-NEXT: movq %mm1, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
@@ -104,7 +106,8 @@ define i64 @t3(<1 x i64>* %y, i32* %n) n
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psllq (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psllq %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -115,7 +118,8 @@ define i64 @t3(<1 x i64>* %y, i32* %n) n
; X64-LABEL: t3:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psllq (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psllq %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
Modified: llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll?rev=297667&r1=297666&r2=297667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll Mon Mar 13 16:23:29 2017
@@ -12,7 +12,8 @@ define i64 @t0(<1 x i64>* %a, i32* %b) n
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psllq (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psllq %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -23,7 +24,8 @@ define i64 @t0(<1 x i64>* %a, i32* %b) n
; X64-LABEL: t0:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psllq (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psllq %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
@@ -46,7 +48,8 @@ define i64 @t1(<1 x i64>* %a, i32* %b) n
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psrlq (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psrlq %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -57,7 +60,8 @@ define i64 @t1(<1 x i64>* %a, i32* %b) n
; X64-LABEL: t1:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psrlq (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psrlq %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
@@ -80,7 +84,8 @@ define i64 @t2(<1 x i64>* %a, i32* %b) n
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psllw (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psllw %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -91,7 +96,8 @@ define i64 @t2(<1 x i64>* %a, i32* %b) n
; X64-LABEL: t2:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psllw (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psllw %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
@@ -114,7 +120,8 @@ define i64 @t3(<1 x i64>* %a, i32* %b) n
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psrlw (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psrlw %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -125,7 +132,8 @@ define i64 @t3(<1 x i64>* %a, i32* %b) n
; X64-LABEL: t3:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psrlw (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psrlw %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
@@ -148,7 +156,8 @@ define i64 @t4(<1 x i64>* %a, i32* %b) n
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: pslld (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: pslld %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -159,7 +168,8 @@ define i64 @t4(<1 x i64>* %a, i32* %b) n
; X64-LABEL: t4:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: pslld (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: pslld %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
@@ -182,7 +192,8 @@ define i64 @t5(<1 x i64>* %a, i32* %b) n
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psrld (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psrld %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -193,7 +204,8 @@ define i64 @t5(<1 x i64>* %a, i32* %b) n
; X64-LABEL: t5:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psrld (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psrld %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
@@ -216,7 +228,8 @@ define i64 @t6(<1 x i64>* %a, i32* %b) n
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psraw (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psraw %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -227,7 +240,8 @@ define i64 @t6(<1 x i64>* %a, i32* %b) n
; X64-LABEL: t6:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psraw (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psraw %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
@@ -250,7 +264,8 @@ define i64 @t7(<1 x i64>* %a, i32* %b) n
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movq (%ecx), %mm0
-; X86-NEXT: psrad (%eax), %mm0
+; X86-NEXT: movd (%eax), %mm1
+; X86-NEXT: psrad %mm1, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -261,7 +276,8 @@ define i64 @t7(<1 x i64>* %a, i32* %b) n
; X64-LABEL: t7:
; X64: # BB#0: # %entry
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: psrad (%rsi), %mm0
+; X64-NEXT: movd (%rsi), %mm1
+; X64-NEXT: psrad %mm1, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
entry:
@@ -563,8 +579,6 @@ entry:
}
declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)
-; FIXME: Show issue with storing i32 to stack and then reloading as x86_mmx
-; which will lead to garbage in the other 32-bits.
define void @test_psrlq_by_volatile_shift_amount(x86_mmx* %t) nounwind {
; X86-LABEL: test_psrlq_by_volatile_shift_amount:
; X86: # BB#0: # %entry
@@ -574,11 +588,12 @@ define void @test_psrlq_by_volatile_shif
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $255, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psrlq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: movq %mm0, (%eax)
+; X86-NEXT: movq {{[0-9]+}}(%esp), %mm1
+; X86-NEXT: psrlq %mm0, %mm1
+; X86-NEXT: movq %mm1, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
@@ -586,10 +601,11 @@ define void @test_psrlq_by_volatile_shif
; X64-LABEL: test_psrlq_by_volatile_shift_amount:
; X64: # BB#0: # %entry
; X64-NEXT: movl $1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movd -{{[0-9]+}}(%rsp), %mm0
; X64-NEXT: movl $255, %eax
-; X64-NEXT: movd %rax, %mm0
-; X64-NEXT: psrlq -{{[0-9]+}}(%rsp), %mm0
-; X64-NEXT: movq %mm0, (%rdi)
+; X64-NEXT: movd %rax, %mm1
+; X64-NEXT: psrlq %mm0, %mm1
+; X64-NEXT: movq %mm1, (%rdi)
; X64-NEXT: retq
entry:
%0 = alloca i32, align 4