[llvm] r330896 - [x86] Allow folding unaligned memory operands into pcmp[ei]str*
Chandler Carruth via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 25 20:17:25 PDT 2018
Author: chandlerc
Date: Wed Apr 25 20:17:25 2018
New Revision: 330896
URL: http://llvm.org/viewvc/llvm-project?rev=330896&view=rev
Log:
[x86] Allow folding unaligned memory operands into pcmp[ei]str*
instructions.
The x86 manual explicitly permits these instructions to read unaligned
memory operands, and both ICC and GCC perform this folding as well.
This corrects one of the issues identified in PR37246.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse42.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=330896&r1=330895&r2=330896&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Apr 25 20:17:25 2018
@@ -631,10 +631,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 },
{ X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 },
{ X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 },
- { X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 },
- { X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 },
- { X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 },
- { X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 },
+ { X86::PCMPESTRIrr, X86::PCMPESTRIrm, 0 },
+ { X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, 0 },
+ { X86::PCMPISTRIrr, X86::PCMPISTRIrm, 0 },
+ { X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, 0 },
{ X86::PHMINPOSUWrr, X86::PHMINPOSUWrm, TB_ALIGN_16 },
{ X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE },
{ X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE },
Modified: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll?rev=330896&r1=330895&r2=330896&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll Wed Apr 25 20:17:25 2018
@@ -437,7 +437,7 @@ define <16 x i8> @test_x86_sse42_pcmpist
; VCHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; VCHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x62,0x00,0x07]
; VCHECK-NEXT: retl ## encoding: [0xc3]
- %1 = load <16 x i8>, <16 x i8>* %a1
+ %1 = load <16 x i8>, <16 x i8>* %a1, align 1
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
Modified: llvm/trunk/test/CodeGen/X86/sse42.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42.ll?rev=330896&r1=330895&r2=330896&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42.ll Wed Apr 25 20:17:25 2018
@@ -123,19 +123,18 @@ define i1 @pcmpestri_mem_eq_i8(i8* %lhs_
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movdqu (%esi), %xmm0
-; X32-NEXT: movdqu (%ecx), %xmm1
-; X32-NEXT: pcmpestri $24, %xmm1, %xmm0
+; X32-NEXT: pcmpestri $24, (%ecx), %xmm0
; X32-NEXT: setae %al
; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: pcmpestri_mem_eq_i8:
; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rdx), %xmm1
; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: pcmpestri $24, %xmm1, %xmm0
+; X64-NEXT: pcmpestri $24, (%r8), %xmm0
; X64-NEXT: setae %al
; X64-NEXT: retq
entry:
@@ -157,19 +156,18 @@ define i32 @pcmpestri_mem_idx_i8(i8* %lh
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movdqu (%esi), %xmm0
-; X32-NEXT: movdqu (%ecx), %xmm1
-; X32-NEXT: pcmpestri $24, %xmm1, %xmm0
+; X32-NEXT: pcmpestri $24, (%ecx), %xmm0
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: pcmpestri_mem_idx_i8:
; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rdx), %xmm1
; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: pcmpestri $24, %xmm1, %xmm0
+; X64-NEXT: pcmpestri $24, (%r8), %xmm0
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
@@ -380,19 +378,18 @@ define i1 @pcmpestri_mem_eq_i16(i16* %lh
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movdqu (%esi), %xmm0
-; X32-NEXT: movdqu (%ecx), %xmm1
-; X32-NEXT: pcmpestri $25, %xmm1, %xmm0
+; X32-NEXT: pcmpestri $25, (%ecx), %xmm0
; X32-NEXT: setae %al
; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: pcmpestri_mem_eq_i16:
; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rdx), %xmm1
; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: pcmpestri $25, %xmm1, %xmm0
+; X64-NEXT: pcmpestri $25, (%r8), %xmm0
; X64-NEXT: setae %al
; X64-NEXT: retq
entry:
@@ -416,19 +413,18 @@ define i32 @pcmpestri_mem_idx_i16(i16* %
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movdqu (%esi), %xmm0
-; X32-NEXT: movdqu (%ecx), %xmm1
-; X32-NEXT: pcmpestri $25, %xmm1, %xmm0
+; X32-NEXT: pcmpestri $25, (%ecx), %xmm0
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: pcmpestri_mem_idx_i16:
; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rdx), %xmm1
; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: pcmpestri $25, %xmm1, %xmm0
+; X64-NEXT: pcmpestri $25, (%r8), %xmm0
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
@@ -622,16 +618,14 @@ define i1 @pcmpistri_mem_eq_i8(i8* %lhs_
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movdqu (%ecx), %xmm0
-; X32-NEXT: movdqu (%eax), %xmm1
-; X32-NEXT: pcmpistri $24, %xmm1, %xmm0
+; X32-NEXT: pcmpistri $24, (%eax), %xmm0
; X32-NEXT: setae %al
; X32-NEXT: retl
;
; X64-LABEL: pcmpistri_mem_eq_i8:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rsi), %xmm1
-; X64-NEXT: pcmpistri $24, %xmm1, %xmm0
+; X64-NEXT: pcmpistri $24, (%rsi), %xmm0
; X64-NEXT: setae %al
; X64-NEXT: retq
entry:
@@ -650,16 +644,14 @@ define i32 @pcmpistri_mem_idx_i8(i8* %lh
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movdqu (%ecx), %xmm0
-; X32-NEXT: movdqu (%eax), %xmm1
-; X32-NEXT: pcmpistri $24, %xmm1, %xmm0
+; X32-NEXT: pcmpistri $24, (%eax), %xmm0
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: retl
;
; X64-LABEL: pcmpistri_mem_idx_i8:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rsi), %xmm1
-; X64-NEXT: pcmpistri $24, %xmm1, %xmm0
+; X64-NEXT: pcmpistri $24, (%rsi), %xmm0
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
@@ -849,16 +841,14 @@ define i1 @pcmpistri_mem_eq_i16(i16* %lh
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movdqu (%ecx), %xmm0
-; X32-NEXT: movdqu (%eax), %xmm1
-; X32-NEXT: pcmpistri $25, %xmm1, %xmm0
+; X32-NEXT: pcmpistri $25, (%eax), %xmm0
; X32-NEXT: setae %al
; X32-NEXT: retl
;
; X64-LABEL: pcmpistri_mem_eq_i16:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rsi), %xmm1
-; X64-NEXT: pcmpistri $25, %xmm1, %xmm0
+; X64-NEXT: pcmpistri $25, (%rsi), %xmm0
; X64-NEXT: setae %al
; X64-NEXT: retq
entry:
@@ -879,16 +869,14 @@ define i32 @pcmpistri_mem_idx_i16(i16* %
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movdqu (%ecx), %xmm0
-; X32-NEXT: movdqu (%eax), %xmm1
-; X32-NEXT: pcmpistri $25, %xmm1, %xmm0
+; X32-NEXT: pcmpistri $25, (%eax), %xmm0
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: retl
;
; X64-LABEL: pcmpistri_mem_idx_i16:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rsi), %xmm1
-; X64-NEXT: pcmpistri $25, %xmm1, %xmm0
+; X64-NEXT: pcmpistri $25, (%rsi), %xmm0
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
More information about the llvm-commits mailing list