[llvm] r288898 - [X86][XOP] Fix VPERMIL2 non-constant pool shuffle decoding (PR31296)
Sanjoy Das via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 9 14:34:33 PST 2016
Hi Simon,
I triaged a bug in our (internal) Java frontend to LLVM generating bad
code for this function:
define void @f(float %a, float %b, <4 x float>* %ptr) {
%v0 = insertelement <4 x float> undef, float %a, i32 0
%v1 = insertelement <4 x float> %v0, float %b, i32 1
%v2 = insertelement <4 x float> %v1, float %a, i32 2
%v3 = insertelement <4 x float> %v2, float %b, i32 3
store <4 x float> %v3, <4 x float>* %ptr
ret void
}
with llc -O3 -mcpu=bdver1, before this change llc used to generate:
f: # @f
vmovddup %xmm0, %xmm0 # xmm0 = xmm0[0,0]
vmovapd %xmm0, (%rdi)
retq
which is incorrect.
After this change, it seems to generate a vpermil2ps, which seems
correct at least on the surface, looking at the comment generated
alongside it.
I have two questions, both stemming from the fact that I am not very
familiar with LLVM's backend:
- Is the fix for the test case above intentional, or is the change
merely hiding the problem?
- Will it be helpful to check in the above test case, or is that
scenario already covered by some other test?
Thanks!
-- Sanjoy
On Wed, Dec 7, 2016 at 3:19 AM, Simon Pilgrim via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: rksimon
> Date: Wed Dec 7 05:19:00 2016
> New Revision: 288898
>
> URL: http://llvm.org/viewvc/llvm-project?rev=288898&view=rev
> Log:
> [X86][XOP] Fix VPERMIL2 non-constant pool shuffle decoding (PR31296)
>
> The non-constant pool version of DecodeVPERMIL2PMask was not offsetting correctly for the second input. I've updated the code to match the implementation in the constant-pool version.
>
> Annoyingly this bug was hidden for so long as it's tricky to combine to useful variable shuffle masks that don't become constant-pool entries.
>
> Modified:
> llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
> llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
>
> Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=288898&r1=288897&r2=288898&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original)
> +++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Wed Dec 7 05:19:00 2016
> @@ -548,10 +548,11 @@ void DecodeVPERMIL2PMask(MVT VT, unsigne
> unsigned VecSize = VT.getSizeInBits();
> unsigned EltSize = VT.getScalarSizeInBits();
> unsigned NumLanes = VecSize / 128;
> - unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes;
> - assert((VecSize == 128 || VecSize == 256) &&
> - "Unexpected vector size");
> + unsigned NumElts = VT.getVectorNumElements();
> + unsigned NumEltsPerLane = NumElts / NumLanes;
> + assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
> assert((EltSize == 32 || EltSize == 64) && "Unexpected element size");
> + assert((NumElts == RawMask.size()) && "Unexpected mask size");
>
> for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
> // VPERMIL2 Operation.
> @@ -572,14 +573,15 @@ void DecodeVPERMIL2PMask(MVT VT, unsigne
> continue;
> }
>
> - unsigned Index = i & ~(NumEltsPerLane - 1);
> + int Index = i & ~(NumEltsPerLane - 1);
> if (EltSize == 64)
> Index += (Selector >> 1) & 0x1;
> else
> Index += Selector & 0x3;
>
> - unsigned SrcOffset = (Selector >> 2) & 1;
> - ShuffleMask.push_back((int)(SrcOffset + Index));
> + int Src = (Selector >> 2) & 0x1;
> + Index += Src * NumElts;
> + ShuffleMask.push_back(Index);
> }
> }
>
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=288898&r1=288897&r2=288898&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Wed Dec 7 05:19:00 2016
> @@ -345,12 +345,18 @@ define <16 x i8> @constant_fold_vpperm()
> define <4 x float> @PR31296(i8* %in) {
> ; X32-LABEL: PR31296:
> ; X32: # BB#0: # %entry
> -; X32-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00]
> +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
> +; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
> +; X32-NEXT: vmovaps {{.*#+}} xmm1 = <0,1,u,u>
> +; X32-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1]
> ; X32-NEXT: retl
> ;
> ; X64-LABEL: PR31296:
> ; X64: # BB#0: # %entry
> -; X64-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00]
> +; X64-NEXT: movl (%rdi), %eax
> +; X64-NEXT: vmovq %rax, %xmm0
> +; X64-NEXT: vmovaps {{.*#+}} xmm1 = <0,1,u,u>
> +; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1]
> ; X64-NEXT: retq
> entry:
> %0 = getelementptr i8, i8* %in, i32 0
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list