[llvm] r279622 - [X86][AVX2] Ensure on 32-bit targets that we broadcast f64 types not i64 (PR29101)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 24 05:42:32 PDT 2016
Author: rksimon
Date: Wed Aug 24 07:42:31 2016
New Revision: 279622
URL: http://llvm.org/viewvc/llvm-project?rev=279622&view=rev
Log:
[X86][AVX2] Ensure on 32-bit targets that we broadcast f64 types not i64 (PR29101)
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=279622&r1=279621&r2=279622&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Aug 24 07:42:31 2016
@@ -8733,6 +8733,13 @@ static SDValue lowerVectorShuffleAsBroad
V = DAG.getBitcast(SrcVT, V);
}
+ // 32-bit targets need to load i64 as a f64 and then bitcast the result.
+ if (!Subtarget.is64Bit() && SrcVT == MVT::i64) {
+ V = DAG.getBitcast(MVT::f64, V);
+ unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
+ BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
+ }
+
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=279622&r1=279621&r2=279622&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Wed Aug 24 07:42:31 2016
@@ -232,6 +232,43 @@ entry:
ret <4 x i64> %q3
}
+define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) {
+; X32-LABEL: broadcast_mem_v4i16_v8i16:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-NEXT: retl
+;
+; X64-LABEL: broadcast_mem_v4i16_v8i16:
+; X64: ## BB#0:
+; X64-NEXT: vpbroadcastq (%rdi), %xmm0
+; X64-NEXT: retq
+ %load = load <4 x i16>, <4 x i16>* %ptr
+ %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i16> %shuf
+}
+
+define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
+; X32-LABEL: broadcast_mem_v4i16_v16i16:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; X32-NEXT: vpbroadcastq %xmm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: broadcast_mem_v4i16_v16i16:
+; X64: ## BB#0:
+; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; X64-NEXT: vpbroadcastq %xmm0, %ymm0
+; X64-NEXT: retq
+ %load = load <4 x i16>, <4 x i16>* %ptr
+ %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i16> %shuf
+}
+
; FIXME: Pointer adjusted broadcasts
define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
@@ -563,7 +600,7 @@ entry:
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; X32-LABEL: V111:
; X32: ## BB#0: ## %entry
-; X32-NEXT: vpbroadcastd LCPI27_0, %ymm1
+; X32-NEXT: vpbroadcastd LCPI29_0, %ymm1
; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
@@ -580,7 +617,7 @@ entry:
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-LABEL: V113:
; X32: ## BB#0: ## %entry
-; X32-NEXT: vbroadcastss LCPI28_0, %ymm1
+; X32-NEXT: vbroadcastss LCPI30_0, %ymm1
; X32-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
@@ -597,7 +634,7 @@ entry:
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32: ## BB#0:
-; X32-NEXT: vbroadcastss LCPI29_0, %xmm0
+; X32-NEXT: vbroadcastss LCPI31_0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: _e2:
@@ -637,25 +674,25 @@ define void @crash() nounwind alwaysinli
; X32: ## BB#0: ## %WGLoopsEntry
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: testb %al, %al
-; X32-NEXT: je LBB31_1
+; X32-NEXT: je LBB33_1
; X32-NEXT: ## BB#2: ## %ret
; X32-NEXT: retl
; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: LBB31_1: ## %footer349VF
+; X32-NEXT: LBB33_1: ## %footer349VF
; X32-NEXT: ## =>This Inner Loop Header: Depth=1
-; X32-NEXT: jmp LBB31_1
+; X32-NEXT: jmp LBB33_1
;
; X64-LABEL: crash:
; X64: ## BB#0: ## %WGLoopsEntry
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testb %al, %al
-; X64-NEXT: je LBB31_1
+; X64-NEXT: je LBB33_1
; X64-NEXT: ## BB#2: ## %ret
; X64-NEXT: retq
; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: LBB31_1: ## %footer349VF
+; X64-NEXT: LBB33_1: ## %footer349VF
; X64-NEXT: ## =>This Inner Loop Header: Depth=1
-; X64-NEXT: jmp LBB31_1
+; X64-NEXT: jmp LBB33_1
WGLoopsEntry:
br i1 undef, label %ret, label %footer329VF
More information about the llvm-commits mailing list