[llvm] db04bec - [SDAG] Do not convert undef to 0 when folding CONCAT/BUILD_VECTOR
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 29 07:13:04 PDT 2020
Author: Krzysztof Parzyszek
Date: 2020-09-29T09:12:26-05:00
New Revision: db04bec5f1eeb581ee1470e5f444cc7b918c6d93
URL: https://github.com/llvm/llvm-project/commit/db04bec5f1eeb581ee1470e5f444cc7b918c6d93
DIFF: https://github.com/llvm/llvm-project/commit/db04bec5f1eeb581ee1470e5f444cc7b918c6d93.diff
LOG: [SDAG] Do not convert undef to 0 when folding CONCAT/BUILD_VECTOR
Differential Revision: https://reviews.llvm.org/D88273
Added:
llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll
Removed:
(none)
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cfb4aa2f0bb5..b9362f1e762d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4370,11 +4370,16 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
for (SDValue Op : Elts)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
- if (SVT.bitsGT(VT.getScalarType()))
- for (SDValue &Op : Elts)
- Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
- ? DAG.getZExtOrTrunc(Op, DL, SVT)
- : DAG.getSExtOrTrunc(Op, DL, SVT);
+ if (SVT.bitsGT(VT.getScalarType())) {
+ for (SDValue &Op : Elts) {
+ if (Op.isUndef())
+ Op = DAG.getUNDEF(SVT);
+ else
+ Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, DL, SVT)
+ : DAG.getSExtOrTrunc(Op, DL, SVT);
+ }
+ }
SDValue V = DAG.getBuildVector(VT, DL, Elts);
NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG);
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll
new file mode 100644
index 000000000000..f8f0a7211a63
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s
+
+; Check that we don't generate lots of vinserts (of 0 that should be undef).
+; CHECK: vinsert
+; CHECK: vinsert
+; CHECK-NOT: vinsert
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define dllexport void @f0(i8* noalias align 128 %a0) #0 {
+b0:
+ %v0 = bitcast i8* %a0 to i32*
+ %v1 = getelementptr inbounds i32, i32* %v0, i32 undef
+ %v2 = bitcast i32* %v1 to <7 x i32>*
+ br label %b1
+
+b1: ; preds = %b0
+ %v3 = load i8, i8* undef, align 1
+ %v4 = insertelement <7 x i8> undef, i8 %v3, i32 0
+ %v5 = shufflevector <7 x i8> %v4, <7 x i8> undef, <7 x i32> zeroinitializer
+ %v6 = zext <7 x i8> %v5 to <7 x i32>
+ %v7 = load <7 x i8>, <7 x i8>* undef, align 1
+ %v8 = zext <7 x i8> %v7 to <7 x i32>
+ %v9 = mul nsw <7 x i32> %v6, %v8
+ %v10 = add nsw <7 x i32> %v9, zeroinitializer
+ store <7 x i32> %v10, <7 x i32>* %v2, align 4
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" }
+
diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll
index 97997018cff9..da8dbe32e840 100644
--- a/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll
@@ -101,7 +101,6 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
; AVX512-32-NEXT: kmovw %eax, %k0
; AVX512-32-NEXT: vcomiss 8(%ebp), %xmm2
; AVX512-32-NEXT: seta %al
-; AVX512-32-NEXT: andl $1, %eax
; AVX512-32-NEXT: kmovw %eax, %k1
; AVX512-32-NEXT: kandw %k0, %k1, %k0
; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -122,7 +121,6 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
; AVX512-64-NEXT: kmovw %eax, %k0
; AVX512-64-NEXT: vcomiss %xmm3, %xmm2
; AVX512-64-NEXT: seta %al
-; AVX512-64-NEXT: andl $1, %eax
; AVX512-64-NEXT: kmovw %eax, %k1
; AVX512-64-NEXT: kandw %k0, %k1, %k0
; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
@@ -148,7 +146,6 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
; AVX512F-32-NEXT: kmovw %eax, %k0
; AVX512F-32-NEXT: vcomiss 8(%ebp), %xmm2
; AVX512F-32-NEXT: seta %al
-; AVX512F-32-NEXT: andl $1, %eax
; AVX512F-32-NEXT: kmovw %eax, %k1
; AVX512F-32-NEXT: kandw %k0, %k1, %k0
; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -173,7 +170,6 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
; AVX512F-64-NEXT: kmovw %eax, %k0
; AVX512F-64-NEXT: vcomiss %xmm3, %xmm2
; AVX512F-64-NEXT: seta %al
-; AVX512F-64-NEXT: andl $1, %eax
; AVX512F-64-NEXT: kmovw %eax, %k1
; AVX512F-64-NEXT: kandw %k0, %k1, %k0
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
@@ -299,7 +295,6 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
; AVX512-32-NEXT: sete %cl
; AVX512-32-NEXT: testb %al, %cl
; AVX512-32-NEXT: setne %al
-; AVX512-32-NEXT: andl $1, %eax
; AVX512-32-NEXT: kmovw %eax, %k1
; AVX512-32-NEXT: kandw %k0, %k1, %k0
; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -319,27 +314,26 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
;
; AVX512-64-LABEL: test_v2f32_oeq_q:
; AVX512-64: # %bb.0:
-; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
-; AVX512-64-NEXT: vucomiss %xmm4, %xmm5
+; AVX512-64-NEXT: vucomiss %xmm3, %xmm2
; AVX512-64-NEXT: setnp %al
; AVX512-64-NEXT: sete %cl
; AVX512-64-NEXT: testb %al, %cl
; AVX512-64-NEXT: setne %al
; AVX512-64-NEXT: kmovw %eax, %k0
-; AVX512-64-NEXT: kshiftlw $15, %k0, %k0
-; AVX512-64-NEXT: kshiftrw $14, %k0, %k0
+; AVX512-64-NEXT: movw $-3, %ax
+; AVX512-64-NEXT: kmovw %eax, %k1
+; AVX512-64-NEXT: kandw %k1, %k0, %k0
+; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX512-64-NEXT: vucomiss %xmm3, %xmm2
; AVX512-64-NEXT: setnp %al
; AVX512-64-NEXT: sete %cl
; AVX512-64-NEXT: testb %al, %cl
; AVX512-64-NEXT: setne %al
-; AVX512-64-NEXT: andl $1, %eax
; AVX512-64-NEXT: kmovw %eax, %k1
-; AVX512-64-NEXT: movw $-3, %ax
-; AVX512-64-NEXT: kmovw %eax, %k2
-; AVX512-64-NEXT: kandw %k2, %k1, %k1
-; AVX512-64-NEXT: korw %k0, %k1, %k1
+; AVX512-64-NEXT: kshiftlw $15, %k1, %k1
+; AVX512-64-NEXT: kshiftrw $14, %k1, %k1
+; AVX512-64-NEXT: korw %k1, %k0, %k1
; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512-64-NEXT: retq
;
@@ -358,7 +352,6 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
; AVX512F-32-NEXT: sete %cl
; AVX512F-32-NEXT: testb %al, %cl
; AVX512F-32-NEXT: setne %al
-; AVX512F-32-NEXT: andl $1, %eax
; AVX512F-32-NEXT: kmovw %eax, %k1
; AVX512F-32-NEXT: kandw %k0, %k1, %k0
; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -382,27 +375,26 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
-; AVX512F-64-NEXT: vucomiss %xmm4, %xmm5
+; AVX512F-64-NEXT: vucomiss %xmm3, %xmm2
; AVX512F-64-NEXT: setnp %al
; AVX512F-64-NEXT: sete %cl
; AVX512F-64-NEXT: testb %al, %cl
; AVX512F-64-NEXT: setne %al
; AVX512F-64-NEXT: kmovw %eax, %k0
-; AVX512F-64-NEXT: kshiftlw $15, %k0, %k0
-; AVX512F-64-NEXT: kshiftrw $14, %k0, %k0
+; AVX512F-64-NEXT: movw $-3, %ax
+; AVX512F-64-NEXT: kmovw %eax, %k1
+; AVX512F-64-NEXT: kandw %k1, %k0, %k0
+; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX512F-64-NEXT: vucomiss %xmm3, %xmm2
; AVX512F-64-NEXT: setnp %al
; AVX512F-64-NEXT: sete %cl
; AVX512F-64-NEXT: testb %al, %cl
; AVX512F-64-NEXT: setne %al
-; AVX512F-64-NEXT: andl $1, %eax
; AVX512F-64-NEXT: kmovw %eax, %k1
-; AVX512F-64-NEXT: movw $-3, %ax
-; AVX512F-64-NEXT: kmovw %eax, %k2
-; AVX512F-64-NEXT: kandw %k2, %k1, %k1
-; AVX512F-64-NEXT: korw %k0, %k1, %k1
+; AVX512F-64-NEXT: kshiftlw $15, %k1, %k1
+; AVX512F-64-NEXT: kshiftrw $14, %k1, %k1
+; AVX512F-64-NEXT: korw %k1, %k0, %k1
; AVX512F-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-64-NEXT: vzeroupper
More information about the llvm-commits mailing list