[llvm] 85a2146 - [SDAG] fold insert_vector_elt with undef index
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 27 12:28:54 PDT 2019
Author: Sanjay Patel
Date: 2019-10-27T15:28:43-04:00
New Revision: 85a2146c155953d5bdfb2e7e6ba9780fc2dab1b9
URL: https://github.com/llvm/llvm-project/commit/85a2146c155953d5bdfb2e7e6ba9780fc2dab1b9
DIFF: https://github.com/llvm/llvm-project/commit/85a2146c155953d5bdfb2e7e6ba9780fc2dab1b9.diff
LOG: [SDAG] fold insert_vector_elt with undef index
Similar to:
rG4c47617627fb
This makes the DAG behavior consistent with IR's insertelement.
https://bugs.llvm.org/show_bug.cgi?id=42689
I've tried to maintain test intent for AArch64 and WebAssembly
by replacing undef index operands with something else.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
llvm/test/CodeGen/WebAssembly/simd.ll
llvm/test/CodeGen/X86/insertelement-var-index.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f00e332de9c0..459479394821 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16574,10 +16574,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue EltNo = N->getOperand(2);
SDLoc DL(N);
- // If the inserted element is an UNDEF, just use the input vector.
- if (InVal.isUndef())
- return InVec;
-
EVT VT = InVec.getValueType();
unsigned NumElts = VT.getVectorNumElements();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8ea7ce94da1d..4ffc55c62f2a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5506,6 +5506,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
return getUNDEF(VT);
+
+ // Undefined index can be assumed out-of-bounds, so that's UNDEF too.
+ if (N3.isUndef())
+ return getUNDEF(VT);
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (N2.isUndef())
+ return N1;
+
break;
}
case ISD::INSERT_SUBVECTOR: {
diff --git a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
index 52333463c243..4b42500e16b1 100644
--- a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
@@ -7,6 +7,8 @@
; CHECK: fmla
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
; CHECK-NEXT: fmla
; CHECK-NEXT: fmla
; CHECK-NEXT: fmla
@@ -16,21 +18,17 @@ target triple = "aarch64--linux-gnu"
%Struct = type { i64*, [9 x double], [16 x {float, float}], [16 x {float, float}], i32, i32 }
; Function Attrs: nounwind
-define linkonce_odr void @func(%Struct* nocapture %this) unnamed_addr #0 align 2 {
+define linkonce_odr void @func(%Struct* nocapture %this, <4 x float> %f) unnamed_addr #0 align 2 {
entry:
- %0 = insertelement <4 x float> undef, float undef, i32 0
- %1 = insertelement <4 x float> %0, float undef, i32 1
- %2 = insertelement <4 x float> %1, float undef, i32 2
- %3 = insertelement <4 x float> %2, float undef, i32 3
%scevgep = getelementptr %Struct, %Struct* %this, i64 0, i32 2, i64 8, i32 0
%struct_ptr = bitcast float* %scevgep to i8*
%vec1 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr)
%ev1 = extractvalue { <4 x float>, <4 x float> } %vec1, 1
- %fm1 = fmul <4 x float> %0, %ev1
- %av1 = fadd <4 x float> %1, %fm1
+ %fm1 = fmul <4 x float> %f, %ev1
+ %av1 = fadd <4 x float> %f, %fm1
%ev2 = extractvalue { <4 x float>, <4 x float> } %vec1, 0
- %fm2 = fmul <4 x float> %2, %ev2
- %av2 = fadd <4 x float> %3, %fm2
+ %fm2 = fmul <4 x float> %f, %ev2
+ %av2 = fadd <4 x float> %f, %fm2
%scevgep2 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 8, i32 0
%struct_ptr2 = bitcast float* %scevgep2 to i8*
tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av2, <4 x float> %av1, i8* %struct_ptr2)
@@ -38,11 +36,11 @@ entry:
%struct_ptr3 = bitcast float* %scevgep3 to i8*
%vec2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr3)
%ev3 = extractvalue { <4 x float>, <4 x float> } %vec2, 1
- %fm3 = fmul <4 x float> %0, %ev3
- %av3 = fadd <4 x float> %1, %fm3
+ %fm3 = fmul <4 x float> %f, %ev3
+ %av3 = fadd <4 x float> %f, %fm3
%ev4 = extractvalue { <4 x float>, <4 x float> } %vec2, 0
- %fm4 = fmul <4 x float> %2, %ev4
- %av4 = fadd <4 x float> %3, %fm4
+ %fm4 = fmul <4 x float> %f, %ev4
+ %av4 = fadd <4 x float> %f, %fm4
%scevgep4 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 12, i32 0
%struct_ptr4 = bitcast float* %scevgep4 to i8*
tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av4, <4 x float> %av3, i8* %struct_ptr4)
diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll
index e13646c2aa55..b2d063806517 100644
--- a/llvm/test/CodeGen/WebAssembly/simd.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd.ll
@@ -193,13 +193,13 @@ define <16 x i8> @replace_var_v16i8(<16 x i8> %v, i32 %i, i8 %x) {
ret <16 x i8> %res
}
-; CHECK-LABEL: replace_undef_v16i8:
+; CHECK-LABEL: replace_zero_v16i8:
; NO-SIMD128-NOT: i8x16
-; SIMD128-NEXT: .functype replace_undef_v16i8 (v128, i32) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v16i8 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <16 x i8> @replace_undef_v16i8(<16 x i8> %v, i8 %x) {
- %res = insertelement <16 x i8> %v, i8 %x, i32 undef
+define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) {
+ %res = insertelement <16 x i8> %v, i8 %x, i32 0
ret <16 x i8> %res
}
@@ -464,13 +464,13 @@ define <8 x i16> @replace_var_v8i16(<8 x i16> %v, i32 %i, i16 %x) {
ret <8 x i16> %res
}
-; CHECK-LABEL: replace_undef_v8i16:
+; CHECK-LABEL: replace_zero_v8i16:
; NO-SIMD128-NOT: i16x8
-; SIMD128-NEXT: .functype replace_undef_v8i16 (v128, i32) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v8i16 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <8 x i16> @replace_undef_v8i16(<8 x i16> %v, i16 %x) {
- %res = insertelement <8 x i16> %v, i16 %x, i32 undef
+define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) {
+ %res = insertelement <8 x i16> %v, i16 %x, i32 0
ret <8 x i16> %res
}
@@ -625,13 +625,13 @@ define <4 x i32> @replace_var_v4i32(<4 x i32> %v, i32 %i, i32 %x) {
ret <4 x i32> %res
}
-; CHECK-LABEL: replace_undef_v4i32:
+; CHECK-LABEL: replace_zero_v4i32:
; NO-SIMD128-NOT: i32x4
-; SIMD128-NEXT: .functype replace_undef_v4i32 (v128, i32) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v4i32 (v128, i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <4 x i32> @replace_undef_v4i32(<4 x i32> %v, i32 %x) {
- %res = insertelement <4 x i32> %v, i32 %x, i32 undef
+define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) {
+ %res = insertelement <4 x i32> %v, i32 %x, i32 0
ret <4 x i32> %res
}
@@ -781,14 +781,14 @@ define <2 x i64> @replace_var_v2i64(<2 x i64> %v, i32 %i, i64 %x) {
ret <2 x i64> %res
}
-; CHECK-LABEL: replace_undef_v2i64:
+; CHECK-LABEL: replace_zero_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
-; SIMD128-NEXT: .functype replace_undef_v2i64 (v128, i64) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v2i64 (v128, i64) -> (v128){{$}}
; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <2 x i64> @replace_undef_v2i64(<2 x i64> %v, i64 %x) {
- %res = insertelement <2 x i64> %v, i64 %x, i32 undef
+define <2 x i64> @replace_zero_v2i64(<2 x i64> %v, i64 %x) {
+ %res = insertelement <2 x i64> %v, i64 %x, i32 0
ret <2 x i64> %res
}
@@ -931,13 +931,13 @@ define <4 x float> @replace_var_v4f32(<4 x float> %v, i32 %i, float %x) {
ret <4 x float> %res
}
-; CHECK-LABEL: replace_undef_v4f32:
+; CHECK-LABEL: replace_zero_v4f32:
; NO-SIMD128-NOT: f32x4
-; SIMD128-NEXT: .functype replace_undef_v4f32 (v128, f32) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v4f32 (v128, f32) -> (v128){{$}}
; SIMD128-NEXT: f32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <4 x float> @replace_undef_v4f32(<4 x float> %v, float %x) {
- %res = insertelement <4 x float> %v, float %x, i32 undef
+define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) {
+ %res = insertelement <4 x float> %v, float %x, i32 0
ret <4 x float> %res
}
@@ -1086,14 +1086,14 @@ define <2 x double> @replace_var_v2f64(<2 x double> %v, i32 %i, double %x) {
ret <2 x double> %res
}
-; CHECK-LABEL: replace_undef_v2f64:
+; CHECK-LABEL: replace_zero_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-VM-NOT: f64x2
-; SIMD128-NEXT: .functype replace_undef_v2f64 (v128, f64) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v2f64 (v128, f64) -> (v128){{$}}
; SIMD128-NEXT: f64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <2 x double> @replace_undef_v2f64(<2 x double> %v, double %x) {
- %res = insertelement <2 x double> %v, double %x, i32 undef
+define <2 x double> @replace_zero_v2f64(<2 x double> %v, double %x) {
+ %res = insertelement <2 x double> %v, double %x, i32 0
ret <2 x double> %res
}
diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll
index cbb29202cca2..c6ab2cd7fa9b 100644
--- a/llvm/test/CodeGen/X86/insertelement-var-index.ll
+++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -3,6 +3,22 @@
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2
+define <16 x i8> @undef_index(i8 %x) nounwind {
+; ALL-LABEL: undef_index:
+; ALL: # %bb.0:
+; ALL-NEXT: retq
+ %ins = insertelement <16 x i8> undef, i8 %x, i64 undef
+ ret <16 x i8> %ins
+}
+
+define <16 x i8> @undef_scalar(<16 x i8> %x, i32 %index) nounwind {
+; ALL-LABEL: undef_scalar:
+; ALL: # %bb.0:
+; ALL-NEXT: retq
+ %ins = insertelement <16 x i8> %x, i8 undef, i32 %index
+ ret <16 x i8> %ins
+}
+
define <16 x i8> @arg_i8_v16i8(i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v16i8:
; SSE: # %bb.0:
More information about the llvm-commits
mailing list