[llvm] [LegalizeDAG] Freeze index when converting insert_elt/insert_subvector to load/store on stack. (PR #86850)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 27 11:39:49 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-loongarch
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
We try clamp the index to be within the bounds of the stack object we create, but if we don't freeze it, poison can propagate into the clamp code. This can cause the access to leave the bounds of the stack object.
We have other instances of this issue in type legalization and extract_elt/subvector, but posting this patch first for direction check.
Fixes #<!-- -->86717
---
Full diff: https://github.com/llvm/llvm-project/pull/86850.diff
5 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+3)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll (+24-18)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll (+24-18)
- (modified) llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/insertelement-var-index.ll (+24-20)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index e10b8bc8c5e2eb..24f69ea1b742a6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1455,6 +1455,9 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// First store the whole vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+ // Freeze the index so we don't poison the clamping code we're about to emit.
+ Idx = DAG.getFreeze(Idx);
+
// Then store the inserted part.
if (PartVT.isVector()) {
SDValue SubStackPtr =
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
index 25106b456d2f7a..6629d34405492c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
@@ -123,9 +123,10 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0
-; CHECK-NEXT: st.b $a2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: addi.d $a3, $sp, 0
+; CHECK-NEXT: bstrins.d $a3, $a0, 4, 0
+; CHECK-NEXT: st.b $a2, $a3, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
@@ -149,9 +150,10 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1
-; CHECK-NEXT: st.h $a2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: addi.d $a3, $sp, 0
+; CHECK-NEXT: bstrins.d $a3, $a0, 4, 1
+; CHECK-NEXT: st.h $a2, $a3, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
@@ -175,9 +177,10 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2
-; CHECK-NEXT: st.w $a2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: addi.d $a3, $sp, 0
+; CHECK-NEXT: bstrins.d $a3, $a0, 4, 2
+; CHECK-NEXT: st.w $a2, $a3, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
@@ -201,9 +204,10 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3
-; CHECK-NEXT: st.d $a2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: addi.d $a3, $sp, 0
+; CHECK-NEXT: bstrins.d $a3, $a0, 4, 3
+; CHECK-NEXT: st.d $a2, $a3, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
@@ -227,9 +231,10 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: xvst $xr1, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
-; CHECK-NEXT: fst.s $fa0, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT: addi.d $a2, $sp, 0
+; CHECK-NEXT: bstrins.d $a2, $a0, 4, 2
+; CHECK-NEXT: fst.s $fa0, $a2, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
@@ -253,9 +258,10 @@ define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounw
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: xvst $xr1, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
-; CHECK-NEXT: fst.d $fa0, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT: addi.d $a2, $sp, 0
+; CHECK-NEXT: bstrins.d $a2, $a0, 4, 3
+; CHECK-NEXT: fst.d $fa0, $a2, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
index 7f232073ae129c..19171b7d8ed784 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
@@ -87,9 +87,10 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0
-; CHECK-NEXT: st.b $a2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: addi.d $a3, $sp, 0
+; CHECK-NEXT: bstrins.d $a3, $a0, 3, 0
+; CHECK-NEXT: st.b $a2, $a3, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -106,9 +107,10 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1
-; CHECK-NEXT: st.h $a2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: addi.d $a3, $sp, 0
+; CHECK-NEXT: bstrins.d $a3, $a0, 3, 1
+; CHECK-NEXT: st.h $a2, $a3, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -125,9 +127,10 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2
-; CHECK-NEXT: st.w $a2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: addi.d $a3, $sp, 0
+; CHECK-NEXT: bstrins.d $a3, $a0, 3, 2
+; CHECK-NEXT: st.w $a2, $a3, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -144,9 +147,10 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3
-; CHECK-NEXT: st.d $a2, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
+; CHECK-NEXT: addi.d $a3, $sp, 0
+; CHECK-NEXT: bstrins.d $a3, $a0, 3, 3
+; CHECK-NEXT: st.d $a2, $a3, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -163,9 +167,10 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr1, $a0, 0
; CHECK-NEXT: vst $vr1, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2
-; CHECK-NEXT: fst.s $fa0, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT: addi.d $a2, $sp, 0
+; CHECK-NEXT: bstrins.d $a2, $a0, 3, 2
+; CHECK-NEXT: fst.s $fa0, $a2, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -182,9 +187,10 @@ define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) noun
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr1, $a0, 0
; CHECK-NEXT: vst $vr1, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3
-; CHECK-NEXT: fst.d $fa0, $a0, 0
+; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT: addi.d $a2, $sp, 0
+; CHECK-NEXT: bstrins.d $a2, $a0, 3, 3
+; CHECK-NEXT: fst.d $fa0, $a2, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
diff --git a/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
index 535450a52ff60e..695a2d0cd806e0 100644
--- a/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
+++ b/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
@@ -9,11 +9,11 @@ define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind read
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $32, %esp
-; X86-NEXT: movzwl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: andl $7, %ecx
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movzwl 8(%ebp), %ecx
+; X86-NEXT: andl $7, %eax
; X86-NEXT: movaps %xmm0, (%esp)
-; X86-NEXT: movw %ax, (%esp,%ecx,2)
+; X86-NEXT: movw %cx, (%esp,%eax,2)
; X86-NEXT: movaps (%esp), %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll
index 8ed8495d7a4614..5420e6b5ce86f3 100644
--- a/llvm/test/CodeGen/X86/insertelement-var-index.ll
+++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -1009,18 +1009,19 @@ define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind {
; X86AVX2-NEXT: pushl %esi
; X86AVX2-NEXT: andl $-16, %esp
; X86AVX2-NEXT: subl $48, %esp
-; X86AVX2-NEXT: movl 8(%ebp), %eax
-; X86AVX2-NEXT: movl 12(%ebp), %ecx
-; X86AVX2-NEXT: movl 16(%ebp), %edx
+; X86AVX2-NEXT: movl 8(%ebp), %edx
+; X86AVX2-NEXT: movl 12(%ebp), %eax
+; X86AVX2-NEXT: movl 16(%ebp), %ecx
; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
-; X86AVX2-NEXT: leal (%edx,%edx), %esi
+; X86AVX2-NEXT: addl %ecx, %ecx
+; X86AVX2-NEXT: movl %ecx, %esi
; X86AVX2-NEXT: andl $3, %esi
-; X86AVX2-NEXT: movl %eax, (%esp,%esi,4)
+; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
-; X86AVX2-NEXT: leal 1(%edx,%edx), %eax
-; X86AVX2-NEXT: andl $3, %eax
-; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4)
+; X86AVX2-NEXT: incl %ecx
+; X86AVX2-NEXT: andl $3, %ecx
+; X86AVX2-NEXT: movl %eax, 16(%esp,%ecx,4)
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT: leal -4(%ebp), %esp
; X86AVX2-NEXT: popl %esi
@@ -1362,12 +1363,13 @@ define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind {
; X86AVX2-NEXT: movl (%ecx), %edx
; X86AVX2-NEXT: movl 4(%ecx), %ecx
; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
-; X86AVX2-NEXT: leal (%eax,%eax), %esi
+; X86AVX2-NEXT: addl %eax, %eax
+; X86AVX2-NEXT: movl %eax, %esi
; X86AVX2-NEXT: andl $3, %esi
; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
-; X86AVX2-NEXT: leal 1(%eax,%eax), %eax
+; X86AVX2-NEXT: incl %eax
; X86AVX2-NEXT: andl $3, %eax
; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4)
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
@@ -1742,18 +1744,19 @@ define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind {
; X86AVX2-NEXT: pushl %esi
; X86AVX2-NEXT: andl $-32, %esp
; X86AVX2-NEXT: subl $96, %esp
-; X86AVX2-NEXT: movl 8(%ebp), %eax
-; X86AVX2-NEXT: movl 12(%ebp), %ecx
-; X86AVX2-NEXT: movl 16(%ebp), %edx
+; X86AVX2-NEXT: movl 8(%ebp), %edx
+; X86AVX2-NEXT: movl 12(%ebp), %eax
+; X86AVX2-NEXT: movl 16(%ebp), %ecx
; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
-; X86AVX2-NEXT: leal (%edx,%edx), %esi
+; X86AVX2-NEXT: addl %ecx, %ecx
+; X86AVX2-NEXT: movl %ecx, %esi
; X86AVX2-NEXT: andl $7, %esi
-; X86AVX2-NEXT: movl %eax, (%esp,%esi,4)
+; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X86AVX2-NEXT: leal 1(%edx,%edx), %eax
-; X86AVX2-NEXT: andl $7, %eax
-; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4)
+; X86AVX2-NEXT: incl %ecx
+; X86AVX2-NEXT: andl $7, %ecx
+; X86AVX2-NEXT: movl %eax, 32(%esp,%ecx,4)
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT: leal -4(%ebp), %esp
; X86AVX2-NEXT: popl %esi
@@ -2128,12 +2131,13 @@ define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind {
; X86AVX2-NEXT: movl (%ecx), %edx
; X86AVX2-NEXT: movl 4(%ecx), %ecx
; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
-; X86AVX2-NEXT: leal (%eax,%eax), %esi
+; X86AVX2-NEXT: addl %eax, %eax
+; X86AVX2-NEXT: movl %eax, %esi
; X86AVX2-NEXT: andl $7, %esi
; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X86AVX2-NEXT: leal 1(%eax,%eax), %eax
+; X86AVX2-NEXT: incl %eax
; X86AVX2-NEXT: andl $7, %eax
; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4)
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
``````````
</details>
https://github.com/llvm/llvm-project/pull/86850
More information about the llvm-commits
mailing list