[llvm] [SelectionDAG] Use getVectorElementPointer in DAGCombiner::replaceStoreOfInsertLoad. (PR #74249)

Sun Dec 3 11:56:03 PST 2023

https://github.com/topperc created https://github.com/llvm/llvm-project/pull/74249

This ensures we clip the index to be in bounds of the vector we are inserting into. If the index is out of bounds the results of the insert element is poison. If we don't clip the index we can write memory that was not part of the original store.

Fixes #74248.

>From cbc0803be24d94ed9c3e43f020b27ad8ab556a9f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sun, 3 Dec 2023 11:53:31 -0800
Subject: [PATCH] [SelectionDAG] Use getVectorElementPointer in
 DAGCombiner::replaceStoreOfInsertLoad.

This ensures we clip the index to be in bounds of the vector we
are inserting into. If the index is out of bounds the results of
the insert element is poison. If we don't clip the index we can
write memory that was not part of the original store.

Fixes #74248.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  8 ++-
 .../test/CodeGen/Mips/msa/basic_operations.ll | 20 +++++--
 .../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 52 +++++++------------
 llvm/test/CodeGen/X86/pr59980.ll              |  1 +
 4 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2a3425a42607e..69e3148b8c864 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21006,20 +21006,18 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
                               &IsFast) ||
       !IsFast)
     return SDValue();
-  EVT PtrVT = Ptr.getValueType();
 
-  SDValue Offset =
-      DAG.getNode(ISD::MUL, DL, PtrVT, DAG.getZExtOrTrunc(Idx, DL, PtrVT),
-                  DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
-  SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset);
   MachinePointerInfo PointerInfo(ST->getAddressSpace());
 
   // If the offset is a known constant then try to recover the pointer
   // info
+  SDValue NewPtr;
   if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
     unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
     NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
     PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
+  } else {
+    NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
   }
 
   return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
index fbae1bda7c665..1f6c430a932e0 100644
--- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
@@ -1879,6 +1879,7 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind {
 ; O32-NEXT:    addu $1, $2, $25
 ; O32-NEXT:    lw $2, %got(i32)($1)
 ; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    andi $2, $2, 15
 ; O32-NEXT:    lw $1, %got(v16i8)($1)
 ; O32-NEXT:    addu $1, $1, $2
 ; O32-NEXT:    jr $ra
@@ -1891,6 +1892,7 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind {
 ; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx)))
 ; N32-NEXT:    lw $2, %got_disp(i32)($1)
 ; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    andi $2, $2, 15
 ; N32-NEXT:    lw $1, %got_disp(v16i8)($1)
 ; N32-NEXT:    addu $1, $1, $2
 ; N32-NEXT:    jr $ra
@@ -1902,7 +1904,8 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind {
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lwu $2, 0($2)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    andi $2, $2, 15
 ; N64-NEXT:    ld $1, %got_disp(v16i8)($1)
 ; N64-NEXT:    daddu $1, $1, $2
 ; N64-NEXT:    jr $ra
@@ -1925,6 +1928,7 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind {
 ; O32-NEXT:    addu $1, $2, $25
 ; O32-NEXT:    lw $2, %got(i32)($1)
 ; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    andi $2, $2, 7
 ; O32-NEXT:    lw $1, %got(v8i16)($1)
 ; O32-NEXT:    lsa $1, $2, $1, 1
 ; O32-NEXT:    jr $ra
@@ -1937,6 +1941,7 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind {
 ; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx)))
 ; N32-NEXT:    lw $2, %got_disp(i32)($1)
 ; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    andi $2, $2, 7
 ; N32-NEXT:    lw $1, %got_disp(v8i16)($1)
 ; N32-NEXT:    lsa $1, $2, $1, 1
 ; N32-NEXT:    jr $ra
@@ -1948,7 +1953,8 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind {
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lwu $2, 0($2)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    andi $2, $2, 7
 ; N64-NEXT:    ld $1, %got_disp(v8i16)($1)
 ; N64-NEXT:    dlsa $1, $2, $1, 1
 ; N64-NEXT:    jr $ra
@@ -1971,6 +1977,7 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind {
 ; O32-NEXT:    addu $1, $2, $25
 ; O32-NEXT:    lw $2, %got(i32)($1)
 ; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    andi $2, $2, 3
 ; O32-NEXT:    lw $1, %got(v4i32)($1)
 ; O32-NEXT:    lsa $1, $2, $1, 2
 ; O32-NEXT:    jr $ra
@@ -1983,6 +1990,7 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind {
 ; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx)))
 ; N32-NEXT:    lw $2, %got_disp(i32)($1)
 ; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    andi $2, $2, 3
 ; N32-NEXT:    lw $1, %got_disp(v4i32)($1)
 ; N32-NEXT:    lsa $1, $2, $1, 2
 ; N32-NEXT:    jr $ra
@@ -1994,7 +2002,8 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind {
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lwu $2, 0($2)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    andi $2, $2, 3
 ; N64-NEXT:    ld $1, %got_disp(v4i32)($1)
 ; N64-NEXT:    dlsa $1, $2, $1, 2
 ; N64-NEXT:    jr $ra
@@ -2018,6 +2027,7 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind {
 ; O32-NEXT:    addu $1, $2, $25
 ; O32-NEXT:    lw $2, %got(i32)($1)
 ; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    andi $2, $2, 1
 ; O32-NEXT:    lw $1, %got(v2i64)($1)
 ; O32-NEXT:    lsa $1, $2, $1, 3
 ; O32-NEXT:    sw $5, 4($1)
@@ -2031,6 +2041,7 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind {
 ; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx)))
 ; N32-NEXT:    lw $2, %got_disp(i32)($1)
 ; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    andi $2, $2, 1
 ; N32-NEXT:    lw $1, %got_disp(v2i64)($1)
 ; N32-NEXT:    lsa $1, $2, $1, 3
 ; N32-NEXT:    jr $ra
@@ -2042,7 +2053,8 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind {
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lwu $2, 0($2)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    andi $2, $2, 1
 ; N64-NEXT:    ld $1, %got_disp(v2i64)($1)
 ; N64-NEXT:    dlsa $1, $2, $1, 3
 ; N64-NEXT:    jr $ra
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index a3f41fd842222..7e7b7403dbdfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -321,20 +321,13 @@ define <32 x i16> @insertelt_v32i16(<32 x i16> %a, i16 %y, i32 %idx) {
 }
 
 define void @insertelt_v32i16_store(ptr %x, i16 %y, i32 %idx) {
-; RV32-LABEL: insertelt_v32i16_store:
-; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a2, 1
-; RV32-NEXT:    add a0, a0, a2
-; RV32-NEXT:    sh a1, 0(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: insertelt_v32i16_store:
-; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a2, 32
-; RV64-NEXT:    srli a2, a2, 31
-; RV64-NEXT:    add a0, a0, a2
-; RV64-NEXT:    sh a1, 0(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: insertelt_v32i16_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a2, a2, 31
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    sh a1, 0(a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = insertelement <32 x i16> %a, i16 %y, i32 %idx
   store <32 x i16> %b, ptr %x
@@ -366,20 +359,13 @@ define <8 x float> @insertelt_v8f32(<8 x float> %a, float %y, i32 %idx) {
 }
 
 define void @insertelt_v8f32_store(ptr %x, float %y, i32 %idx) {
-; RV32-LABEL: insertelt_v8f32_store:
-; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    fsw fa0, 0(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: insertelt_v8f32_store:
-; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 30
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    fsw fa0, 0(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: insertelt_v8f32_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a1, a1, 7
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    fsw fa0, 0(a0)
+; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> %a, float %y, i32 %idx
   store <8 x float> %b, ptr %x
@@ -443,6 +429,7 @@ define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) {
 define void @insertelt_v8i64_store(ptr %x, i32 %idx) {
 ; RV32-LABEL: insertelt_v8i64_store:
 ; RV32:       # %bb.0:
+; RV32-NEXT:    andi a1, a1, 7
 ; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    add a0, a0, a1
 ; RV32-NEXT:    li a1, -1
@@ -452,8 +439,8 @@ define void @insertelt_v8i64_store(ptr %x, i32 %idx) {
 ;
 ; RV64-LABEL: insertelt_v8i64_store:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 29
+; RV64-NEXT:    andi a1, a1, 7
+; RV64-NEXT:    slli a1, a1, 3
 ; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    sd a1, 0(a0)
@@ -521,6 +508,7 @@ define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
 define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) {
 ; RV32-LABEL: insertelt_c6_v8i64_store:
 ; RV32:       # %bb.0:
+; RV32-NEXT:    andi a1, a1, 7
 ; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    add a0, a0, a1
 ; RV32-NEXT:    sw zero, 4(a0)
@@ -530,8 +518,8 @@ define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) {
 ;
 ; RV64-LABEL: insertelt_c6_v8i64_store:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 29
+; RV64-NEXT:    andi a1, a1, 7
+; RV64-NEXT:    slli a1, a1, 3
 ; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    li a1, 6
 ; RV64-NEXT:    sd a1, 0(a0)
diff --git a/llvm/test/CodeGen/X86/pr59980.ll b/llvm/test/CodeGen/X86/pr59980.ll
index 0823f960724e2..a6d22c2244c89 100644
--- a/llvm/test/CodeGen/X86/pr59980.ll
+++ b/llvm/test/CodeGen/X86/pr59980.ll
@@ -9,6 +9,7 @@ define void @foo(ptr %0, ptr %1, ptr %2) #0 {
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl (%rdx), %eax
 ; CHECK-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    andl $15, %eax
 ; CHECK-NEXT:    vpextrw $0, %xmm0, (%rsi,%rax,2)
 ; CHECK-NEXT:    retq
   %4 = bitcast ptr %2 to ptr