[llvm-branch-commits] [llvm] 5bc391a - [SelectionDAG] Use getVectorElementPointer in DAGCombiner::replaceStoreOfInsertLoad. (#74249)

Mon Dec 4 11:13:15 PST 2023

Author: Craig Topper
Date: 2023-12-04T11:11:37-08:00
New Revision: 5bc391a7c96a58929794ec1dc664b966c0a03d09

URL: https://github.com/llvm/llvm-project/commit/5bc391a7c96a58929794ec1dc664b966c0a03d09
DIFF: https://github.com/llvm/llvm-project/commit/5bc391a7c96a58929794ec1dc664b966c0a03d09.diff

LOG: [SelectionDAG] Use getVectorElementPointer in DAGCombiner::replaceStoreOfInsertLoad. (#74249)

This ensures we clip the index to be in bounds of the vector we are
inserting into. If the index is out of bounds the results of the insert
element is poison. If we don't clip the index we can write memory that
was not part of the original store.

Fixes #74248.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/Mips/msa/basic_operations.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
    llvm/test/CodeGen/X86/pr59980.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2a3425a42607e..69e3148b8c864 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21006,20 +21006,18 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
                               &IsFast) ||
       !IsFast)
     return SDValue();
-  EVT PtrVT = Ptr.getValueType();
 
-  SDValue Offset =
-      DAG.getNode(ISD::MUL, DL, PtrVT, DAG.getZExtOrTrunc(Idx, DL, PtrVT),
-                  DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
-  SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset);
   MachinePointerInfo PointerInfo(ST->getAddressSpace());
 
   // If the offset is a known constant then try to recover the pointer
   // info
+  SDValue NewPtr;
   if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
     unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
     NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
     PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
+  } else {
+    NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
   }
 
   return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),

diff  --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
index fbae1bda7c665..1f6c430a932e0 100644
--- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
@@ -1879,6 +1879,7 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind {
 ; O32-NEXT:    addu $1, $2, $25
 ; O32-NEXT:    lw $2, %got(i32)($1)
 ; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    andi $2, $2, 15
 ; O32-NEXT:    lw $1, %got(v16i8)($1)
 ; O32-NEXT:    addu $1, $1, $2
 ; O32-NEXT:    jr $ra
@@ -1891,6 +1892,7 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind {
 ; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx)))
 ; N32-NEXT:    lw $2, %got_disp(i32)($1)
 ; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    andi $2, $2, 15
 ; N32-NEXT:    lw $1, %got_disp(v16i8)($1)
 ; N32-NEXT:    addu $1, $1, $2
 ; N32-NEXT:    jr $ra
@@ -1902,7 +1904,8 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind {
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lwu $2, 0($2)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    andi $2, $2, 15
 ; N64-NEXT:    ld $1, %got_disp(v16i8)($1)
 ; N64-NEXT:    daddu $1, $1, $2
 ; N64-NEXT:    jr $ra
@@ -1925,6 +1928,7 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind {
 ; O32-NEXT:    addu $1, $2, $25
 ; O32-NEXT:    lw $2, %got(i32)($1)
 ; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    andi $2, $2, 7
 ; O32-NEXT:    lw $1, %got(v8i16)($1)
 ; O32-NEXT:    lsa $1, $2, $1, 1
 ; O32-NEXT:    jr $ra
@@ -1937,6 +1941,7 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind {
 ; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx)))
 ; N32-NEXT:    lw $2, %got_disp(i32)($1)
 ; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    andi $2, $2, 7
 ; N32-NEXT:    lw $1, %got_disp(v8i16)($1)
 ; N32-NEXT:    lsa $1, $2, $1, 1
 ; N32-NEXT:    jr $ra
@@ -1948,7 +1953,8 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind {
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lwu $2, 0($2)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    andi $2, $2, 7
 ; N64-NEXT:    ld $1, %got_disp(v8i16)($1)
 ; N64-NEXT:    dlsa $1, $2, $1, 1
 ; N64-NEXT:    jr $ra
@@ -1971,6 +1977,7 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind {
 ; O32-NEXT:    addu $1, $2, $25
 ; O32-NEXT:    lw $2, %got(i32)($1)
 ; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    andi $2, $2, 3
 ; O32-NEXT:    lw $1, %got(v4i32)($1)
 ; O32-NEXT:    lsa $1, $2, $1, 2
 ; O32-NEXT:    jr $ra
@@ -1983,6 +1990,7 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind {
 ; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx)))
 ; N32-NEXT:    lw $2, %got_disp(i32)($1)
 ; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    andi $2, $2, 3
 ; N32-NEXT:    lw $1, %got_disp(v4i32)($1)
 ; N32-NEXT:    lsa $1, $2, $1, 2
 ; N32-NEXT:    jr $ra
@@ -1994,7 +2002,8 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind {
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lwu $2, 0($2)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    andi $2, $2, 3
 ; N64-NEXT:    ld $1, %got_disp(v4i32)($1)
 ; N64-NEXT:    dlsa $1, $2, $1, 2
 ; N64-NEXT:    jr $ra
@@ -2018,6 +2027,7 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind {
 ; O32-NEXT:    addu $1, $2, $25
 ; O32-NEXT:    lw $2, %got(i32)($1)
 ; O32-NEXT:    lw $2, 0($2)
+; O32-NEXT:    andi $2, $2, 1
 ; O32-NEXT:    lw $1, %got(v2i64)($1)
 ; O32-NEXT:    lsa $1, $2, $1, 3
 ; O32-NEXT:    sw $5, 4($1)
@@ -2031,6 +2041,7 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind {
 ; N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx)))
 ; N32-NEXT:    lw $2, %got_disp(i32)($1)
 ; N32-NEXT:    lw $2, 0($2)
+; N32-NEXT:    andi $2, $2, 1
 ; N32-NEXT:    lw $1, %got_disp(v2i64)($1)
 ; N32-NEXT:    lsa $1, $2, $1, 3
 ; N32-NEXT:    jr $ra
@@ -2042,7 +2053,8 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind {
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lwu $2, 0($2)
+; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    andi $2, $2, 1
 ; N64-NEXT:    ld $1, %got_disp(v2i64)($1)
 ; N64-NEXT:    dlsa $1, $2, $1, 3
 ; N64-NEXT:    jr $ra

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index a3f41fd842222..7e7b7403dbdfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -321,20 +321,13 @@ define <32 x i16> @insertelt_v32i16(<32 x i16> %a, i16 %y, i32 %idx) {
 }
 
 define void @insertelt_v32i16_store(ptr %x, i16 %y, i32 %idx) {
-; RV32-LABEL: insertelt_v32i16_store:
-; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a2, 1
-; RV32-NEXT:    add a0, a0, a2
-; RV32-NEXT:    sh a1, 0(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: insertelt_v32i16_store:
-; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a2, 32
-; RV64-NEXT:    srli a2, a2, 31
-; RV64-NEXT:    add a0, a0, a2
-; RV64-NEXT:    sh a1, 0(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: insertelt_v32i16_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a2, a2, 31
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    sh a1, 0(a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = insertelement <32 x i16> %a, i16 %y, i32 %idx
   store <32 x i16> %b, ptr %x
@@ -366,20 +359,13 @@ define <8 x float> @insertelt_v8f32(<8 x float> %a, float %y, i32 %idx) {
 }
 
 define void @insertelt_v8f32_store(ptr %x, float %y, i32 %idx) {
-; RV32-LABEL: insertelt_v8f32_store:
-; RV32:       # %bb.0:
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    fsw fa0, 0(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: insertelt_v8f32_store:
-; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 30
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    fsw fa0, 0(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: insertelt_v8f32_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a1, a1, 7
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    fsw fa0, 0(a0)
+; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> %a, float %y, i32 %idx
   store <8 x float> %b, ptr %x
@@ -443,6 +429,7 @@ define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) {
 define void @insertelt_v8i64_store(ptr %x, i32 %idx) {
 ; RV32-LABEL: insertelt_v8i64_store:
 ; RV32:       # %bb.0:
+; RV32-NEXT:    andi a1, a1, 7
 ; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    add a0, a0, a1
 ; RV32-NEXT:    li a1, -1
@@ -452,8 +439,8 @@ define void @insertelt_v8i64_store(ptr %x, i32 %idx) {
 ;
 ; RV64-LABEL: insertelt_v8i64_store:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 29
+; RV64-NEXT:    andi a1, a1, 7
+; RV64-NEXT:    slli a1, a1, 3
 ; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    sd a1, 0(a0)
@@ -521,6 +508,7 @@ define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
 define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) {
 ; RV32-LABEL: insertelt_c6_v8i64_store:
 ; RV32:       # %bb.0:
+; RV32-NEXT:    andi a1, a1, 7
 ; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    add a0, a0, a1
 ; RV32-NEXT:    sw zero, 4(a0)
@@ -530,8 +518,8 @@ define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) {
 ;
 ; RV64-LABEL: insertelt_c6_v8i64_store:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 29
+; RV64-NEXT:    andi a1, a1, 7
+; RV64-NEXT:    slli a1, a1, 3
 ; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    li a1, 6
 ; RV64-NEXT:    sd a1, 0(a0)

diff  --git a/llvm/test/CodeGen/X86/pr59980.ll b/llvm/test/CodeGen/X86/pr59980.ll
index 0823f960724e2..a6d22c2244c89 100644
--- a/llvm/test/CodeGen/X86/pr59980.ll
+++ b/llvm/test/CodeGen/X86/pr59980.ll
@@ -9,6 +9,7 @@ define void @foo(ptr %0, ptr %1, ptr %2) #0 {
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl (%rdx), %eax
 ; CHECK-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    andl $15, %eax
 ; CHECK-NEXT:    vpextrw $0, %xmm0, (%rsi,%rax,2)
 ; CHECK-NEXT:    retq
   %4 = bitcast ptr %2 to ptr