[llvm] [RISCV] Don't let performBUILD_VECTORCombine form a division or remainder with undef elements. (PR #69482)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 18 09:34:26 PDT 2023
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/69482
Division/remainder by undef is immediate UB across the entire vector.
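For illustration only (the lane count and element type below are made up, not taken from the affected test), the combine turns a BUILD_VECTOR whose elements are scalar divides into a single vector divide, so any element that was undef in the BUILD_VECTOR becomes an undef divisor lane. Roughly, in IR terms:

  ; Before: only lanes 0 and 1 perform a division; lanes 2 and 3 are left undef.
  define <4 x i32> @build_of_divs(i32 %a, i32 %b) {
    %d0 = sdiv i32 %a, 6
    %d1 = sdiv i32 %b, 7
    %v0 = insertelement <4 x i32> undef, i32 %d0, i32 0
    %v1 = insertelement <4 x i32> %v0, i32 %d1, i32 1
    ret <4 x i32> %v1
  }

  ; After an unguarded combine, the equivalent of a single
  ;   sdiv <4 x i32> %lhs, <i32 6, i32 7, i32 undef, i32 undef>
  ; divides by undef in lanes 2 and 3, which is immediate UB for the whole
  ; operation, not just those lanes.

The added DAG.isSafeToSpeculativelyExecute(Opcode) check makes the combine bail out for opcodes that cannot be speculated (the integer divide/remainder opcodes) whenever an undef element is seen.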
From 78289462b057f6bfe6cc669cd7ae270588a5ac08 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 18 Oct 2023 09:31:23 -0700
Subject: [PATCH] [RISCV] Don't let performBUILD_VECTORCombine form a division
or remainder with undef elements.
Division/remainder by undef is immediate UB across the entire vector.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 81 ++++++++++++++++---
2 files changed, 72 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e8f001e491cdcaa..63ebe8b9af320bc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13744,6 +13744,10 @@ static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
   SmallVector<SDValue> RHSOps;
   for (SDValue Op : N->ops()) {
     if (Op.isUndef()) {
+      // We can't form a divide or remainder from undef.
+      if (!DAG.isSafeToSpeculativelyExecute(Opcode))
+        return SDValue();
+
       LHSOps.push_back(Op);
       RHSOps.push_back(Op);
       continue;
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 6ed352b51f25459..d311311175c15e6 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -725,24 +725,79 @@ define void @test_srem_vec(ptr %X) nounwind {
;
; RV64MV-LABEL: test_srem_vec:
; RV64MV: # %bb.0:
+; RV64MV-NEXT: ld a1, 0(a0)
+; RV64MV-NEXT: lwu a2, 8(a0)
+; RV64MV-NEXT: srli a3, a1, 2
+; RV64MV-NEXT: lbu a4, 12(a0)
+; RV64MV-NEXT: slli a5, a2, 62
+; RV64MV-NEXT: or a3, a5, a3
+; RV64MV-NEXT: srai a3, a3, 31
+; RV64MV-NEXT: slli a4, a4, 32
+; RV64MV-NEXT: or a2, a2, a4
+; RV64MV-NEXT: slli a2, a2, 29
+; RV64MV-NEXT: lui a4, %hi(.LCPI3_0)
+; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4)
+; RV64MV-NEXT: srai a2, a2, 31
+; RV64MV-NEXT: slli a1, a1, 31
+; RV64MV-NEXT: srai a1, a1, 31
+; RV64MV-NEXT: mulh a4, a2, a4
+; RV64MV-NEXT: srli a5, a4, 63
+; RV64MV-NEXT: srai a4, a4, 1
+; RV64MV-NEXT: add a4, a4, a5
+; RV64MV-NEXT: lui a5, %hi(.LCPI3_1)
+; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5)
+; RV64MV-NEXT: add a2, a2, a4
+; RV64MV-NEXT: slli a4, a4, 2
+; RV64MV-NEXT: add a2, a2, a4
+; RV64MV-NEXT: mulh a4, a3, a5
+; RV64MV-NEXT: srli a5, a4, 63
+; RV64MV-NEXT: srai a4, a4, 1
+; RV64MV-NEXT: add a4, a4, a5
+; RV64MV-NEXT: lui a5, %hi(.LCPI3_2)
+; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5)
+; RV64MV-NEXT: add a3, a3, a4
+; RV64MV-NEXT: slli a4, a4, 3
+; RV64MV-NEXT: sub a3, a3, a4
+; RV64MV-NEXT: mulh a4, a1, a5
+; RV64MV-NEXT: srli a5, a4, 63
+; RV64MV-NEXT: add a4, a4, a5
+; RV64MV-NEXT: li a5, 6
+; RV64MV-NEXT: mul a4, a4, a5
+; RV64MV-NEXT: sub a1, a1, a4
; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64MV-NEXT: vmv.v.i v8, -1
+; RV64MV-NEXT: vslide1down.vx v8, v8, a1
+; RV64MV-NEXT: vslide1down.vx v8, v8, a3
+; RV64MV-NEXT: vslide1down.vx v8, v8, a2
+; RV64MV-NEXT: vslidedown.vi v8, v8, 1
+; RV64MV-NEXT: li a1, -1
+; RV64MV-NEXT: srli a1, a1, 31
+; RV64MV-NEXT: vand.vx v8, v8, a1
+; RV64MV-NEXT: lui a2, 32
+; RV64MV-NEXT: addi a2, a2, 256
+; RV64MV-NEXT: vmv.s.x v10, a2
+; RV64MV-NEXT: vsext.vf8 v12, v10
+; RV64MV-NEXT: vmsne.vv v0, v8, v12
+; RV64MV-NEXT: vmv.v.i v8, 0
+; RV64MV-NEXT: vmerge.vim v8, v8, -1, v0
; RV64MV-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64MV-NEXT: vslidedown.vi v10, v8, 2
-; RV64MV-NEXT: vmv.x.s a1, v10
-; RV64MV-NEXT: slli a2, a1, 31
-; RV64MV-NEXT: srli a2, a2, 61
-; RV64MV-NEXT: sb a2, 12(a0)
+; RV64MV-NEXT: vmv.x.s a2, v10
+; RV64MV-NEXT: slli a3, a2, 31
+; RV64MV-NEXT: srli a3, a3, 61
+; RV64MV-NEXT: sb a3, 12(a0)
+; RV64MV-NEXT: vmv.x.s a3, v8
+; RV64MV-NEXT: and a1, a3, a1
; RV64MV-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64MV-NEXT: vslidedown.vi v8, v8, 1
-; RV64MV-NEXT: vmv.x.s a2, v8
-; RV64MV-NEXT: slli a3, a2, 33
-; RV64MV-NEXT: sd a3, 0(a0)
-; RV64MV-NEXT: slli a1, a1, 2
-; RV64MV-NEXT: slli a2, a2, 31
-; RV64MV-NEXT: srli a2, a2, 62
-; RV64MV-NEXT: or a1, a2, a1
-; RV64MV-NEXT: sw a1, 8(a0)
+; RV64MV-NEXT: vmv.x.s a3, v8
+; RV64MV-NEXT: slli a4, a3, 33
+; RV64MV-NEXT: or a1, a1, a4
+; RV64MV-NEXT: sd a1, 0(a0)
+; RV64MV-NEXT: slli a2, a2, 2
+; RV64MV-NEXT: slli a3, a3, 31
+; RV64MV-NEXT: srli a3, a3, 62
+; RV64MV-NEXT: or a2, a3, a2
+; RV64MV-NEXT: sw a2, 8(a0)
; RV64MV-NEXT: ret
%ld = load <3 x i33>, ptr %X
%srem = srem <3 x i33> %ld, <i33 6, i33 7, i33 -5>