[llvm] [RISCV] Don't let performBUILD_VECTORCombine form a division or remainder with undef elements. (PR #69482)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 18 09:34:26 PDT 2023
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/69482
Division/remainder by undef is immediate UB across the entire vector.
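For illustration only (the lane count and element type below are made up, not taken from the affected test), the combine turns a BUILD_VECTOR whose elements are scalar divides into a single vector divide, so any element that was undef in the BUILD_VECTOR becomes an undef divisor lane. Roughly, in IR terms:

  ; Before: only lanes 0 and 1 perform a division; lanes 2 and 3 are left undef.
  define <4 x i32> @build_of_divs(i32 %a, i32 %b) {
    %d0 = sdiv i32 %a, 6
    %d1 = sdiv i32 %b, 7
    %v0 = insertelement <4 x i32> undef, i32 %d0, i32 0
    %v1 = insertelement <4 x i32> %v0, i32 %d1, i32 1
    ret <4 x i32> %v1
  }

  ; After an unguarded combine, the equivalent of a single
  ;   sdiv <4 x i32> %lhs, <i32 6, i32 7, i32 undef, i32 undef>
  ; divides by undef in lanes 2 and 3, which is immediate UB for the whole
  ; operation, not just those lanes.

The added DAG.isSafeToSpeculativelyExecute(Opcode) check makes the combine bail out for opcodes that cannot be speculated (the integer divide/remainder opcodes) whenever an undef element is seen.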
From 78289462b057f6bfe6cc669cd7ae270588a5ac08 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 18 Oct 2023 09:31:23 -0700
Subject: [PATCH] [RISCV] Don't let performBUILD_VECTORCombine form a division
or remainder with undef elements.
Division/remainder by undef is immediate UB across the entire vector.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 81 ++++++++++++++++---
2 files changed, 72 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e8f001e491cdcaa..63ebe8b9af320bc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13744,6 +13744,10 @@ static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
   SmallVector<SDValue> RHSOps;
   for (SDValue Op : N->ops()) {
     if (Op.isUndef()) {
+      // We can't form a divide or remainder from undef.
+      if (!DAG.isSafeToSpeculativelyExecute(Opcode))
+        return SDValue();
+
       LHSOps.push_back(Op);
       RHSOps.push_back(Op);
       continue;
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 6ed352b51f25459..d311311175c15e6 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -725,24 +725,79 @@ define void @test_srem_vec(ptr %X) nounwind {
;
; RV64MV-LABEL: test_srem_vec:
; RV64MV: # %bb.0:
+; RV64MV-NEXT: ld a1, 0(a0)
+; RV64MV-NEXT: lwu a2, 8(a0)
+; RV64MV-NEXT: srli a3, a1, 2
+; RV64MV-NEXT: lbu a4, 12(a0)
+; RV64MV-NEXT: slli a5, a2, 62
+; RV64MV-NEXT: or a3, a5, a3
+; RV64MV-NEXT: srai a3, a3, 31
+; RV64MV-NEXT: slli a4, a4, 32
+; RV64MV-NEXT: or a2, a2, a4
+; RV64MV-NEXT: slli a2, a2, 29
+; RV64MV-NEXT: lui a4, %hi(.LCPI3_0)
+; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4)
+; RV64MV-NEXT: srai a2, a2, 31
+; RV64MV-NEXT: slli a1, a1, 31
+; RV64MV-NEXT: srai a1, a1, 31
+; RV64MV-NEXT: mulh a4, a2, a4
+; RV64MV-NEXT: srli a5, a4, 63
+; RV64MV-NEXT: srai a4, a4, 1
+; RV64MV-NEXT: add a4, a4, a5
+; RV64MV-NEXT: lui a5, %hi(.LCPI3_1)
+; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5)
+; RV64MV-NEXT: add a2, a2, a4
+; RV64MV-NEXT: slli a4, a4, 2
+; RV64MV-NEXT: add a2, a2, a4
+; RV64MV-NEXT: mulh a4, a3, a5
+; RV64MV-NEXT: srli a5, a4, 63
+; RV64MV-NEXT: srai a4, a4, 1
+; RV64MV-NEXT: add a4, a4, a5
+; RV64MV-NEXT: lui a5, %hi(.LCPI3_2)
+; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5)
+; RV64MV-NEXT: add a3, a3, a4
+; RV64MV-NEXT: slli a4, a4, 3
+; RV64MV-NEXT: sub a3, a3, a4
+; RV64MV-NEXT: mulh a4, a1, a5
+; RV64MV-NEXT: srli a5, a4, 63
+; RV64MV-NEXT: add a4, a4, a5
+; RV64MV-NEXT: li a5, 6
+; RV64MV-NEXT: mul a4, a4, a5
+; RV64MV-NEXT: sub a1, a1, a4
; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64MV-NEXT: vmv.v.i v8, -1
+; RV64MV-NEXT: vslide1down.vx v8, v8, a1
+; RV64MV-NEXT: vslide1down.vx v8, v8, a3
+; RV64MV-NEXT: vslide1down.vx v8, v8, a2
+; RV64MV-NEXT: vslidedown.vi v8, v8, 1
+; RV64MV-NEXT: li a1, -1
+; RV64MV-NEXT: srli a1, a1, 31
+; RV64MV-NEXT: vand.vx v8, v8, a1
+; RV64MV-NEXT: lui a2, 32
+; RV64MV-NEXT: addi a2, a2, 256
+; RV64MV-NEXT: vmv.s.x v10, a2
+; RV64MV-NEXT: vsext.vf8 v12, v10
+; RV64MV-NEXT: vmsne.vv v0, v8, v12
+; RV64MV-NEXT: vmv.v.i v8, 0
+; RV64MV-NEXT: vmerge.vim v8, v8, -1, v0
; RV64MV-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64MV-NEXT: vslidedown.vi v10, v8, 2
-; RV64MV-NEXT: vmv.x.s a1, v10
-; RV64MV-NEXT: slli a2, a1, 31
-; RV64MV-NEXT: srli a2, a2, 61
-; RV64MV-NEXT: sb a2, 12(a0)
+; RV64MV-NEXT: vmv.x.s a2, v10
+; RV64MV-NEXT: slli a3, a2, 31
+; RV64MV-NEXT: srli a3, a3, 61
+; RV64MV-NEXT: sb a3, 12(a0)
+; RV64MV-NEXT: vmv.x.s a3, v8
+; RV64MV-NEXT: and a1, a3, a1
; RV64MV-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64MV-NEXT: vslidedown.vi v8, v8, 1
-; RV64MV-NEXT: vmv.x.s a2, v8
-; RV64MV-NEXT: slli a3, a2, 33
-; RV64MV-NEXT: sd a3, 0(a0)
-; RV64MV-NEXT: slli a1, a1, 2
-; RV64MV-NEXT: slli a2, a2, 31
-; RV64MV-NEXT: srli a2, a2, 62
-; RV64MV-NEXT: or a1, a2, a1
-; RV64MV-NEXT: sw a1, 8(a0)
+; RV64MV-NEXT: vmv.x.s a3, v8
+; RV64MV-NEXT: slli a4, a3, 33
+; RV64MV-NEXT: or a1, a1, a4
+; RV64MV-NEXT: sd a1, 0(a0)
+; RV64MV-NEXT: slli a2, a2, 2
+; RV64MV-NEXT: slli a3, a3, 31
+; RV64MV-NEXT: srli a3, a3, 62
+; RV64MV-NEXT: or a2, a3, a2
+; RV64MV-NEXT: sw a2, 8(a0)
; RV64MV-NEXT: ret
%ld = load <3 x i33>, ptr %X
%srem = srem <3 x i33> %ld, <i33 6, i33 7, i33 -5>