[llvm] ed15e91 - [RISCV] Don't fold vmerge into ops if fp exception can be raised

Thu Jul 13 03:42:31 PDT 2023

Author: Luke Lau
Date: 2023-07-13T11:42:23+01:00
New Revision: ed15e9119b4a90932a286e82d74e115ad459db9d

URL: https://github.com/llvm/llvm-project/commit/ed15e9119b4a90932a286e82d74e115ad459db9d
DIFF: https://github.com/llvm/llvm-project/commit/ed15e9119b4a90932a286e82d74e115ad459db9d.diff

LOG: [RISCV] Don't fold vmerge into ops if fp exception can be raised

We are already checking for fp exceptions if VL changes, but I believe we
should also be checking for them if the mask changes as well, since that also
affects the set of active elements. From the spec:
> A vector floating-point exception at any active floating-point element sets
> the standard FP exception flags in the fflags register. Inactive elements do
> not set FP exception flags.

Note that we don't change the mask if IsMasked is true, i.e. True is masked
already, since in that case we keep the existing mask.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D154980

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index f7844a2facb914..bee85ab4d93a2a 100644

--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3301,17 +3301,20 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
       True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
   SDValue TrueVL = True.getOperand(TrueVLIndex);
 
-  auto IsNoFPExcept = [this](SDValue N) {
-    return !this->mayRaiseFPException(N.getNode()) ||
-           N->getFlags().hasNoFPExcept();
-  };
-
-  // Allow the peephole for non-exception True with VLMAX vector length, since
-  // all the values after VL of N are dependent on Merge. VLMAX should be
-  // lowered to (XLenVT -1).
-  if (TrueVL != VL && !(IsNoFPExcept(True) && isAllOnesConstant(TrueVL)))
+  // We need the VLs to be the same. But if True has a VL of VLMAX then we can
+  // go ahead and use N's VL because we know it will be smaller, so any tail
+  // elements in the result will be from Merge.
+  if (TrueVL != VL && !isAllOnesConstant(TrueVL))
     return false;
 
+  // If we end up changing the VL or mask of True, then we need to make sure it
+  // doesn't raise any observable fp exceptions, since changing the active
+  // elements will affect how fflags is set.
+  if (TrueVL != VL || !IsMasked)
+    if (mayRaiseFPException(True.getNode()) &&
+        !True->getFlags().hasNoFPExcept())
+      return false;
+
   SDLoc DL(N);
   unsigned MaskedOpc = Info->MaskedPseudo;
 #ifndef NDEBUG

diff  --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 350bc0100c356f..23dc3eb20dfa77 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -378,13 +378,15 @@ define <vscale x 2 x float> @vpmerge_constrained_fadd(<vscale x 2 x float> %pass
 declare <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata)
 declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i64)
 
-; FIXME: This shouldn't be folded because we need to preserve exceptions with
+; This shouldn't be folded because we need to preserve exceptions with
 ; "fpexcept.strict" exception behaviour, and masking may hide them.
 define <vscale x 2 x float> @vpmerge_constrained_fadd_vlmax(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m) strictfp {
 ; CHECK-LABEL: vpmerge_constrained_fadd_vlmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, tu, mu
-; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 -1)