[llvm] 28ab032 - [RISCV] Add isel patterns to form tail undisturbed vfwadd.wv from fpextend_vl+vfwadd_vl+vp_merge.

Fri May 26 16:44:28 PDT 2023

Author: Craig Topper
Date: 2023-05-26T16:44:20-07:00
New Revision: 28ab032298824ed1cd6d1499a7c67cffbf11faa3

URL: https://github.com/llvm/llvm-project/commit/28ab032298824ed1cd6d1499a7c67cffbf11faa3
DIFF: https://github.com/llvm/llvm-project/commit/28ab032298824ed1cd6d1499a7c67cffbf11faa3.diff

LOG: [RISCV] Add isel patterns to form tail undisturbed vfwadd.wv from fpextend_vl+vfwadd_vl+vp_merge.

We use special TIED instructions for vfwadd.wv to avoid an
earlyclobber constraint that would prevent the first source and the
destination from being the same register.

This prevents our normal post-processing for forming TU instructions.
Add a manual isel pattern instead. This matches what we do for FMA,
for example.

Added: 
    llvm/test/CodeGen/RISCV/rvv/vfwadd-vp.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index ac333e61bb356..76e2a2b4f56b1 100644

--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1385,6 +1385,16 @@ multiclass VPatWidenBinaryFPVL_WV_WF<SDNode op, PatFrags extop, string instructi
                 (!cast<Instruction>(instruction_name#"_WV_"#fvti.LMul.MX#"_TIED")
                    fwti.RegClass:$rs2, fvti.RegClass:$rs1,
                    GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+      // Tail undisturbed
+      def : Pat<(riscv_vp_merge_vl true_mask,
+                 (fwti.Vector (op (fwti.Vector fwti.RegClass:$rs2),
+                                  (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs1),
+                                                      (fvti.Mask true_mask), VLOpFrag)),
+                                  srcvalue, (fwti.Mask true_mask), VLOpFrag)),
+                 fwti.RegClass:$rs2, VLOpFrag),
+                (!cast<Instruction>(instruction_name#"_WV_"#fvti.LMul.MX#"_TIED")
+                   fwti.RegClass:$rs2, fvti.RegClass:$rs1,
+                   GPR:$vl, fvti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
       def : Pat<(fwti.Vector (op (fwti.Vector fwti.RegClass:$rs2),
                                  (fwti.Vector (extop (fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)),
                                                      (fvti.Mask true_mask), VLOpFrag)),

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-vp.ll
new file mode 100644
index 0000000000000..1c2ba683cd876
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-vp.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+experimental-zvfh | FileCheck %s
+
+define <vscale x 2 x float> @vfwadd_tu(<vscale x 2 x half> %arg, <vscale x 2 x float> %arg1, i32 signext %arg2) {
+; CHECK-LABEL: vfwadd_tu:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
+; CHECK-NEXT:    vfwadd.wv v9, v9, v8
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+bb:
+  %tmp = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %arg, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 %arg2)
+  %tmp3 = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %arg1, <vscale x 2 x float> %tmp, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 %arg2)
+  %tmp4 = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x float> %tmp3, <vscale x 2 x float> %arg1, i32 %arg2)
+  ret <vscale x 2 x float> %tmp4
+}
+
+declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
+declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
+declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)