[llvm] 3b48d84 - [RISCV] Optimize more redundant VSETVLIs

Fraser Cormack via llvm-commits <llvm-commits at lists.llvm.org>
Fri Apr 2 02:11:00 PDT 2021


Author: Fraser Cormack
Date: 2021-04-02T10:04:07+01:00
New Revision: 3b48d849d44367ff351f58ce7a4949d104a30dea

URL: https://github.com/llvm/llvm-project/commit/3b48d849d44367ff351f58ce7a4949d104a30dea
DIFF: https://github.com/llvm/llvm-project/commit/3b48d849d44367ff351f58ce7a4949d104a30dea.diff

LOG: [RISCV] Optimize more redundant VSETVLIs

D99717 introduced some test cases which showed that feeding the output
of one vsetvli into another would not be picked up by the
RISCVCleanupVSETVLI pass. This patch teaches the optimization to
recognize that pattern, which is quite common when the RVV vsetvli
intrinsic is used to pass the VL on to other intrinsics.
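
For illustration, a minimal IR sketch of the pattern (it mirrors the
redundant_vsetvli test updated below; the function name is made up):

  declare i32 @llvm.riscv.vsetvli.i32(i32, i32, i32)
  declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)

  define <vscale x 4 x i32> @feed_vl(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
    ; The vsetvli intrinsic computes the VL for the given AVL/SEW/LMUL...
    %vl = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
    ; ...and its result is passed on as the VL of the load. Lowering the
    ; load emits a second vsetvli whose AVL operand is %vl, the first
    ; one's output. That second vsetvli cannot change VL, so the cleanup
    ; pass can now delete it.
    %v = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>* %ptr, i32 %vl)
    ret <vscale x 4 x i32> %v
  }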

The second test case introduced by D99717 is left unoptimized by this
patch. It is a rarer case, and handling it requires rewiring any uses
of the redundant vset[i]vli's output over to the previous one's.
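
A minimal sketch of that rarer case, mirroring the repeated_vsetvli
test updated below (same declarations as above; the function name is
made up):

  define <vscale x 4 x i32> @repeat_vl(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
    %vl0 = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
    ; %vl1 always equals %vl0, but unlike above it has a use of its own
    ; below, so the second vsetvli cannot simply be deleted: its output
    ; would first have to be rewired to %vl0.
    %vl1 = call i32 @llvm.riscv.vsetvli.i32(i32 %vl0, i32 2, i32 1)
    %v = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>* %ptr, i32 %vl1)
    ret <vscale x 4 x i32> %v
  }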

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D99730

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
    llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir
    llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
    llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
index 426860f63e1b3..74f437f0ed840 100644
--- a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
@@ -75,11 +75,19 @@ static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) {
 
   assert(MI.getOpcode() == RISCV::PseudoVSETVLI);
   Register AVLReg = MI.getOperand(1).getReg();
+  Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
 
   // If this VSETVLI isn't changing VL, it is redundant.
   if (AVLReg == RISCV::X0 && MI.getOperand(0).getReg() == RISCV::X0)
     return true;
 
+  // If the previous VSET{I}VLI's output (which isn't X0) is fed into this
+  // VSETVLI, this one isn't changing VL so is redundant.
+  // Only perform this on virtual registers to avoid the complexity of having
+  // to work out if the physical register was clobbered somewhere in between.
+  if (AVLReg.isVirtual() && AVLReg == PrevOutVL)
+    return true;
+
   // If the previous opcode isn't vsetvli we can't do any more comparison.
   if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETVLI)
     return false;
@@ -94,7 +102,6 @@ static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) {
     // This instruction is setting VL to VLMAX, this is redundant if the
     // previous VSETVLI was also setting VL to VLMAX. But it is not redundant
     // if they were setting it to any other value or leaving VL unchanged.
-    Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
     return PrevOutVL != RISCV::X0;
   }
 

diff --git a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir
index 6af6204848bc8..d22f63f844cc7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir
@@ -1,17 +1,17 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s
 
-# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
-# keeps the previous value of VL, the second time sets it to VLMAX. We can't
-# remove the first since we can't tell if this is a change VL.
-
 --- |
   ; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll'
   source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll"
   target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
   target triple = "riscv64"
 
-  define void @cleanup_vsetvli() #0 {
+  define void @cleanup_vsetvli0() #0 {
+    ret void
+  }
+
+  define void @cleanup_vsetvli1() #0 {
     ret void
   }
 
@@ -19,7 +19,10 @@
 
 ...
 ---
-name:            cleanup_vsetvli
+# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
+# keeps the previous value of VL, the second sets it to VLMAX. We can't remove
+# the first since we can't tell if this is a change of VL.
+name:            cleanup_vsetvli0
 alignment:       4
 tracksRegLiveness: true
 registers:
@@ -29,7 +32,7 @@ frameInfo:
 machineFunctionInfo: {}
 body:             |
   bb.0 (%ir-block.0):
-    ; CHECK-LABEL: name: cleanup_vsetvli
+    ; CHECK-LABEL: name: cleanup_vsetvli0
     ; CHECK: dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
     ; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
     ; CHECK: PseudoRET
@@ -38,3 +41,39 @@ body:             |
     PseudoRET
 
 ...
+---
+# 1. Ensure we can remove the second VSETVLI which takes its AVL from the first VSETVLI.
+# 2. Ensure we can remove the fourth VSETVLI which takes its AVL from the VSETIVLI.
+# 3. Make sure we don't combine the latter two VSETVLIs; the first outputs to a
+# physical register which is clobbered by a later instruction.
+name:            cleanup_vsetvli1
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: $x3
+    ; CHECK-LABEL: name: cleanup_vsetvli1
+    ; CHECK: liveins: $x3
+    ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVSETIVLI:%[0-9]+]]:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $x1 = COPY $x3
+    ; CHECK: dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: PseudoRET
+    %0:gpr  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    dead %1:gpr  = PseudoVSETVLI %0, 12, implicit-def $vl, implicit-def $vtype
+
+    %2:gpr  = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
+    dead %3:gpr  = PseudoVSETVLI %2, 12, implicit-def $vl, implicit-def $vtype
+
+    $x1  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    $x1 = COPY $x3
+    dead %4:gpr  = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
+    PseudoRET
+
+...

diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
index 6975cf7909a3a..9cb7ef1029f23 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
@@ -34,12 +34,10 @@ define void @test_vsetvlimax_e64m8() nounwind {
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)
 
 ; Check that we remove the redundant vsetvli when followed by another operation
-; FIXME: We don't
 define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
 ; CHECK-LABEL: redundant_vsetvli:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %vl = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
@@ -49,13 +47,13 @@ define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr)
 
 ; Check that we remove the repeated/redundant vsetvli when followed by another
 ; operation
-; FIXME: We don't
+; FIXME: We don't catch the second vsetvli because it has a use of its output.
+; We could replace it with the output of the first vsetvli.
 define <vscale x 4 x i32> @repeated_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
 ; CHECK-LABEL: repeated_vsetvli:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %vl0 = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)

diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
index 9e693a876de8c..e14e8bd80ced9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
@@ -52,12 +52,10 @@ define void @test_vsetvlimax_e64m4() nounwind {
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>*, i64)
 
 ; Check that we remove the redundant vsetvli when followed by another operation
-; FIXME: We don't
 define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
 ; CHECK-LABEL: redundant_vsetvli:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
@@ -67,13 +65,13 @@ define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr)
 
 ; Check that we remove the repeated/redundant vsetvli when followed by another
 ; operation
-; FIXME: We don't
+; FIXME: We don't catch the second vsetvli because it has a use of its output.
+; We could replace it with the output of the first vsetvli.
 define <vscale x 4 x i32> @repeated_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
 ; CHECK-LABEL: repeated_vsetvli:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %vl0 = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)