[llvm] [RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc (PR #88295)

Piyou Chen via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 10 22:31:55 PDT 2024


================
@@ -288,32 +288,44 @@ define <vscale x 16 x i64> @vector_interleave_nxv16i64_nxv8i64(<vscale x 8 x i64
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    vmv8r.v v0, v8
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
----------------
BeMg wrote:

Additional spill code here, likely caused by an extra vsetvli dependence introduced during machine scheduling: as the snippets below show, the PseudoVSRL_VI_M2 is scheduled earlier with the patch (160B instead of 200B). The scheduler itself may be able to fix this once the vsetvli insertion pass is moved completely post-RA.

```
; without patch
200B	  %11:vrm2nov0 = PseudoVSRL_VI_M2 undef %11:vrm2nov0(tied-def 0), %7:vrm2, 1, -1, 4, 3, implicit $vl, implicit $vtype

; with patch
160B	  %11:vrm2nov0 = PseudoVSRL_VI_M2 undef %11:vrm2nov0(tied-def 0), %7:vrm2, 1, -1, 4, 3, implicit $vl, implicit $vtype
```

```
; Without patch
# *** IR Dump After Machine Instruction Scheduler (machine-scheduler) ***:
# Machine code for function vector_interleave_nxv16i64_nxv8i64: NoPHIs, TracksLiveness, TiedOpsRewritten
Function Live Ins: $v8m8 in %0, $v16m8 in %1

0B	bb.0 (%ir-block.0):
	  liveins: $v8m8, $v16m8
16B	  %16:vrm8 = COPY $v16m8
32B	  %0:vrm8 = COPY $v8m8
96B	  %5:gpr = PseudoReadVLENB
112B	  %6:gpr = SRLI %5:gpr, 1
128B	  dead %18:gpr = PseudoVSETVLIX0 $x0, 73, implicit-def $vl, implicit-def $vtype
144B	  %7:vrm2 = PseudoVID_V_M2 undef %7:vrm2(tied-def 0), -1, 4, 1, implicit $vl, implicit $vtype
176B	  %9:vrm2 = PseudoVAND_VI_M2 undef %9:vrm2(tied-def 0), %7:vrm2, 1, -1, 4, 3, implicit $vl, implicit $vtype
192B	  early-clobber %10:vr = PseudoVMSNE_VI_M2 %9:vrm2, 0, -1, 4, implicit $vl, implicit $vtype
200B	  %11:vrm2nov0 = PseudoVSRL_VI_M2 undef %11:vrm2nov0(tied-def 0), %7:vrm2, 1, -1, 4, 3, implicit $vl, implicit $vtype
256B	  %12:vrm8 = COPY %0:vrm8
260B	  $v0 = COPY %10:vr
264B	  %11:vrm2nov0 = PseudoVADD_VX_M2_MASK %11:vrm2nov0(tied-def 0), %11:vrm2nov0, %6:gpr, $v0, -1, 4, 1, implicit $vl, implicit $vtype
272B	  %12.sub_vrm4_1:vrm8 = COPY %16.sub_vrm4_0:vrm8
288B	  dead $x0 = PseudoVSETVLIX0 killed $x0, 219, implicit-def $vl, implicit-def $vtype, implicit $vl
304B	  early-clobber %13:vrm8 = PseudoVRGATHEREI16_VV_M8_E64_M2 undef %13:vrm8(tied-def 0), %12:vrm8, %11:vrm2nov0, -1, 6, 1, implicit $vl, implicit $vtype
320B	  %16.sub_vrm4_0:vrm8 = COPY %0.sub_vrm4_1:vrm8
368B	  early-clobber %17:vrm8 = PseudoVRGATHEREI16_VV_M8_E64_M2 undef %17:vrm8(tied-def 0), %16:vrm8, %11:vrm2nov0, -1, 6, 1, implicit $vl, implicit $vtype
384B	  $v8m8 = COPY %13:vrm8
400B	  $v16m8 = COPY %17:vrm8
416B	  PseudoRET implicit $v8m8, implicit $v16m8
```

```
; With patch 
# *** IR Dump After Machine Instruction Scheduler (machine-scheduler) ***:
# Machine code for function vector_interleave_nxv16i64_nxv8i64: NoPHIs, TracksLiveness, TiedOpsRewritten
Function Live Ins: $v8m8 in %0, $v16m8 in %1

0B	bb.0 (%ir-block.0):
	  liveins: $v8m8, $v16m8
16B	  %16:vrm8 = COPY $v16m8
32B	  %0:vrm8 = COPY $v8m8
96B	  %5:gpr = PseudoReadVLENB
112B	  %6:gpr = SRLI %5:gpr, 1
128B	  dead %18:gpr = PseudoVSETVLIX0 $x0, 201, implicit-def $vl, implicit-def $vtype
144B	  %7:vrm2 = PseudoVID_V_M2 undef %7:vrm2(tied-def 0), -1, 4, 1, implicit $vl, implicit $vtype
160B	  %11:vrm2nov0 = PseudoVSRL_VI_M2 undef %11:vrm2nov0(tied-def 0), %7:vrm2, 1, -1, 4, 3, implicit $vl, implicit $vtype
176B	  %9:vrm2 = PseudoVAND_VI_M2 undef %9:vrm2(tied-def 0), %7:vrm2, 1, -1, 4, 3, implicit $vl, implicit $vtype
192B	  early-clobber %10:vr = PseudoVMSNE_VI_M2 %9:vrm2, 0, -1, 4, implicit $vl, implicit $vtype
224B	  dead $x0 = PseudoVSETVLIX0 killed $x0, 73, implicit-def $vl, implicit-def $vtype, implicit $vl
272B	  %12:vrm8 = COPY %0:vrm8
276B	  $v0 = COPY %10:vr
280B	  %11:vrm2nov0 = PseudoVADD_VX_M2_MASK %11:vrm2nov0(tied-def 0), %11:vrm2nov0, %6:gpr, $v0, -1, 4, 1, implicit $vl, implicit $vtype
288B	  %12.sub_vrm4_1:vrm8 = COPY %16.sub_vrm4_0:vrm8
304B	  dead $x0 = PseudoVSETVLIX0 killed $x0, 219, implicit-def $vl, implicit-def $vtype, implicit $vl
320B	  early-clobber %13:vrm8 = PseudoVRGATHEREI16_VV_M8_E64_M2 undef %13:vrm8(tied-def 0), %12:vrm8, %11:vrm2nov0, -1, 6, 1, implicit $vl, implicit $vtype
336B	  %16.sub_vrm4_0:vrm8 = COPY %0.sub_vrm4_1:vrm8
384B	  early-clobber %17:vrm8 = PseudoVRGATHEREI16_VV_M8_E64_M2 undef %17:vrm8(tied-def 0), %16:vrm8, %11:vrm2nov0, -1, 6, 1, implicit $vl, implicit $vtype
400B	  $v8m8 = COPY %13:vrm8
416B	  $v16m8 = COPY %17:vrm8
432B	  PseudoRET implicit $v8m8, implicit $v16m8
```
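
For reference, dumps like the ones above can be regenerated with a plain llc invocation using `-print-after=machine-scheduler` (the dump is written to stderr). The test path and `-mattr` string in the sketch below are assumptions rather than values taken from this PR; the affected test's own RUN line is the source of truth.

```
; Reproduction sketch -- the .ll path and -mattr list are illustrative
; assumptions, not taken from this PR.
;   llc -mtriple=riscv64 -mattr=+v -print-after=machine-scheduler \
;       llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll \
;       -o /dev/null 2> sched-dump.txt
```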


https://github.com/llvm/llvm-project/pull/88295

