[llvm-branch-commits] [llvm] PeepholeOpt: Fix looking for def of current copy to coalesce (PR #125533)
Pengcheng Wang via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Feb 4 22:58:47 PST 2025
================
@@ -403,236 +396,253 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vmv1r.v v0, v13
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 84
+; RV32-NEXT: li a3, 80
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmerge.vvm v4, v8, v24, v0
+; RV32-NEXT: vmerge.vvm v12, v8, v24, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 28
+; RV32-NEXT: li a3, 20
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v14
+; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 76
+; RV32-NEXT: li a3, 72
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 68
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v16, v24, v16, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 76
+; RV32-NEXT: li a2, 72
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v12
+; RV32-NEXT: vmv1r.v v0, v2
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 84
+; RV32-NEXT: li a2, 80
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 68
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: slli a1, a1, 6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 32
; RV32-NEXT: addi a1, a1, 4
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v16, a1
+; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
+; RV32-NEXT: li a2, 36
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v20, v8, v16
+; RV32-NEXT: vrgatherei16.vv v16, v8, v12
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 52
+; RV32-NEXT: li a2, 48
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v20, v8
+; RV32-NEXT: vmv.v.v v16, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 84
+; RV32-NEXT: li a2, 80
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 48
; RV32-NEXT: lui a2, %hi(.LCPI8_3)
; RV32-NEXT: addi a2, a2, %lo(.LCPI8_3)
; RV32-NEXT: addi a1, a1, 5
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v28, (a2)
+; RV32-NEXT: vle16.v v24, (a2)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v20, a1
+; RV32-NEXT: vmv.v.x v25, a1
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v8, v12, v20
+; RV32-NEXT: vrgatherei16.vv v16, v8, v25
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 44
+; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: vmv.v.v v16, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 52
+; RV32-NEXT: li a2, 56
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v24, v12, v28
+; RV32-NEXT: vrgatherei16.vv v16, v8, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a2, 28
+; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v24, v16
+; RV32-NEXT: vmv.v.v v16, v8
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI8_4)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_4)
; RV32-NEXT: lui a2, %hi(.LCPI8_5)
; RV32-NEXT: addi a2, a2, %lo(.LCPI8_5)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v12, (a1)
+; RV32-NEXT: vle16.v v26, (a1)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v28, (a2)
+; RV32-NEXT: vle16.v v24, (a2)
; RV32-NEXT: lui a1, %hi(.LCPI8_6)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_6)
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vle16.v v30, (a1)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v16, v0, v12
+; RV32-NEXT: vle16.v v2, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 12
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v16, v26
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v12, v20, v28
+; RV32-NEXT: vrgatherei16.vv v20, v4, v24
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v12, v16
+; RV32-NEXT: vmv.v.v v20, v8
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v16, v0, v30
+; RV32-NEXT: vrgatherei16.vv v24, v8, v2
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 48
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI8_7)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7)
; RV32-NEXT: lui a2, %hi(.LCPI8_8)
; RV32-NEXT: addi a2, a2, %lo(.LCPI8_8)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v20, (a1)
+; RV32-NEXT: vle16.v v12, (a1)
; RV32-NEXT: lui a1, %hi(.LCPI8_9)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v8, (a2)
+; RV32-NEXT: vle16.v v16, (a2)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vle16.v v10, (a1)
+; RV32-NEXT: vle16.v v18, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 28
+; RV32-NEXT: li a2, 20
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v28, v0, v20
+; RV32-NEXT: vrgatherei16.vv v24, v0, v12
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 48
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
----------------
wangpc-pp wrote:
Ditto.
https://github.com/llvm/llvm-project/pull/125533
More information about the llvm-branch-commits mailing list