[llvm] [RISCV][TII] Add and use new hook to optimize/canonicalize instructions after MachineCopyPropagation (PR #137973)

Thu May 1 09:09:44 PDT 2025

================
@@ -510,6 +510,16 @@ class TargetInstrInfo : public MCInstrInfo {
     return false;
   }
 
+  /// If possible, converts the instruction to a more 'optimized'/canonical
+  /// form. Returns true if the instruction was modified.
+  ///
+  /// This function is only called after register allocation. The MI will be
+  /// modified in place. This is called by passes such as
+  /// MachineCopyPropagation, where their mutation of the MI operands may
+  /// expose opportunities to convert the instruction to a simpler form (e.g.
+  /// a load of 0).
+  virtual bool optimizeInstruction(MachineInstr &MI) const { return false; }
----------------
asb wrote:

Oh, I see the cause of the confusion - this is indeed cleaning up after tail duplication, specifically the tail duplication performed by MachineBlockPlacement via the TailDuplicator utility class (llvm/lib/CodeGen/TailDuplicator.cpp).

Here is a roughly reduced example representing the above snippet:
```
; ModuleID = '<stdin>'
source_filename = "<stdin>"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define i64 @ham(i1 %arg, i32 %arg1) {
bb:
  br label %bb2

bb2:                                              ; preds = %bb15, %bb
  %and = and i32 0, 8388607
  br i1 %arg, label %bb4, label %bb3

bb3:                                              ; preds = %bb2
  br label %bb4

bb4:                                              ; preds = %bb3, %bb2
  %phi = phi i32 [ %arg1, %bb3 ], [ 0, %bb2 ]
  %lshr = lshr i32 %phi, 16
  %and5 = and i32 %lshr, 32768
  %lshr6 = lshr i32 %phi, 23
  %and7 = and i32 %lshr6, 255
  %and8 = and i32 %phi, 8388607
  %icmp = icmp ult i32 %and7, 113
  br i1 %icmp, label %bb9, label %bb10

bb9:                                              ; preds = %bb4
  %or = or i32 %and8, %and5
  %trunc = trunc i32 %or to i16
  br label %bb15

bb10:                                             ; preds = %bb4
  br i1 %arg, label %bb11, label %bb13

bb11:                                             ; preds = %bb10
  %trunc12 = trunc i32 %and5 to i16
  br label %bb15

bb13:                                             ; preds = %bb10
  %trunc14 = trunc i32 %and8 to i16
  br label %bb15

bb15:                                             ; preds = %bb13, %bb11, %bb9
  %phi16 = phi i16 [ %trunc, %bb9 ], [ %trunc12, %bb11 ], [ %trunc14, %bb13 ]
  %trunc17 = trunc i16 %phi16 to i8
  store i8 %trunc17, ptr null, align 1
  br label %bb2
}
```
If you run that through `llc -O3`, you'll see tail duplication happen as part of MachineBlockPlacement, after which MachineCopyPropagation (MCP) runs.

So you have a block:
```
bb.4.bb4:
; predecessors: %bb.3, %bb.2
  successors: %bb.5(0x40000000), %bb.6(0x40000000); %bb.5(50.00%), %bb.6(50.00%)
  liveins: $x10, $x11, $x12, $x13, $x15
  renamable $x14 = SRLIW renamable $x15, 31
  renamable $x16 = SLLI renamable $x15, 33
  renamable $x14 = SLLI killed renamable $x14, 15
  renamable $x16 = SRLI killed renamable $x16, 56
  renamable $x15 = AND killed renamable $x15, renamable $x12
  BLTU renamable $x13, killed renamable $x16, %bb.6
```

After tail duplication is applied in MBP you get:
```
bb.2:
; predecessors: %bb.1
  successors: %bb.5(0x40000000), %bb.6(0x40000000); %bb.5(50.00%), %bb.6(50.00%)
  liveins: $x10, $x11, $x12, $x13
  $x15 = ADDI $x0, 0
  renamable $x14 = SRLIW renamable $x15, 31
  renamable $x16 = SLLI renamable $x15, 33
  renamable $x14 = SLLI killed renamable $x14, 15
  renamable $x16 = SRLI killed renamable $x16, 56
  renamable $x15 = AND killed renamable $x15, renamable $x12
  BGEU renamable $x13, killed renamable $x16, %bb.5
  PseudoBR %bb.6

bb.3.bb3:
; predecessors: %bb.1
  successors: %bb.5(0x40000000), %bb.6(0x40000000); %bb.5(50.00%), %bb.6(50.00%)
  liveins: $x10, $x11, $x12, $x13
  $x15 = ADDI renamable $x11, 0
  renamable $x14 = SRLIW renamable $x15, 31
  renamable $x16 = SLLI renamable $x15, 33
  renamable $x14 = SLLI killed renamable $x14, 15
  renamable $x16 = SRLI killed renamable $x16, 56
  renamable $x15 = AND killed renamable $x15, renamable $x12
  BGEU renamable $x13, killed renamable $x16, %bb.5
```
Here bb.2 can obviously be cleaned up, since `$x15` is set to zero at the top of the block (`$x15 = ADDI $x0, 0`); MCP already does this to a certain extent.

https://github.com/llvm/llvm-project/pull/137973