[llvm] [RISCV] Add basic scalar support for MERGE, MVM, and MVMN from P extension (PR #180677)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 9 21:09:22 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

<details>
<summary>Changes</summary>

These are 3 variations of the same operation with a different operand
tied to the destination register. We need to pick the one that
minimizes the number of mvs.

To do this we take the approach used by AArch64 to select between
BIT, BIF, and BSL, which are the same operations. We define a pseudo
with no tied constraint and expand it after register allocation based
on where the destination register ended up. If the destination
register is none of the operands, we'll insert a mv.

I've replaced RISCVISD::MVM with RISCVISD::MERGE and updated the operand
order accordingly. I find the MERGE name easier to read so I've made it
the canonical name.

Ideally we could use commuteInstructionImpl and the TwoAddressInstructionPass
to select the opcode before register allocation. That only works if
you can commute exactly 2 operands and maybe change the opcode in the MI
representation of any of the forms to get to either of the other 2 forms.
That is not possible. We'd need to define 3 more pseudoinstructions
with different permutations.

With the current approach it might be possible that we insert a mv
not because all of the operand registers were needed by later instructions,
but because the register allocator needed to put the result in a
different register. It's possible a different allocation for other
instructions might have avoided the mv.

I wrote the patch based on the AArch64 implementation, but the tests were generated
by AI.

---
Full diff: https://github.com/llvm/llvm-project/pull/180677.diff


5 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+1-1) 
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoP.td (+15-7) 
- (modified) llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp (+74) 
- (modified) llvm/test/CodeGen/RISCV/rv32p.ll (+135) 
- (modified) llvm/test/CodeGen/RISCV/rv64p.ll (+135) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 975baa7e2e504..d6d604d288fc6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10758,7 +10758,7 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                        DAG.getConstant(ShiftAmt, DL, XLenVT));
       SDValue Mask = DAG.getConstant(PosMask, DL, XLenVT);
       SDValue Result =
-          DAG.getNode(RISCVISD::MVM, DL, XLenVT, Vec, ShiftedVal, Mask);
+          DAG.getNode(RISCVISD::MERGE, DL, XLenVT, Mask, Vec, ShiftedVal);
       return DAG.getBitcast(VecVT, Result);
     }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 774e1e024a4be..6f9c167e20a1d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1517,13 +1517,14 @@ def riscv_mulhr : RVSDNode<"MULHR", SDTIntBinOp>;
 def riscv_mulhru : RVSDNode<"MULHRU", SDTIntBinOp>;
 def riscv_mulhrsu : RVSDNode<"MULHRSU", SDTIntBinOp>;
 
-def SDT_RISCVMVM : SDTypeProfile<1, 3, [SDTCisInt<0>,
-                                        SDTCisSameAs<0, 1>,
-                                        SDTCisSameAs<0, 2>,
-                                        SDTCisSameAs<0, 3>]>;
-def riscv_mvm : RVSDNode<"MVM", SDT_RISCVMVM>;
+def SDT_RISCVMERGE : SDTypeProfile<1, 3, [SDTCisInt<0>,
+                                          SDTCisSameAs<0, 1>,
+                                          SDTCisSameAs<0, 2>,
+                                          SDTCisSameAs<0, 3>]>;
+def riscv_merge : RVSDNode<"MERGE", SDT_RISCVMERGE>;
 
 let Predicates = [HasStdExtP] in {
+
   def : PatGpr<abs, ABS>;
   def : PatGpr<ctls, CLS>;
 
@@ -1532,9 +1533,16 @@ let Predicates = [HasStdExtP] in {
   def : Pat<(XLenVT (fshr GPR:$rs1, GPR:$rd, shiftMaskXLen:$rs2)),
             (SRX GPR:$rd, GPR:$rs1, shiftMaskXLen:$rs2)>;
 
+  // Pseudo version of MERGE without the tied constraint. Will be expanded to
+  // MERGE, MVM, or MVMN after register allocation.
+  def PseudoMERGE : Pseudo<(outs GPR:$dst), (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
+                           []>;
+  def : Pat<(XLenVT (or (and GPR:$rd, GPR:$rs2), (and (not GPR:$rd), GPR:$rs1))),
+            (PseudoMERGE GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+
   // Pattern for insert_vector_elt
-  def : Pat<(XLenVT (riscv_mvm GPR:$rd, GPR:$rs1, GPR:$rs2)),
-            (MVM GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(XLenVT (riscv_merge GPR:$rd, GPR:$rs1, GPR:$rs2)),
+            (PseudoMERGE GPR:$rd, GPR:$rs1, GPR:$rs2)>;
 
   // Basic 8-bit arithmetic patterns
   def: Pat<(XLenVecI8VT (add GPR:$rs1, GPR:$rs2)), (PADD_B GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
index 08e2b835547ca..8e2832f654fd8 100644
--- a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
@@ -43,6 +43,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass {
                 MachineBasicBlock::iterator &NextMBBI);
   bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
   bool expandMovAddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+  bool expandMERGE(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
 };
 
 char RISCVPostRAExpandPseudo::ID = 0;
@@ -76,6 +77,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
     return expandMovImm(MBB, MBBI);
   case RISCV::PseudoMovAddr:
     return expandMovAddr(MBB, MBBI);
+  case RISCV::PseudoMERGE:
+    return expandMERGE(MBB, MBBI);
   default:
     return false;
   }
@@ -118,6 +121,77 @@ bool RISCVPostRAExpandPseudo::expandMovAddr(MachineBasicBlock &MBB,
   return true;
 }
 
+/// Transfer implicit operands on the pseudo instruction to the
+/// instructions created from the expansion.
+static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
+                           MachineInstrBuilder &DefMI) {
+  const MCInstrDesc &Desc = OldMI.getDesc();
+  for (const MachineOperand &MO :
+       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
+    assert(MO.isReg() && MO.getReg());
+    if (MO.isUse())
+      UseMI.add(MO);
+    else
+      DefMI.add(MO);
+  }
+}
+
+// Expand PseudoMERGE to MERGE, MVM, or MVMN.
+bool RISCVPostRAExpandPseudo::expandMERGE(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register DstReg = MI.getOperand(0).getReg();
+  if (DstReg == MI.getOperand(3).getReg()) {
+    // Expand to MVMN
+    auto I = BuildMI(MBB, MBBI, DL, TII->get(RISCV::MVMN))
+                 .add(MI.getOperand(0))
+                 .add(MI.getOperand(3))
+                 .add(MI.getOperand(2))
+                 .add(MI.getOperand(1));
+    transferImpOps(*MBBI, I, I);
+  } else if (DstReg == MBBI->getOperand(2).getReg()) {
+    // Expand to MVM
+    auto I = BuildMI(MBB, MBBI, DL, TII->get(RISCV::MVM))
+                 .add(MI.getOperand(0))
+                 .add(MI.getOperand(2))
+                 .add(MI.getOperand(3))
+                 .add(MI.getOperand(1));
+    transferImpOps(*MBBI, I, I);
+  } else if (DstReg == MI.getOperand(1).getReg()) {
+    // Expand to MERGE
+    auto I = BuildMI(MBB, MBBI, DL, TII->get(RISCV::MERGE))
+                 .add(MI.getOperand(0))
+                 .add(MI.getOperand(1))
+                 .add(MI.getOperand(2))
+                 .add(MI.getOperand(3));
+    transferImpOps(*MBBI, I, I);
+  } else {
+    // Use an additional move.
+    RegState RegState =
+        getRenamableRegState(MI.getOperand(1).isRenamable()) |
+        getKillRegState(MI.getOperand(1).isKill() &&
+                        MI.getOperand(1).getReg() !=
+                            MI.getOperand(2).getReg() &&
+                        MI.getOperand(1).getReg() != MI.getOperand(3).getReg());
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(RISCV::ADDI))
+        .addDef(DstReg, getRenamableRegState(MI.getOperand(0).isRenamable()))
+        .addReg(MI.getOperand(1).getReg(), RegState)
+        .addImm(0);
+    auto I = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(RISCV::MERGE))
+                 .add(MI.getOperand(0))
+                 .addReg(DstReg,
+                         RegState::Kill | getRenamableRegState(
+                                              MI.getOperand(0).isRenamable()))
+                 .add(MI.getOperand(2))
+                 .add(MI.getOperand(3));
+    transferImpOps(*MBBI, I, I);
+  }
+  MI.eraseFromParent();
+  return true;
+}
+
 } // end of anonymous namespace
 
 INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-post-ra-expand-pseudo",
diff --git a/llvm/test/CodeGen/RISCV/rv32p.ll b/llvm/test/CodeGen/RISCV/rv32p.ll
index 1e31983df0b8c..e4d4c68109dea 100644
--- a/llvm/test/CodeGen/RISCV/rv32p.ll
+++ b/llvm/test/CodeGen/RISCV/rv32p.ll
@@ -725,3 +725,138 @@ define void @wmaccu_multiple_uses(i32 %a, i32 %b, i64 %c, ptr %out1, ptr %out2)
   store i64 %mul, ptr %out2
   ret void
 }
+
+; Test bitwise merge: (mask & b) | (~mask & a)
+define i32 @merge_i32(i32 %mask, i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: merge_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test MERGE with swapped a/b arguments
+define i32 @merge_i32_2(i32 %mask, i32 %b, i32 %a) nounwind {
+; CHECK-LABEL: merge_i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test MVM: result overwrites rs1 (%a)
+define i32 @mvm_i32(i32 %a, i32 %mask, i32 %b) nounwind {
+; CHECK-LABEL: mvm_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test MVM with mask as last argument
+define i32 @mvm_i32_2(i32 %a, i32 %b, i32 %mask) nounwind {
+; CHECK-LABEL: mvm_i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test MVMN: result overwrites rs2 (%b)
+define i32 @mvmn_i32(i32 %b, i32 %mask, i32 %a) nounwind {
+; CHECK-LABEL: mvmn_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test MVMN with mask as last argument
+define i32 @mvmn_i32_2(i32 %b, i32 %a, i32 %mask) nounwind {
+; CHECK-LABEL: mvmn_i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test case where none of the source operands can be overwritten,
+; requiring a mv before merge
+define i32 @merge_i32_mv(i32 %mask, i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: merge_i32_mv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mv a3, a0
+; CHECK-NEXT:    merge a3, a1, a2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    add a0, a3, a0
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  %sum1 = add i32 %or, %mask
+  %sum2 = add i32 %sum1, %a
+  %sum3 = add i32 %sum2, %b
+  ret i32 %sum3
+}
+
+; Test alternate merge pattern: (a ^ b) & mask ^ a
+define i32 @merge_xor_i32(i32 %mask, i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: merge_xor_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a1, a2
+; CHECK-NEXT:    ret
+  %xor1 = xor i32 %a, %b
+  %and = and i32 %xor1, %mask
+  %xor2 = xor i32 %and, %a
+  ret i32 %xor2
+}
+
+; Test alternate merge pattern with different argument order for MVM
+define i32 @mvm_xor_i32(i32 %a, i32 %mask, i32 %b) nounwind {
+; CHECK-LABEL: mvm_xor_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a2, a1
+; CHECK-NEXT:    ret
+  %xor1 = xor i32 %a, %b
+  %and = and i32 %xor1, %mask
+  %xor2 = xor i32 %and, %a
+  ret i32 %xor2
+}
+
+; Test alternate merge pattern with different argument order for MVMN
+define i32 @mvmn_xor_i32(i32 %b, i32 %mask, i32 %a) nounwind {
+; CHECK-LABEL: mvmn_xor_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a2, a1
+; CHECK-NEXT:    ret
+  %xor1 = xor i32 %a, %b
+  %and = and i32 %xor1, %mask
+  %xor2 = xor i32 %and, %a
+  ret i32 %xor2
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64p.ll b/llvm/test/CodeGen/RISCV/rv64p.ll
index 2d6d615d9f7b9..53ca8476034a1 100644
--- a/llvm/test/CodeGen/RISCV/rv64p.ll
+++ b/llvm/test/CodeGen/RISCV/rv64p.ll
@@ -378,3 +378,138 @@ define i128 @srxi_i128(i128 %x) {
   %a = lshr i128 %x, 49
   ret i128 %a
 }
+
+; Test bitwise merge: (mask & b) | (~mask & a)
+define i64 @merge_i64(i64 %mask, i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: merge_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test MERGE with swapped a/b arguments
+define i64 @merge_i64_2(i64 %mask, i64 %b, i64 %a) nounwind {
+; CHECK-LABEL: merge_i64_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test MVM: result overwrites rs1 (%a)
+define i64 @mvm_i64(i64 %a, i64 %mask, i64 %b) nounwind {
+; CHECK-LABEL: mvm_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test MVM with mask as last argument
+define i64 @mvm_i64_2(i64 %a, i64 %b, i64 %mask) nounwind {
+; CHECK-LABEL: mvm_i64_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test MVMN: result overwrites rs2 (%b)
+define i64 @mvmn_i64(i64 %b, i64 %mask, i64 %a) nounwind {
+; CHECK-LABEL: mvmn_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test MVMN with mask as last argument
+define i64 @mvmn_i64_2(i64 %b, i64 %a, i64 %mask) nounwind {
+; CHECK-LABEL: mvmn_i64_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test case where none of the source operands can be overwritten,
+; requiring a mv before merge
+define i64 @merge_i64_mv(i64 %mask, i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: merge_i64_mv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mv a3, a0
+; CHECK-NEXT:    merge a3, a1, a2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    add a0, a3, a0
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  %sum1 = add i64 %or, %mask
+  %sum2 = add i64 %sum1, %a
+  %sum3 = add i64 %sum2, %b
+  ret i64 %sum3
+}
+
+; Test alternate merge pattern: (a ^ b) & mask ^ a
+define i64 @merge_xor_i64(i64 %mask, i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: merge_xor_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a1, a2
+; CHECK-NEXT:    ret
+  %xor1 = xor i64 %a, %b
+  %and = and i64 %xor1, %mask
+  %xor2 = xor i64 %and, %a
+  ret i64 %xor2
+}
+
+; Test alternate merge pattern with different argument order for MVM
+define i64 @mvm_xor_i64(i64 %a, i64 %mask, i64 %b) nounwind {
+; CHECK-LABEL: mvm_xor_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a2, a1
+; CHECK-NEXT:    ret
+  %xor1 = xor i64 %a, %b
+  %and = and i64 %xor1, %mask
+  %xor2 = xor i64 %and, %a
+  ret i64 %xor2
+}
+
+; Test alternate merge pattern with different argument order for MVMN
+define i64 @mvmn_xor_i64(i64 %b, i64 %mask, i64 %a) nounwind {
+; CHECK-LABEL: mvmn_xor_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a2, a1
+; CHECK-NEXT:    ret
+  %xor1 = xor i64 %a, %b
+  %and = and i64 %xor1, %mask
+  %xor2 = xor i64 %and, %a
+  ret i64 %xor2
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/180677


More information about the llvm-commits mailing list