[llvm] [RISCV] Teach SelectAddrRegRegScale that ADD is commutable. (PR #149231)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 16 21:50:55 PDT 2025


https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/149231

>From bce9a7ac63d1555b80b41c6a501ddaf18f267089 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 16 Jul 2025 18:06:10 -0700
Subject: [PATCH 1/4] [RISCV] Teach SelectAddrRegRegScale that ADD is
 commutable.

Still need to add profitability checks.
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 26 +++++++++-----
 llvm/test/CodeGen/RISCV/xqcisls.ll          | 10 +++---
 llvm/test/CodeGen/RISCV/xtheadmemidx.ll     | 40 ++++++++++-----------
 3 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 0f948b22759fe..0d2fac903d960 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3058,17 +3058,27 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
   };
 
   if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
+    // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
     if (LHS.getOpcode() == ISD::ADD &&
-        SelectShl(LHS.getOperand(0), Index, Scale) &&
         !isa<ConstantSDNode>(LHS.getOperand(1)) &&
         isInt<12>(C1->getSExtValue())) {
-      // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
-      SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
-                                                SDLoc(Addr), VT);
-      Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
-                                            LHS.getOperand(1), C1Val),
-                     0);
-      return true;
+      if (SelectShl(LHS.getOperand(1), Index, Scale)) {
+        SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
+                                                  SDLoc(Addr), VT);
+        Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
+                                              LHS.getOperand(0), C1Val),
+                       0);
+        return true;
+      }
+
+      if (SelectShl(LHS.getOperand(0), Index, Scale)) {
+        SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
+                                                  SDLoc(Addr), VT);
+        Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
+                                              LHS.getOperand(1), C1Val),
+                       0);
+        return true;
+      }
     }
 
     // Don't match add with constants.
diff --git a/llvm/test/CodeGen/RISCV/xqcisls.ll b/llvm/test/CodeGen/RISCV/xqcisls.ll
index 828a0760044aa..709dc4ce074dc 100644
--- a/llvm/test/CodeGen/RISCV/xqcisls.ll
+++ b/llvm/test/CodeGen/RISCV/xqcisls.ll
@@ -309,8 +309,8 @@ define i64 @lrd(ptr %a, i32 %b) {
 ; RV32IZBAXQCISLS-LABEL: lrd:
 ; RV32IZBAXQCISLS:       # %bb.0:
 ; RV32IZBAXQCISLS-NEXT:    qc.lrw a2, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT:    sh3add a0, a1, a0
-; RV32IZBAXQCISLS-NEXT:    lw a1, 4(a0)
+; RV32IZBAXQCISLS-NEXT:    addi a0, a0, 4
+; RV32IZBAXQCISLS-NEXT:    qc.lrw a1, a0, a1, 3
 ; RV32IZBAXQCISLS-NEXT:    add a0, a2, a2
 ; RV32IZBAXQCISLS-NEXT:    sltu a2, a0, a2
 ; RV32IZBAXQCISLS-NEXT:    add a1, a1, a1
@@ -473,10 +473,10 @@ define void @srd(ptr %a, i32 %b, i64 %c) {
 ; RV32IZBAXQCISLS-NEXT:    add a4, a2, a2
 ; RV32IZBAXQCISLS-NEXT:    add a3, a3, a3
 ; RV32IZBAXQCISLS-NEXT:    sltu a2, a4, a2
-; RV32IZBAXQCISLS-NEXT:    add a2, a3, a2
-; RV32IZBAXQCISLS-NEXT:    sh3add a3, a1, a0
 ; RV32IZBAXQCISLS-NEXT:    qc.srw a4, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT:    sw a2, 4(a3)
+; RV32IZBAXQCISLS-NEXT:    add a2, a3, a2
+; RV32IZBAXQCISLS-NEXT:    addi a0, a0, 4
+; RV32IZBAXQCISLS-NEXT:    qc.srw a2, a0, a1, 3
 ; RV32IZBAXQCISLS-NEXT:    ret
   %1 = add i64 %c, %c
   %2 = getelementptr i64, ptr %a, i32 %b
diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
index 578f51a957a75..fc20fcb371179 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
@@ -858,14 +858,13 @@ define i64 @lurwu(ptr %a, i32 %b) {
 define i64 @lrd(ptr %a, i64 %b) {
 ; RV32XTHEADMEMIDX-LABEL: lrd:
 ; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    slli a2, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    th.lrw a2, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
 ; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    add a0, a0, a2
-; RV32XTHEADMEMIDX-NEXT:    lw a2, 4(a0)
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
+; RV32XTHEADMEMIDX-NEXT:    add a0, a2, a2
+; RV32XTHEADMEMIDX-NEXT:    sltu a2, a0, a2
+; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a1
+; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
 ; RV32XTHEADMEMIDX-NEXT:    ret
 ;
 ; RV64XTHEADMEMIDX-LABEL: lrd:
@@ -908,14 +907,13 @@ define i64 @lrd_2(ptr %a, i64 %b) {
 define i64 @lurd(ptr %a, i32 %b) {
 ; RV32XTHEADMEMIDX-LABEL: lurd:
 ; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    slli a2, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    th.lrw a2, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
 ; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    add a0, a0, a2
-; RV32XTHEADMEMIDX-NEXT:    lw a2, 4(a0)
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
+; RV32XTHEADMEMIDX-NEXT:    add a0, a2, a2
+; RV32XTHEADMEMIDX-NEXT:    sltu a2, a0, a2
+; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a1
+; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
 ; RV32XTHEADMEMIDX-NEXT:    ret
 ;
 ; RV64XTHEADMEMIDX-LABEL: lurd:
@@ -1047,11 +1045,10 @@ define void @srd(ptr %a, i64 %b, i64 %c) {
 ; RV32XTHEADMEMIDX-NEXT:    add a2, a3, a3
 ; RV32XTHEADMEMIDX-NEXT:    add a4, a4, a4
 ; RV32XTHEADMEMIDX-NEXT:    sltu a3, a2, a3
-; RV32XTHEADMEMIDX-NEXT:    add a3, a4, a3
-; RV32XTHEADMEMIDX-NEXT:    slli a4, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    add a4, a0, a4
 ; RV32XTHEADMEMIDX-NEXT:    th.srw a2, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    sw a3, 4(a4)
+; RV32XTHEADMEMIDX-NEXT:    add a3, a4, a3
+; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
+; RV32XTHEADMEMIDX-NEXT:    th.srw a3, a0, a1, 3
 ; RV32XTHEADMEMIDX-NEXT:    ret
 ;
 ; RV64XTHEADMEMIDX-LABEL: srd:
@@ -1071,11 +1068,10 @@ define void @surd(ptr %a, i32 %b, i64 %c) {
 ; RV32XTHEADMEMIDX-NEXT:    add a4, a2, a2
 ; RV32XTHEADMEMIDX-NEXT:    add a3, a3, a3
 ; RV32XTHEADMEMIDX-NEXT:    sltu a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT:    slli a3, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    add a3, a0, a3
 ; RV32XTHEADMEMIDX-NEXT:    th.srw a4, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    sw a2, 4(a3)
+; RV32XTHEADMEMIDX-NEXT:    add a2, a3, a2
+; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
+; RV32XTHEADMEMIDX-NEXT:    th.srw a2, a0, a1, 3
 ; RV32XTHEADMEMIDX-NEXT:    ret
 ;
 ; RV64XTHEADMEMIDX-LABEL: surd:

>From be6ac96b8a8d13a054d0c51ec0be690d6a37c5ea Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 16 Jul 2025 21:39:33 -0700
Subject: [PATCH 2/4] Pre-commit tests.

---
 llvm/test/CodeGen/RISCV/xtheadmemidx.ll | 1252 +++++++++++------------
 1 file changed, 587 insertions(+), 665 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
index fc20fcb371179..4d039977a73f8 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
@@ -1,23 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefix=RV32XTHEADMEMIDX
-; RUN: llc -mtriple=riscv64 -mattr=+d -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefix=RV64XTHEADMEMIDX
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV32,RV32XTHEADMEMIDX
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64,RV64XTHEADMEMIDX
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmemidx,+xtheadba -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV32,RV32XTHEADBA
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmemidx,+xtheadba -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64,RV64XTHEADBA
 
 define ptr @lbia(ptr %base, ptr %addr.2, i8 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lbia a3, (a0), -1, 0
-; RV32XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT:    sb a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lbia a3, (a0), -1, 0
-; RV64XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT:    sb a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: lbia:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    th.lbia a3, (a0), -1, 0
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    sb a2, 0(a1)
+; CHECK-NEXT:    ret
   %addr = getelementptr i8, ptr %base, i8 0
   %ld = load i8, ptr %addr
   %addr.1 = getelementptr i8, ptr %base, i8 -1
@@ -27,19 +24,12 @@ define ptr @lbia(ptr %base, ptr %addr.2, i8 %a) {
 }
 
 define ptr @lbib(ptr %base, i8 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lbib a2, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    sb a1, 1(a0)
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lbib a2, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT:    sb a1, 1(a0)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: lbib:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    th.lbib a2, (a0), 1, 0
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    sb a1, 1(a0)
+; CHECK-NEXT:    ret
   %addr = getelementptr i8, ptr %base, i8 1
   %ld = load i8, ptr %addr
   %addr.1 = getelementptr i8, ptr %base, i8 2
@@ -49,22 +39,22 @@ define ptr @lbib(ptr %base, i8 %a) {
 }
 
 define ptr @lbuia(ptr %base, ptr %addr.2, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbuia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lbuia a4, (a0), -1, 0
-; RV32XTHEADMEMIDX-NEXT:    add a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT:    sltu a4, a2, a4
-; RV32XTHEADMEMIDX-NEXT:    add a3, a3, a4
-; RV32XTHEADMEMIDX-NEXT:    sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT:    sw a3, 4(a1)
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lbuia:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lbuia a4, (a0), -1, 0
+; RV32-NEXT:    add a2, a4, a2
+; RV32-NEXT:    sltu a4, a2, a4
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    sw a2, 0(a1)
+; RV32-NEXT:    sw a3, 4(a1)
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lbuia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lbuia a3, (a0), -1, 0
-; RV64XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT:    sd a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lbuia:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lbuia a3, (a0), -1, 0
+; RV64-NEXT:    add a2, a3, a2
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
   %addr = getelementptr i8, ptr %base, i8 0
   %ld = load i8, ptr %addr
   %zext = zext i8 %ld to i64
@@ -75,22 +65,22 @@ define ptr @lbuia(ptr %base, ptr %addr.2, i64 %a) {
 }
 
 define ptr @lbuib(ptr %base, i64 %a, ptr %addr.1) {
-; RV32XTHEADMEMIDX-LABEL: lbuib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lbuib a4, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT:    add a1, a4, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a4, a1, a4
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT:    sw a1, 0(a3)
-; RV32XTHEADMEMIDX-NEXT:    sw a2, 4(a3)
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lbuib:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lbuib a4, (a0), 1, 0
+; RV32-NEXT:    add a1, a4, a1
+; RV32-NEXT:    sltu a4, a1, a4
+; RV32-NEXT:    add a2, a2, a4
+; RV32-NEXT:    sw a1, 0(a3)
+; RV32-NEXT:    sw a2, 4(a3)
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lbuib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lbuib a3, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT:    add a1, a3, a1
-; RV64XTHEADMEMIDX-NEXT:    sd a1, 0(a2)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lbuib:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lbuib a3, (a0), 1, 0
+; RV64-NEXT:    add a1, a3, a1
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
   %addr = getelementptr i8, ptr %base, i8 1
   %ld = load i8, ptr %addr
   %zext = zext i8 %ld to i64
@@ -100,19 +90,12 @@ define ptr @lbuib(ptr %base, i64 %a, ptr %addr.1) {
 }
 
 define ptr @lhia(ptr %base, ptr %addr.2, i16 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lhia a3, (a0), -16, 1
-; RV32XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT:    sh a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lhia a3, (a0), -16, 1
-; RV64XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT:    sh a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: lhia:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    th.lhia a3, (a0), -16, 1
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    sh a2, 0(a1)
+; CHECK-NEXT:    ret
   %addr = getelementptr i16, ptr %base, i16 0
   %ld = load i16, ptr %addr
   %addr.1 = getelementptr i16, ptr %base, i16 -16
@@ -122,19 +105,12 @@ define ptr @lhia(ptr %base, ptr %addr.2, i16 %a) {
 }
 
 define ptr @lhib(ptr %base, i16 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lhib a2, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    sh a1, 2(a0)
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lhib a2, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT:    sh a1, 2(a0)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: lhib:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    th.lhib a2, (a0), 2, 0
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    sh a1, 2(a0)
+; CHECK-NEXT:    ret
   %addr = getelementptr i16, ptr %base, i16 1
   %ld = load i16, ptr %addr
   %addr.1 = getelementptr i16, ptr %base, i16 2
@@ -144,22 +120,22 @@ define ptr @lhib(ptr %base, i16 %a) {
 }
 
 define ptr @lhuia(ptr %base, ptr %addr.2, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhuia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lhuia a4, (a0), -16, 1
-; RV32XTHEADMEMIDX-NEXT:    add a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT:    sltu a4, a2, a4
-; RV32XTHEADMEMIDX-NEXT:    add a3, a3, a4
-; RV32XTHEADMEMIDX-NEXT:    sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT:    sw a3, 4(a1)
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lhuia:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lhuia a4, (a0), -16, 1
+; RV32-NEXT:    add a2, a4, a2
+; RV32-NEXT:    sltu a4, a2, a4
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    sw a2, 0(a1)
+; RV32-NEXT:    sw a3, 4(a1)
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lhuia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lhuia a3, (a0), -16, 1
-; RV64XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT:    sd a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lhuia:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lhuia a3, (a0), -16, 1
+; RV64-NEXT:    add a2, a3, a2
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
   %addr = getelementptr i16, ptr %base, i16 0
   %ld = load i16, ptr %addr
   %zext = zext i16 %ld to i64
@@ -170,22 +146,22 @@ define ptr @lhuia(ptr %base, ptr %addr.2, i64 %a) {
 }
 
 define ptr @lhuib(ptr %base, i64 %a, ptr %addr.1) {
-; RV32XTHEADMEMIDX-LABEL: lhuib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lhuib a4, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT:    add a1, a4, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a4, a1, a4
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT:    sw a1, 0(a3)
-; RV32XTHEADMEMIDX-NEXT:    sw a2, 4(a3)
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lhuib:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lhuib a4, (a0), 2, 0
+; RV32-NEXT:    add a1, a4, a1
+; RV32-NEXT:    sltu a4, a1, a4
+; RV32-NEXT:    add a2, a2, a4
+; RV32-NEXT:    sw a1, 0(a3)
+; RV32-NEXT:    sw a2, 4(a3)
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lhuib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lhuib a3, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT:    add a1, a3, a1
-; RV64XTHEADMEMIDX-NEXT:    sd a1, 0(a2)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lhuib:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lhuib a3, (a0), 2, 0
+; RV64-NEXT:    add a1, a3, a1
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
   %addr = getelementptr i16, ptr %base, i16 1
   %ld = load i16, ptr %addr
   %zext = zext i16 %ld to i64
@@ -195,19 +171,12 @@ define ptr @lhuib(ptr %base, i64 %a, ptr %addr.1) {
 }
 
 define ptr @lwia(ptr %base, ptr %addr.2, i32 %a) {
-; RV32XTHEADMEMIDX-LABEL: lwia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lwia a3, (a0), -16, 2
-; RV32XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT:    sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: lwia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lwia a3, (a0), -16, 2
-; RV64XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT:    sw a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: lwia:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    th.lwia a3, (a0), -16, 2
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    sw a2, 0(a1)
+; CHECK-NEXT:    ret
   %addr = getelementptr i32, ptr %base, i32 0
   %ld = load i32, ptr %addr
   %addr.1 = getelementptr i32, ptr %base, i32 -16
@@ -217,19 +186,12 @@ define ptr @lwia(ptr %base, ptr %addr.2, i32 %a) {
 }
 
 define ptr @lwib(ptr %base, i32 %a) {
-; RV32XTHEADMEMIDX-LABEL: lwib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lwib a2, (a0), 4, 0
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    sw a1, 4(a0)
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: lwib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lwib a2, (a0), 4, 0
-; RV64XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT:    sw a1, 4(a0)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: lwib:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    th.lwib a2, (a0), 4, 0
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    sw a1, 4(a0)
+; CHECK-NEXT:    ret
   %addr = getelementptr i32, ptr %base, i32 1
   %ld = load i32, ptr %addr
   %addr.1 = getelementptr i32, ptr %base, i32 2
@@ -239,22 +201,22 @@ define ptr @lwib(ptr %base, i32 %a) {
 }
 
 define ptr @lwuia(ptr %base, ptr %addr.2, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: lwuia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lwia a4, (a0), -16, 2
-; RV32XTHEADMEMIDX-NEXT:    add a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT:    sltu a4, a2, a4
-; RV32XTHEADMEMIDX-NEXT:    add a3, a3, a4
-; RV32XTHEADMEMIDX-NEXT:    sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT:    sw a3, 4(a1)
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lwuia:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lwia a4, (a0), -16, 2
+; RV32-NEXT:    add a2, a4, a2
+; RV32-NEXT:    sltu a4, a2, a4
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    sw a2, 0(a1)
+; RV32-NEXT:    sw a3, 4(a1)
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lwuia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lwuia a3, (a0), -16, 2
-; RV64XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT:    sd a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lwuia:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lwuia a3, (a0), -16, 2
+; RV64-NEXT:    add a2, a3, a2
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
   %addr = getelementptr i32, ptr %base, i32 0
   %ld = load i32, ptr %addr
   %zext = zext i32 %ld to i64
@@ -265,22 +227,22 @@ define ptr @lwuia(ptr %base, ptr %addr.2, i64 %a) {
 }
 
 define ptr @lwuib(ptr %base, i64 %a, ptr %addr.1) {
-; RV32XTHEADMEMIDX-LABEL: lwuib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lwib a4, (a0), 4, 0
-; RV32XTHEADMEMIDX-NEXT:    add a1, a4, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a4, a1, a4
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT:    sw a1, 0(a3)
-; RV32XTHEADMEMIDX-NEXT:    sw a2, 4(a3)
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lwuib:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lwib a4, (a0), 4, 0
+; RV32-NEXT:    add a1, a4, a1
+; RV32-NEXT:    sltu a4, a1, a4
+; RV32-NEXT:    add a2, a2, a4
+; RV32-NEXT:    sw a1, 0(a3)
+; RV32-NEXT:    sw a2, 4(a3)
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lwuib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lwuib a3, (a0), 4, 0
-; RV64XTHEADMEMIDX-NEXT:    add a1, a3, a1
-; RV64XTHEADMEMIDX-NEXT:    sd a1, 0(a2)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lwuib:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lwuib a3, (a0), 4, 0
+; RV64-NEXT:    add a1, a3, a1
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
   %addr = getelementptr i32, ptr %base, i32 1
   %ld = load i32, ptr %addr
   %zext = zext i32 %ld to i64
@@ -290,25 +252,25 @@ define ptr @lwuib(ptr %base, i64 %a, ptr %addr.1) {
 }
 
 define ptr @ldia(ptr %base, ptr %addr.2, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: ldia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    lw a4, 4(a0)
-; RV32XTHEADMEMIDX-NEXT:    lw a5, 0(a0)
-; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, -128
-; RV32XTHEADMEMIDX-NEXT:    add a3, a4, a3
-; RV32XTHEADMEMIDX-NEXT:    add a2, a5, a2
-; RV32XTHEADMEMIDX-NEXT:    sltu a4, a2, a5
-; RV32XTHEADMEMIDX-NEXT:    add a3, a3, a4
-; RV32XTHEADMEMIDX-NEXT:    sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT:    sw a3, 4(a1)
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: ldia:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a4, 4(a0)
+; RV32-NEXT:    lw a5, 0(a0)
+; RV32-NEXT:    addi a0, a0, -128
+; RV32-NEXT:    add a3, a4, a3
+; RV32-NEXT:    add a2, a5, a2
+; RV32-NEXT:    sltu a4, a2, a5
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    sw a2, 0(a1)
+; RV32-NEXT:    sw a3, 4(a1)
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: ldia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.ldia a3, (a0), -16, 3
-; RV64XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT:    sd a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: ldia:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.ldia a3, (a0), -16, 3
+; RV64-NEXT:    add a2, a3, a2
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
   %addr = getelementptr i64, ptr %base, i64 0
   %ld = load i64, ptr %addr
   %addr.1 = getelementptr i64, ptr %base, i64 -16
@@ -318,24 +280,24 @@ define ptr @ldia(ptr %base, ptr %addr.2, i64 %a) {
 }
 
 define ptr @ldib(ptr %base, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: ldib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lwib a3, (a0), 8, 0
-; RV32XTHEADMEMIDX-NEXT:    lw a4, 4(a0)
-; RV32XTHEADMEMIDX-NEXT:    add a1, a3, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a3, a1, a3
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a3
-; RV32XTHEADMEMIDX-NEXT:    add a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT:    sw a1, 8(a0)
-; RV32XTHEADMEMIDX-NEXT:    sw a2, 12(a0)
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: ldib:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lwib a3, (a0), 8, 0
+; RV32-NEXT:    lw a4, 4(a0)
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    sltu a3, a1, a3
+; RV32-NEXT:    add a2, a2, a3
+; RV32-NEXT:    add a2, a4, a2
+; RV32-NEXT:    sw a1, 8(a0)
+; RV32-NEXT:    sw a2, 12(a0)
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: ldib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.ldib a2, (a0), 8, 0
-; RV64XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT:    sd a1, 8(a0)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: ldib:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.ldib a2, (a0), 8, 0
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    sd a1, 8(a0)
+; RV64-NEXT:    ret
   %addr = getelementptr i64, ptr %base, i64 1
   %ld = load i64, ptr %addr
   %addr.1 = getelementptr i64, ptr %base, i64 2
@@ -345,17 +307,11 @@ define ptr @ldib(ptr %base, i64 %a) {
 }
 
 define ptr @sbia(ptr %base, i8 %a, i8 %b) {
-; RV32XTHEADMEMIDX-LABEL: sbia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT:    th.sbia a1, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: sbia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT:    th.sbia a1, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: sbia:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    th.sbia a1, (a0), 1, 0
+; CHECK-NEXT:    ret
   %addr.1 = getelementptr i8, ptr %base, i8 1
   %res = add i8 %a, %b
   store i8 %res, ptr %base
@@ -363,17 +319,11 @@ define ptr @sbia(ptr %base, i8 %a, i8 %b) {
 }
 
 define ptr @sbib(ptr %base, i8 %a, i8 %b) {
-; RV32XTHEADMEMIDX-LABEL: sbib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT:    th.sbib a1, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: sbib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT:    th.sbib a1, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: sbib:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    th.sbib a1, (a0), 1, 0
+; CHECK-NEXT:    ret
   %addr.1 = getelementptr i8, ptr %base, i8 1
   %res = add i8 %a, %b
   store i8 %res, ptr %addr.1
@@ -381,17 +331,11 @@ define ptr @sbib(ptr %base, i8 %a, i8 %b) {
 }
 
 define ptr @shia(ptr %base, i16 %a, i16 %b) {
-; RV32XTHEADMEMIDX-LABEL: shia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT:    th.shia a1, (a0), -9, 1
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: shia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT:    th.shia a1, (a0), -9, 1
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: shia:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    th.shia a1, (a0), -9, 1
+; CHECK-NEXT:    ret
   %addr.1 = getelementptr i16, ptr %base, i16 -9
   %res = add i16 %a, %b
   store i16 %res, ptr %base
@@ -399,17 +343,11 @@ define ptr @shia(ptr %base, i16 %a, i16 %b) {
 }
 
 define ptr @shib(ptr %base, i16 %a, i16 %b) {
-; RV32XTHEADMEMIDX-LABEL: shib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT:    th.shib a1, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: shib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT:    th.shib a1, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: shib:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    th.shib a1, (a0), 2, 0
+; CHECK-NEXT:    ret
   %addr.1 = getelementptr i16, ptr %base, i16 1
   %res = add i16 %a, %b
   store i16 %res, ptr %addr.1
@@ -417,17 +355,11 @@ define ptr @shib(ptr %base, i16 %a, i16 %b) {
 }
 
 define ptr @swia(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: swia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT:    th.swia a1, (a0), 8, 2
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: swia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT:    th.swia a1, (a0), 8, 2
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: swia:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    th.swia a1, (a0), 8, 2
+; CHECK-NEXT:    ret
   %addr.1 = getelementptr i32, ptr %base, i32 8
   %res = add i32 %a, %b
   store i32 %res, ptr %base
@@ -435,17 +367,11 @@ define ptr @swia(ptr %base, i32 %a, i32 %b) {
 }
 
 define ptr @swib(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: swib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT:    th.swib a1, (a0), -13, 3
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: swib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT:    th.swib a1, (a0), -13, 3
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: swib:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    th.swib a1, (a0), -13, 3
+; CHECK-NEXT:    ret
   %addr.1 = getelementptr i32, ptr %base, i32 -26
   %res = add i32 %a, %b
   store i32 %res, ptr %addr.1
@@ -453,23 +379,23 @@ define ptr @swib(ptr %base, i32 %a, i32 %b) {
 }
 
 define ptr @sdia(ptr %base, i64 %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: sdia:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    addi a5, a0, 64
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT:    add a3, a1, a3
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a3, a1
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    sw a3, 0(a0)
-; RV32XTHEADMEMIDX-NEXT:    sw a1, 4(a0)
-; RV32XTHEADMEMIDX-NEXT:    mv a0, a5
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: sdia:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a5, a0, 64
+; RV32-NEXT:    add a2, a2, a4
+; RV32-NEXT:    add a3, a1, a3
+; RV32-NEXT:    sltu a1, a3, a1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    sw a3, 0(a0)
+; RV32-NEXT:    sw a1, 4(a0)
+; RV32-NEXT:    mv a0, a5
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: sdia:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT:    th.sdia a1, (a0), 8, 3
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: sdia:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    th.sdia a1, (a0), 8, 3
+; RV64-NEXT:    ret
   %addr.1 = getelementptr i64, ptr %base, i64 8
   %res = add i64 %a, %b
   store i64 %res, ptr %base
@@ -477,21 +403,21 @@ define ptr @sdia(ptr %base, i64 %a, i64 %b) {
 }
 
 define ptr @sdib(ptr %base, i64 %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: sdib:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT:    add a3, a1, a3
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a3, a1
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    th.swib a3, (a0), 8, 0
-; RV32XTHEADMEMIDX-NEXT:    sw a1, 4(a0)
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: sdib:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a2, a2, a4
+; RV32-NEXT:    add a3, a1, a3
+; RV32-NEXT:    sltu a1, a3, a1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    th.swib a3, (a0), 8, 0
+; RV32-NEXT:    sw a1, 4(a0)
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: sdib:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT:    th.sdib a1, (a0), 8, 0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: sdib:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    th.sdib a1, (a0), 8, 0
+; RV64-NEXT:    ret
   %addr.1 = getelementptr i64, ptr %base, i64 1
   %res = add i64 %a, %b
   store i64 %res, ptr %addr.1
@@ -499,36 +425,31 @@ define ptr @sdib(ptr %base, i64 %a, i64 %b) {
 }
 
 define i8 @lrb_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrb_anyext:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrb a0, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrb_anyext:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lrb a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: lrb_anyext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    th.lrb a0, a0, a1, 0
+; CHECK-NEXT:    ret
   %1 = getelementptr i8, ptr %a, i64 %b
   %2 = load i8, ptr %1, align 1
   ret i8 %2
 }
 
 define i64 @lrb(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrb:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrb a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT:    srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lrb:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrb a1, a0, a1, 0
+; RV32-NEXT:    srai a2, a1, 31
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    add a2, a2, a2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lrb:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lrb a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lrb:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lrb a0, a0, a1, 0
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = getelementptr i8, ptr %a, i64 %b
   %2 = load i8, ptr %1, align 1
   %3 = sext i8 %2 to i64
@@ -537,15 +458,15 @@ define i64 @lrb(ptr %a, i64 %b) {
 }
 
 define i8 @lurb_anyext(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: lurb_anyext:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrb a0, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lurb_anyext:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrb a0, a0, a1, 0
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lurb_anyext:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lurb a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lurb_anyext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lurb a0, a0, a1, 0
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i8, ptr %a, i64 %1
   %3 = load i8, ptr %2, align 1
@@ -553,21 +474,21 @@ define i8 @lurb_anyext(ptr %a, i32 %b) {
 }
 
 define i64 @lurb(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: lurb:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrb a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT:    srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lurb:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrb a1, a0, a1, 0
+; RV32-NEXT:    srai a2, a1, 31
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    add a2, a2, a2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lurb:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lurb a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lurb:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lurb a0, a0, a1, 0
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i8, ptr %a, i64 %1
   %3 = load i8, ptr %2, align 1
@@ -577,18 +498,18 @@ define i64 @lurb(ptr %a, i32 %b) {
 }
 
 define i64 @lrbu(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrbu:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrbu a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lrbu:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrbu a1, a0, a1, 0
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lrbu:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lrbu a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lrbu:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lrbu a0, a0, a1, 0
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = getelementptr i8, ptr %a, i64 %b
   %2 = load i8, ptr %1, align 1
   %3 = zext i8 %2 to i64
@@ -597,18 +518,18 @@ define i64 @lrbu(ptr %a, i64 %b) {
 }
 
 define i64 @lurbu(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: lurbu:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrbu a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lurbu:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrbu a1, a0, a1, 0
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lurbu:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lurbu a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lurbu:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lurbu a0, a0, a1, 0
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i8, ptr %a, i64 %1
   %3 = load i8, ptr %2, align 1
@@ -618,36 +539,31 @@ define i64 @lurbu(ptr %a, i32 %b) {
 }
 
 define i16 @lrh_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrh_anyext:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrh a0, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrh_anyext:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lrh a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: lrh_anyext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    th.lrh a0, a0, a1, 1
+; CHECK-NEXT:    ret
   %1 = getelementptr i16, ptr %a, i64 %b
   %2 = load i16, ptr %1, align 2
   ret i16 %2
 }
 
 define i64 @lrh(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrh:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrh a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT:    srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lrh:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrh a1, a0, a1, 1
+; RV32-NEXT:    srai a2, a1, 31
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    add a2, a2, a2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lrh:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lrh a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lrh:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lrh a0, a0, a1, 1
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = getelementptr i16, ptr %a, i64 %b
   %2 = load i16, ptr %1, align 2
   %3 = sext i16 %2 to i64
@@ -656,15 +572,15 @@ define i64 @lrh(ptr %a, i64 %b) {
 }
 
 define i16 @lurh_anyext(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: lurh_anyext:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrh a0, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lurh_anyext:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrh a0, a0, a1, 1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lurh_anyext:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lurh a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lurh_anyext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lurh a0, a0, a1, 1
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i16, ptr %a, i64 %1
   %3 = load i16, ptr %2, align 2
@@ -672,21 +588,21 @@ define i16 @lurh_anyext(ptr %a, i32 %b) {
 }
 
 define i64 @lurh(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: lurh:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrh a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT:    srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lurh:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrh a1, a0, a1, 1
+; RV32-NEXT:    srai a2, a1, 31
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    add a2, a2, a2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lurh:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lurh a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lurh:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lurh a0, a0, a1, 1
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i16, ptr %a, i64 %1
   %3 = load i16, ptr %2, align 2
@@ -696,18 +612,18 @@ define i64 @lurh(ptr %a, i32 %b) {
 }
 
 define i64 @lrhu(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrhu:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrhu a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lrhu:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrhu a1, a0, a1, 1
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lrhu:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lrhu a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lrhu:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lrhu a0, a0, a1, 1
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = getelementptr i16, ptr %a, i64 %b
   %2 = load i16, ptr %1, align 2
   %3 = zext i16 %2 to i64
@@ -716,18 +632,18 @@ define i64 @lrhu(ptr %a, i64 %b) {
 }
 
 define i64 @lurhu(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: lurhu:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrhu a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lurhu:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrhu a1, a0, a1, 1
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lurhu:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lurhu a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lurhu:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lurhu a0, a0, a1, 1
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i16, ptr %a, i64 %1
   %3 = load i16, ptr %2, align 2
@@ -737,36 +653,31 @@ define i64 @lurhu(ptr %a, i32 %b) {
 }
 
 define i32 @lrw_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrw_anyext:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a0, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrw_anyext:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lrw a0, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: lrw_anyext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    th.lrw a0, a0, a1, 2
+; CHECK-NEXT:    ret
   %1 = getelementptr i32, ptr %a, i64 %b
   %2 = load i32, ptr %1, align 4
   ret i32 %2
 }
 
 define i64 @lrw(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrw:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT:    srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lrw:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrw a1, a0, a1, 2
+; RV32-NEXT:    srai a2, a1, 31
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    add a2, a2, a2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lrw:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lrw a0, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lrw:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lrw a0, a0, a1, 2
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = getelementptr i32, ptr %a, i64 %b
   %2 = load i32, ptr %1, align 4
   %3 = sext i32 %2 to i64
@@ -775,15 +686,15 @@ define i64 @lrw(ptr %a, i64 %b) {
 }
 
 define i32 @lurw_anyext(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: lurw_anyext:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a0, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lurw_anyext:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrw a0, a0, a1, 2
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lurw_anyext:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lurw a0, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lurw_anyext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lurw a0, a0, a1, 2
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i32, ptr %a, i64 %1
   %3 = load i32, ptr %2, align 4
@@ -791,21 +702,21 @@ define i32 @lurw_anyext(ptr %a, i32 %b) {
 }
 
 define i64 @lurw(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: lurw:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT:    srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lurw:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrw a1, a0, a1, 2
+; RV32-NEXT:    srai a2, a1, 31
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    add a2, a2, a2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lurw:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lurw a0, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lurw:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lurw a0, a0, a1, 2
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i32, ptr %a, i64 %1
   %3 = load i32, ptr %2, align 4
@@ -815,18 +726,18 @@ define i64 @lurw(ptr %a, i32 %b) {
 }
 
 define i64 @lrwu(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrwu:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lrwu:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrw a1, a0, a1, 2
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lrwu:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lrwu a0, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lrwu:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lrwu a0, a0, a1, 2
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = getelementptr i32, ptr %a, i64 %b
   %2 = load i32, ptr %1, align 4
   %3 = zext i32 %2 to i64
@@ -835,18 +746,18 @@ define i64 @lrwu(ptr %a, i64 %b) {
 }
 
 define i64 @lurwu(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: lurwu:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT:    add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lurwu:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrw a1, a0, a1, 2
+; RV32-NEXT:    add a0, a1, a1
+; RV32-NEXT:    sltu a1, a0, a1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lurwu:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lurwu a0, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lurwu:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lurwu a0, a0, a1, 2
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i32, ptr %a, i64 %1
   %3 = load i32, ptr %2, align 4
@@ -856,22 +767,22 @@ define i64 @lurwu(ptr %a, i32 %b) {
 }
 
 define i64 @lrd(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrd:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a2, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    add a0, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    sltu a2, a0, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lrd:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrw a2, a0, a1, 3
+; RV32-NEXT:    addi a0, a0, 4
+; RV32-NEXT:    th.lrw a1, a0, a1, 3
+; RV32-NEXT:    add a0, a2, a2
+; RV32-NEXT:    sltu a2, a0, a2
+; RV32-NEXT:    add a1, a1, a1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lrd:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lrd a0, a0, a1, 3
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lrd:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lrd a0, a0, a1, 3
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = getelementptr i64, ptr %a, i64 %b
   %2 = load i64, ptr %1, align 8
   %3 = add i64 %2, %2
@@ -879,24 +790,24 @@ define i64 @lrd(ptr %a, i64 %b) {
 }
 
 define i64 @lrd_2(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrd_2:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    addi a2, a0, 96
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a2, a2, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 100
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    add a0, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    sltu a2, a0, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lrd_2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a2, a0, 96
+; RV32-NEXT:    th.lrw a2, a2, a1, 3
+; RV32-NEXT:    addi a0, a0, 100
+; RV32-NEXT:    th.lrw a1, a0, a1, 3
+; RV32-NEXT:    add a0, a2, a2
+; RV32-NEXT:    sltu a2, a0, a2
+; RV32-NEXT:    add a1, a1, a1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lrd_2:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    addi a0, a0, 96
-; RV64XTHEADMEMIDX-NEXT:    th.lrd a0, a0, a1, 3
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lrd_2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a0, a0, 96
+; RV64-NEXT:    th.lrd a0, a0, a1, 3
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = add i64 %b, 12
   %2 = getelementptr i64, ptr %a, i64 %1
   %3 = load i64, ptr %2, align 8
@@ -905,22 +816,22 @@ define i64 @lrd_2(ptr %a, i64 %b) {
 }
 
 define i64 @lurd(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: lurd:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a2, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
-; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    add a0, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    sltu a2, a0, a2
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a1
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lurd:
+; RV32:       # %bb.0:
+; RV32-NEXT:    th.lrw a2, a0, a1, 3
+; RV32-NEXT:    addi a0, a0, 4
+; RV32-NEXT:    th.lrw a1, a0, a1, 3
+; RV32-NEXT:    add a0, a2, a2
+; RV32-NEXT:    sltu a2, a0, a2
+; RV32-NEXT:    add a1, a1, a1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lurd:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    th.lurd a0, a0, a1, 3
-; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lurd:
+; RV64:       # %bb.0:
+; RV64-NEXT:    th.lurd a0, a0, a1, 3
+; RV64-NEXT:    add a0, a0, a0
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i64, ptr %a, i64 %1
   %3 = load i64, ptr %2, align 8
@@ -929,17 +840,17 @@ define i64 @lurd(ptr %a, i32 %b) {
 }
 
 define void @srb(ptr %a, i64 %b, i8 %c) {
-; RV32XTHEADMEMIDX-LABEL: srb:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT:    th.srb a3, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: srb:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a3, a3, a3
+; RV32-NEXT:    th.srb a3, a0, a1, 0
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: srb:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT:    th.srb a2, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: srb:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a2, a2, a2
+; RV64-NEXT:    th.srb a2, a0, a1, 0
+; RV64-NEXT:    ret
   %1 = add i8 %c, %c
   %2 = getelementptr i8, ptr %a, i64 %b
   store i8 %1, ptr %2, align 1
@@ -947,17 +858,17 @@ define void @srb(ptr %a, i64 %b, i8 %c) {
 }
 
 define void @surb(ptr %a, i32 %b, i8 %c) {
-; RV32XTHEADMEMIDX-LABEL: surb:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    th.srb a2, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: surb:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a2, a2, a2
+; RV32-NEXT:    th.srb a2, a0, a1, 0
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: surb:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT:    th.surb a2, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: surb:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a2, a2, a2
+; RV64-NEXT:    th.surb a2, a0, a1, 0
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = add i8 %c, %c
   %3 = getelementptr i8, ptr %a, i64 %1
@@ -966,17 +877,17 @@ define void @surb(ptr %a, i32 %b, i8 %c) {
 }
 
 define void @srh(ptr %a, i64 %b, i16 %c) {
-; RV32XTHEADMEMIDX-LABEL: srh:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT:    th.srh a3, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: srh:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a3, a3, a3
+; RV32-NEXT:    th.srh a3, a0, a1, 1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: srh:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT:    th.srh a2, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: srh:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a2, a2, a2
+; RV64-NEXT:    th.srh a2, a0, a1, 1
+; RV64-NEXT:    ret
   %1 = add i16 %c, %c
   %2 = getelementptr i16, ptr %a, i64 %b
   store i16 %1, ptr %2, align 2
@@ -984,17 +895,17 @@ define void @srh(ptr %a, i64 %b, i16 %c) {
 }
 
 define void @surh(ptr %a, i32 %b, i16 %c) {
-; RV32XTHEADMEMIDX-LABEL: surh:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    th.srh a2, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: surh:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a2, a2, a2
+; RV32-NEXT:    th.srh a2, a0, a1, 1
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: surh:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT:    th.surh a2, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: surh:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a2, a2, a2
+; RV64-NEXT:    th.surh a2, a0, a1, 1
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = add i16 %c, %c
   %3 = getelementptr i16, ptr %a, i64 %1
@@ -1003,17 +914,17 @@ define void @surh(ptr %a, i32 %b, i16 %c) {
 }
 
 define void @srw(ptr %a, i64 %b, i32 %c) {
-; RV32XTHEADMEMIDX-LABEL: srw:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT:    th.srw a3, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: srw:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a3, a3, a3
+; RV32-NEXT:    th.srw a3, a0, a1, 2
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: srw:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT:    th.srw a2, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: srw:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a2, a2, a2
+; RV64-NEXT:    th.srw a2, a0, a1, 2
+; RV64-NEXT:    ret
   %1 = add i32 %c, %c
   %2 = getelementptr i32, ptr %a, i64 %b
   store i32 %1, ptr %2, align 4
@@ -1021,17 +932,17 @@ define void @srw(ptr %a, i64 %b, i32 %c) {
 }
 
 define void @surw(ptr %a, i32 %b, i32 %c) {
-; RV32XTHEADMEMIDX-LABEL: surw:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    th.srw a2, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: surw:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a2, a2, a2
+; RV32-NEXT:    th.srw a2, a0, a1, 2
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: surw:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT:    th.surw a2, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: surw:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a2, a2, a2
+; RV64-NEXT:    th.surw a2, a0, a1, 2
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = add i32 %c, %c
   %3 = getelementptr i32, ptr %a, i64 %1
@@ -1040,22 +951,22 @@ define void @surw(ptr %a, i32 %b, i32 %c) {
 }
 
 define void @srd(ptr %a, i64 %b, i64 %c) {
-; RV32XTHEADMEMIDX-LABEL: srd:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a2, a3, a3
-; RV32XTHEADMEMIDX-NEXT:    add a4, a4, a4
-; RV32XTHEADMEMIDX-NEXT:    sltu a3, a2, a3
-; RV32XTHEADMEMIDX-NEXT:    th.srw a2, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    add a3, a4, a3
-; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
-; RV32XTHEADMEMIDX-NEXT:    th.srw a3, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: srd:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a2, a3, a3
+; RV32-NEXT:    add a4, a4, a4
+; RV32-NEXT:    sltu a3, a2, a3
+; RV32-NEXT:    th.srw a2, a0, a1, 3
+; RV32-NEXT:    add a3, a4, a3
+; RV32-NEXT:    addi a0, a0, 4
+; RV32-NEXT:    th.srw a3, a0, a1, 3
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: srd:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT:    th.srd a2, a0, a1, 3
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: srd:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a2, a2, a2
+; RV64-NEXT:    th.srd a2, a0, a1, 3
+; RV64-NEXT:    ret
   %1 = add i64 %c, %c
   %2 = getelementptr i64, ptr %a, i64 %b
   store i64 %1, ptr %2, align 8
@@ -1063,22 +974,22 @@ define void @srd(ptr %a, i64 %b, i64 %c) {
 }
 
 define void @surd(ptr %a, i32 %b, i64 %c) {
-; RV32XTHEADMEMIDX-LABEL: surd:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a4, a2, a2
-; RV32XTHEADMEMIDX-NEXT:    add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT:    sltu a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT:    th.srw a4, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
-; RV32XTHEADMEMIDX-NEXT:    th.srw a2, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: surd:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a4, a2, a2
+; RV32-NEXT:    add a3, a3, a3
+; RV32-NEXT:    sltu a2, a4, a2
+; RV32-NEXT:    th.srw a4, a0, a1, 3
+; RV32-NEXT:    add a2, a3, a2
+; RV32-NEXT:    addi a0, a0, 4
+; RV32-NEXT:    th.srw a2, a0, a1, 3
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: surd:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT:    th.surd a2, a0, a1, 3
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: surd:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a2, a2, a2
+; RV64-NEXT:    th.surd a2, a0, a1, 3
+; RV64-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = add i64 %c, %c
   %3 = getelementptr i64, ptr %a, i64 %1
@@ -1087,17 +998,11 @@ define void @surd(ptr %a, i32 %b, i64 %c) {
 }
 
 define ptr @test_simm5(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: test_simm5:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT:    th.swia a1, (a0), -12, 2
-; RV32XTHEADMEMIDX-NEXT:    ret
-;
-; RV64XTHEADMEMIDX-LABEL: test_simm5:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT:    th.swia a1, (a0), -12, 2
-; RV64XTHEADMEMIDX-NEXT:    ret
+; CHECK-LABEL: test_simm5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    th.swia a1, (a0), -12, 2
+; CHECK-NEXT:    ret
   %addr.1 = getelementptr i32, ptr %base, i32 -12
   %res = add i32 %a, %b
   store i32 %res, ptr %base
@@ -1105,20 +1010,20 @@ define ptr @test_simm5(ptr %base, i32 %a, i32 %b) {
 }
 
 define i64 @lrd_large_shift(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrd_large_shift:
-; RV32XTHEADMEMIDX:       # %bb.0:
-; RV32XTHEADMEMIDX-NEXT:    slli a1, a1, 5
-; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a0
-; RV32XTHEADMEMIDX-NEXT:    lw a0, 384(a1)
-; RV32XTHEADMEMIDX-NEXT:    lw a1, 388(a1)
-; RV32XTHEADMEMIDX-NEXT:    ret
+; RV32-LABEL: lrd_large_shift:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    lw a0, 384(a1)
+; RV32-NEXT:    lw a1, 388(a1)
+; RV32-NEXT:    ret
 ;
-; RV64XTHEADMEMIDX-LABEL: lrd_large_shift:
-; RV64XTHEADMEMIDX:       # %bb.0:
-; RV64XTHEADMEMIDX-NEXT:    slli a1, a1, 5
-; RV64XTHEADMEMIDX-NEXT:    add a0, a1, a0
-; RV64XTHEADMEMIDX-NEXT:    ld a0, 384(a0)
-; RV64XTHEADMEMIDX-NEXT:    ret
+; RV64-LABEL: lrd_large_shift:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    ld a0, 384(a0)
+; RV64-NEXT:    ret
   %1 = add i64 %b, 12
   %2 = shl i64 %1, 2
   %3 = getelementptr i64, ptr %a, i64 %2
@@ -1145,6 +1050,23 @@ define i64 @lrd_large_offset(ptr %a, i64 %b) {
 ; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a1
 ; RV64XTHEADMEMIDX-NEXT:    ld a0, 1792(a0)
 ; RV64XTHEADMEMIDX-NEXT:    ret
+;
+; RV32XTHEADBA-LABEL: lrd_large_offset:
+; RV32XTHEADBA:       # %bb.0:
+; RV32XTHEADBA-NEXT:    th.addsl a0, a0, a1, 3
+; RV32XTHEADBA-NEXT:    lui a1, 23
+; RV32XTHEADBA-NEXT:    add a1, a0, a1
+; RV32XTHEADBA-NEXT:    lw a0, 1792(a1)
+; RV32XTHEADBA-NEXT:    lw a1, 1796(a1)
+; RV32XTHEADBA-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: lrd_large_offset:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a1, 3
+; RV64XTHEADBA-NEXT:    lui a1, 23
+; RV64XTHEADBA-NEXT:    add a0, a0, a1
+; RV64XTHEADBA-NEXT:    ld a0, 1792(a0)
+; RV64XTHEADBA-NEXT:    ret
   %1 = add i64 %b, 12000
   %2 = getelementptr i64, ptr %a, i64 %1
   %3 = load i64, ptr %2, align 8

>From 22d036be35b745f12358f524853089f6cda8c581 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 16 Jul 2025 21:49:34 -0700
Subject: [PATCH 3/4] [RISCV] Add profitability checks to SelectAddrRegRegScale.

---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp |  19 ++-
 llvm/test/CodeGen/RISCV/xqcisls.ll          |  25 ++-
 llvm/test/CodeGen/RISCV/xtheadmemidx.ll     | 176 +++++++++++++-------
 3 files changed, 148 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 0d2fac903d960..44174a9d5c4c3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3032,6 +3032,19 @@ bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
   return true;
 }
 
+static bool isWorthFoldingSHLOverADDI(SDValue Addi, SDValue Shift,
+                                      const RISCVSubtarget &Subtarget) {
+  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+
+  if (ShiftAmt <= 3 && (Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa()))
+    return false;
+
+  if (ShiftAmt >= 4 && Subtarget.hasVendorXqciac())
+    return false;
+
+  return true;
+}
+
 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
                                               unsigned MaxShiftAmount,
                                               SDValue &Base, SDValue &Index,
@@ -3062,7 +3075,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
     if (LHS.getOpcode() == ISD::ADD &&
         !isa<ConstantSDNode>(LHS.getOperand(1)) &&
         isInt<12>(C1->getSExtValue())) {
-      if (SelectShl(LHS.getOperand(1), Index, Scale)) {
+      if (SelectShl(LHS.getOperand(1), Index, Scale) &&
+          isWorthFoldingSHLOverADDI(Addr, LHS.getOperand(1), *Subtarget)) {
         SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
                                                   SDLoc(Addr), VT);
         Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
@@ -3071,7 +3085,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
         return true;
       }
 
-      if (SelectShl(LHS.getOperand(0), Index, Scale)) {
+      if (SelectShl(LHS.getOperand(0), Index, Scale) &&
+          isWorthFoldingSHLOverADDI(Addr, LHS.getOperand(0), *Subtarget)) {
         SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
                                                   SDLoc(Addr), VT);
         Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
diff --git a/llvm/test/CodeGen/RISCV/xqcisls.ll b/llvm/test/CodeGen/RISCV/xqcisls.ll
index 709dc4ce074dc..bfa3d2003271d 100644
--- a/llvm/test/CodeGen/RISCV/xqcisls.ll
+++ b/llvm/test/CodeGen/RISCV/xqcisls.ll
@@ -309,8 +309,8 @@ define i64 @lrd(ptr %a, i32 %b) {
 ; RV32IZBAXQCISLS-LABEL: lrd:
 ; RV32IZBAXQCISLS:       # %bb.0:
 ; RV32IZBAXQCISLS-NEXT:    qc.lrw a2, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT:    addi a0, a0, 4
-; RV32IZBAXQCISLS-NEXT:    qc.lrw a1, a0, a1, 3
+; RV32IZBAXQCISLS-NEXT:    sh3add a0, a1, a0
+; RV32IZBAXQCISLS-NEXT:    lw a1, 4(a0)
 ; RV32IZBAXQCISLS-NEXT:    add a0, a2, a2
 ; RV32IZBAXQCISLS-NEXT:    sltu a2, a0, a2
 ; RV32IZBAXQCISLS-NEXT:    add a1, a1, a1
@@ -348,14 +348,13 @@ define i64 @lrd_2(ptr %a, i32 %b) {
 ;
 ; RV32IZBAXQCISLS-LABEL: lrd_2:
 ; RV32IZBAXQCISLS:       # %bb.0:
-; RV32IZBAXQCISLS-NEXT:    addi a2, a0, 96
-; RV32IZBAXQCISLS-NEXT:    qc.lrw a2, a2, a1, 3
-; RV32IZBAXQCISLS-NEXT:    addi a0, a0, 100
-; RV32IZBAXQCISLS-NEXT:    qc.lrw a1, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT:    add a0, a2, a2
-; RV32IZBAXQCISLS-NEXT:    sltu a2, a0, a2
-; RV32IZBAXQCISLS-NEXT:    add a1, a1, a1
-; RV32IZBAXQCISLS-NEXT:    add a1, a1, a2
+; RV32IZBAXQCISLS-NEXT:    sh3add a0, a1, a0
+; RV32IZBAXQCISLS-NEXT:    lw a1, 96(a0)
+; RV32IZBAXQCISLS-NEXT:    lw a2, 100(a0)
+; RV32IZBAXQCISLS-NEXT:    add a0, a1, a1
+; RV32IZBAXQCISLS-NEXT:    sltu a1, a0, a1
+; RV32IZBAXQCISLS-NEXT:    add a2, a2, a2
+; RV32IZBAXQCISLS-NEXT:    add a1, a2, a1
 ; RV32IZBAXQCISLS-NEXT:    ret
   %1 = add i32 %b, 12
   %2 = getelementptr i64, ptr %a, i32 %1
@@ -473,10 +472,10 @@ define void @srd(ptr %a, i32 %b, i64 %c) {
 ; RV32IZBAXQCISLS-NEXT:    add a4, a2, a2
 ; RV32IZBAXQCISLS-NEXT:    add a3, a3, a3
 ; RV32IZBAXQCISLS-NEXT:    sltu a2, a4, a2
-; RV32IZBAXQCISLS-NEXT:    qc.srw a4, a0, a1, 3
 ; RV32IZBAXQCISLS-NEXT:    add a2, a3, a2
-; RV32IZBAXQCISLS-NEXT:    addi a0, a0, 4
-; RV32IZBAXQCISLS-NEXT:    qc.srw a2, a0, a1, 3
+; RV32IZBAXQCISLS-NEXT:    sh3add a3, a1, a0
+; RV32IZBAXQCISLS-NEXT:    qc.srw a4, a0, a1, 3
+; RV32IZBAXQCISLS-NEXT:    sw a2, 4(a3)
 ; RV32IZBAXQCISLS-NEXT:    ret
   %1 = add i64 %c, %c
   %2 = getelementptr i64, ptr %a, i32 %b
diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
index 4d039977a73f8..0b810cf6ed01e 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
@@ -767,22 +767,33 @@ define i64 @lurwu(ptr %a, i32 %b) {
 }
 
 define i64 @lrd(ptr %a, i64 %b) {
-; RV32-LABEL: lrd:
-; RV32:       # %bb.0:
-; RV32-NEXT:    th.lrw a2, a0, a1, 3
-; RV32-NEXT:    addi a0, a0, 4
-; RV32-NEXT:    th.lrw a1, a0, a1, 3
-; RV32-NEXT:    add a0, a2, a2
-; RV32-NEXT:    sltu a2, a0, a2
-; RV32-NEXT:    add a1, a1, a1
-; RV32-NEXT:    add a1, a1, a2
-; RV32-NEXT:    ret
+; RV32XTHEADMEMIDX-LABEL: lrd:
+; RV32XTHEADMEMIDX:       # %bb.0:
+; RV32XTHEADMEMIDX-NEXT:    th.lrw a2, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
+; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    add a0, a2, a2
+; RV32XTHEADMEMIDX-NEXT:    sltu a2, a0, a2
+; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a1
+; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
+; RV32XTHEADMEMIDX-NEXT:    ret
 ;
 ; RV64-LABEL: lrd:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    th.lrd a0, a0, a1, 3
 ; RV64-NEXT:    add a0, a0, a0
 ; RV64-NEXT:    ret
+;
+; RV32XTHEADBA-LABEL: lrd:
+; RV32XTHEADBA:       # %bb.0:
+; RV32XTHEADBA-NEXT:    th.lrw a2, a0, a1, 3
+; RV32XTHEADBA-NEXT:    th.addsl a0, a0, a1, 3
+; RV32XTHEADBA-NEXT:    lw a1, 4(a0)
+; RV32XTHEADBA-NEXT:    add a0, a2, a2
+; RV32XTHEADBA-NEXT:    sltu a2, a0, a2
+; RV32XTHEADBA-NEXT:    add a1, a1, a1
+; RV32XTHEADBA-NEXT:    add a1, a1, a2
+; RV32XTHEADBA-NEXT:    ret
   %1 = getelementptr i64, ptr %a, i64 %b
   %2 = load i64, ptr %1, align 8
   %3 = add i64 %2, %2
@@ -790,24 +801,42 @@ define i64 @lrd(ptr %a, i64 %b) {
 }
 
 define i64 @lrd_2(ptr %a, i64 %b) {
-; RV32-LABEL: lrd_2:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a2, a0, 96
-; RV32-NEXT:    th.lrw a2, a2, a1, 3
-; RV32-NEXT:    addi a0, a0, 100
-; RV32-NEXT:    th.lrw a1, a0, a1, 3
-; RV32-NEXT:    add a0, a2, a2
-; RV32-NEXT:    sltu a2, a0, a2
-; RV32-NEXT:    add a1, a1, a1
-; RV32-NEXT:    add a1, a1, a2
-; RV32-NEXT:    ret
+; RV32XTHEADMEMIDX-LABEL: lrd_2:
+; RV32XTHEADMEMIDX:       # %bb.0:
+; RV32XTHEADMEMIDX-NEXT:    addi a2, a0, 96
+; RV32XTHEADMEMIDX-NEXT:    th.lrw a2, a2, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 100
+; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    add a0, a2, a2
+; RV32XTHEADMEMIDX-NEXT:    sltu a2, a0, a2
+; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a1
+; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
+; RV32XTHEADMEMIDX-NEXT:    ret
 ;
-; RV64-LABEL: lrd_2:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a0, a0, 96
-; RV64-NEXT:    th.lrd a0, a0, a1, 3
-; RV64-NEXT:    add a0, a0, a0
-; RV64-NEXT:    ret
+; RV64XTHEADMEMIDX-LABEL: lrd_2:
+; RV64XTHEADMEMIDX:       # %bb.0:
+; RV64XTHEADMEMIDX-NEXT:    addi a0, a0, 96
+; RV64XTHEADMEMIDX-NEXT:    th.lrd a0, a0, a1, 3
+; RV64XTHEADMEMIDX-NEXT:    add a0, a0, a0
+; RV64XTHEADMEMIDX-NEXT:    ret
+;
+; RV32XTHEADBA-LABEL: lrd_2:
+; RV32XTHEADBA:       # %bb.0:
+; RV32XTHEADBA-NEXT:    th.addsl a0, a0, a1, 3
+; RV32XTHEADBA-NEXT:    lw a1, 96(a0)
+; RV32XTHEADBA-NEXT:    lw a2, 100(a0)
+; RV32XTHEADBA-NEXT:    add a0, a1, a1
+; RV32XTHEADBA-NEXT:    sltu a1, a0, a1
+; RV32XTHEADBA-NEXT:    add a2, a2, a2
+; RV32XTHEADBA-NEXT:    add a1, a2, a1
+; RV32XTHEADBA-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: lrd_2:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a1, 3
+; RV64XTHEADBA-NEXT:    ld a0, 96(a0)
+; RV64XTHEADBA-NEXT:    add a0, a0, a0
+; RV64XTHEADBA-NEXT:    ret
   %1 = add i64 %b, 12
   %2 = getelementptr i64, ptr %a, i64 %1
   %3 = load i64, ptr %2, align 8
@@ -816,22 +845,33 @@ define i64 @lrd_2(ptr %a, i64 %b) {
 }
 
 define i64 @lurd(ptr %a, i32 %b) {
-; RV32-LABEL: lurd:
-; RV32:       # %bb.0:
-; RV32-NEXT:    th.lrw a2, a0, a1, 3
-; RV32-NEXT:    addi a0, a0, 4
-; RV32-NEXT:    th.lrw a1, a0, a1, 3
-; RV32-NEXT:    add a0, a2, a2
-; RV32-NEXT:    sltu a2, a0, a2
-; RV32-NEXT:    add a1, a1, a1
-; RV32-NEXT:    add a1, a1, a2
-; RV32-NEXT:    ret
+; RV32XTHEADMEMIDX-LABEL: lurd:
+; RV32XTHEADMEMIDX:       # %bb.0:
+; RV32XTHEADMEMIDX-NEXT:    th.lrw a2, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
+; RV32XTHEADMEMIDX-NEXT:    th.lrw a1, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    add a0, a2, a2
+; RV32XTHEADMEMIDX-NEXT:    sltu a2, a0, a2
+; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a1
+; RV32XTHEADMEMIDX-NEXT:    add a1, a1, a2
+; RV32XTHEADMEMIDX-NEXT:    ret
 ;
 ; RV64-LABEL: lurd:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    th.lurd a0, a0, a1, 3
 ; RV64-NEXT:    add a0, a0, a0
 ; RV64-NEXT:    ret
+;
+; RV32XTHEADBA-LABEL: lurd:
+; RV32XTHEADBA:       # %bb.0:
+; RV32XTHEADBA-NEXT:    th.lrw a2, a0, a1, 3
+; RV32XTHEADBA-NEXT:    th.addsl a0, a0, a1, 3
+; RV32XTHEADBA-NEXT:    lw a1, 4(a0)
+; RV32XTHEADBA-NEXT:    add a0, a2, a2
+; RV32XTHEADBA-NEXT:    sltu a2, a0, a2
+; RV32XTHEADBA-NEXT:    add a1, a1, a1
+; RV32XTHEADBA-NEXT:    add a1, a1, a2
+; RV32XTHEADBA-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = getelementptr i64, ptr %a, i64 %1
   %3 = load i64, ptr %2, align 8
@@ -951,22 +991,33 @@ define void @surw(ptr %a, i32 %b, i32 %c) {
 }
 
 define void @srd(ptr %a, i64 %b, i64 %c) {
-; RV32-LABEL: srd:
-; RV32:       # %bb.0:
-; RV32-NEXT:    add a2, a3, a3
-; RV32-NEXT:    add a4, a4, a4
-; RV32-NEXT:    sltu a3, a2, a3
-; RV32-NEXT:    th.srw a2, a0, a1, 3
-; RV32-NEXT:    add a3, a4, a3
-; RV32-NEXT:    addi a0, a0, 4
-; RV32-NEXT:    th.srw a3, a0, a1, 3
-; RV32-NEXT:    ret
+; RV32XTHEADMEMIDX-LABEL: srd:
+; RV32XTHEADMEMIDX:       # %bb.0:
+; RV32XTHEADMEMIDX-NEXT:    add a2, a3, a3
+; RV32XTHEADMEMIDX-NEXT:    add a4, a4, a4
+; RV32XTHEADMEMIDX-NEXT:    sltu a3, a2, a3
+; RV32XTHEADMEMIDX-NEXT:    th.srw a2, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    add a3, a4, a3
+; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
+; RV32XTHEADMEMIDX-NEXT:    th.srw a3, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    ret
 ;
 ; RV64-LABEL: srd:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    add a2, a2, a2
 ; RV64-NEXT:    th.srd a2, a0, a1, 3
 ; RV64-NEXT:    ret
+;
+; RV32XTHEADBA-LABEL: srd:
+; RV32XTHEADBA:       # %bb.0:
+; RV32XTHEADBA-NEXT:    add a2, a3, a3
+; RV32XTHEADBA-NEXT:    add a4, a4, a4
+; RV32XTHEADBA-NEXT:    sltu a3, a2, a3
+; RV32XTHEADBA-NEXT:    add a3, a4, a3
+; RV32XTHEADBA-NEXT:    th.addsl a4, a0, a1, 3
+; RV32XTHEADBA-NEXT:    th.srw a2, a0, a1, 3
+; RV32XTHEADBA-NEXT:    sw a3, 4(a4)
+; RV32XTHEADBA-NEXT:    ret
   %1 = add i64 %c, %c
   %2 = getelementptr i64, ptr %a, i64 %b
   store i64 %1, ptr %2, align 8
@@ -974,22 +1025,33 @@ define void @srd(ptr %a, i64 %b, i64 %c) {
 }
 
 define void @surd(ptr %a, i32 %b, i64 %c) {
-; RV32-LABEL: surd:
-; RV32:       # %bb.0:
-; RV32-NEXT:    add a4, a2, a2
-; RV32-NEXT:    add a3, a3, a3
-; RV32-NEXT:    sltu a2, a4, a2
-; RV32-NEXT:    th.srw a4, a0, a1, 3
-; RV32-NEXT:    add a2, a3, a2
-; RV32-NEXT:    addi a0, a0, 4
-; RV32-NEXT:    th.srw a2, a0, a1, 3
-; RV32-NEXT:    ret
+; RV32XTHEADMEMIDX-LABEL: surd:
+; RV32XTHEADMEMIDX:       # %bb.0:
+; RV32XTHEADMEMIDX-NEXT:    add a4, a2, a2
+; RV32XTHEADMEMIDX-NEXT:    add a3, a3, a3
+; RV32XTHEADMEMIDX-NEXT:    sltu a2, a4, a2
+; RV32XTHEADMEMIDX-NEXT:    th.srw a4, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    add a2, a3, a2
+; RV32XTHEADMEMIDX-NEXT:    addi a0, a0, 4
+; RV32XTHEADMEMIDX-NEXT:    th.srw a2, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT:    ret
 ;
 ; RV64-LABEL: surd:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    add a2, a2, a2
 ; RV64-NEXT:    th.surd a2, a0, a1, 3
 ; RV64-NEXT:    ret
+;
+; RV32XTHEADBA-LABEL: surd:
+; RV32XTHEADBA:       # %bb.0:
+; RV32XTHEADBA-NEXT:    add a4, a2, a2
+; RV32XTHEADBA-NEXT:    add a3, a3, a3
+; RV32XTHEADBA-NEXT:    sltu a2, a4, a2
+; RV32XTHEADBA-NEXT:    add a2, a3, a2
+; RV32XTHEADBA-NEXT:    th.addsl a3, a0, a1, 3
+; RV32XTHEADBA-NEXT:    th.srw a4, a0, a1, 3
+; RV32XTHEADBA-NEXT:    sw a2, 4(a3)
+; RV32XTHEADBA-NEXT:    ret
   %1 = zext i32 %b to i64
   %2 = add i64 %c, %c
   %3 = getelementptr i64, ptr %a, i64 %1

>From 21e0e4cd5d95ffce59ccb7dd7b15440f531b802b Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 16 Jul 2025 21:50:40 -0700
Subject: [PATCH 4/4] fixup! add comment

---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 44174a9d5c4c3..e05b927592056 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3085,6 +3085,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
         return true;
       }
 
+      // Add is commutative so we need to check both operands.
       if (SelectShl(LHS.getOperand(0), Index, Scale) &&
           isWorthFoldingSHLOverADDI(Addr, LHS.getOperand(0), *Subtarget)) {
         SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),



More information about the llvm-commits mailing list