[libc-commits] [libc] [llvm] [LoongArch] Support ISD::SET_ROUNDING (llvm.set.rounding) (PR #205051)

Tue Jun 23 01:10:01 PDT 2026

https://github.com/tangyuan0821 updated https://github.com/llvm/llvm-project/pull/205051

>From 152f23515b7d900fe0a698a2b479ab4bfbdc982a Mon Sep 17 00:00:00 2001
From: tangyuan0821 <tangyuan0821 at email.cn>
Date: Mon, 22 Jun 2026 16:00:05 +0800
Subject: [PATCH 1/6] [LoongArch] Support ISD::SET_ROUNDING (llvm.set.rounding)

Fixes "Cannot select: set_rounding" for loongarch64. The rounding mode
encoding matches LoongArch FCSR hardware, so no translation is needed.
---
 libc/shared/fp_bits.h                         |  2 +-
 .../LoongArch/LoongArchISelLowering.cpp       | 28 +++++++++++++++++++
 .../Target/LoongArch/LoongArchISelLowering.h  |  1 +
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/libc/shared/fp_bits.h b/libc/shared/fp_bits.h
index e6bb1e17b80c9..1a7544be7519d 100644
--- a/libc/shared/fp_bits.h
+++ b/libc/shared/fp_bits.h
@@ -1,4 +1,4 @@
-//===-- Floating point number utils -----------------------------*- C++ -*-===//
+comm//===-- Floating point number utils -----------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 8748f4723339b..9438608fac82a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -125,6 +125,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
 
+  setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 
@@ -613,6 +614,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerFRAMEADDR(Op, DAG);
   case ISD::RETURNADDR:
     return lowerRETURNADDR(Op, DAG);
+  case ISD::SET_ROUNDING:
+    return lowerSET_ROUNDING(Op, DAG);
   case ISD::WRITE_REGISTER:
     return lowerWRITE_REGISTER(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:
@@ -4015,6 +4018,31 @@ SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
   return Op;
 }
 
+SDValue LoongArchTargetLowering::lowerSET_ROUNDING(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  MVT GRLenVT = Subtarget.getGRLenVT();
+  SDLoc DL(Op);
+  SDValue Chain = Op.getOperand(0);
+  SDValue RMValue = Op.getOperand(1);
+
+  // Zero-extend i32 rounding mode to GRLenVT.
+  RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, GRLenVT, RMValue);
+
+  // The rounding mode in FCSR0 occupies bits 1:0.
+  // LLVM rounding mode encoding (0=RNE,1=RTZ,2=RDN,3=RUP) matches
+  // the LoongArch FCSR hardware encoding, so no translation needed.
+  // We mask to 2 bits to guard against invalid values.
+  RMValue = DAG.getNode(ISD::AND, DL, GRLenVT, RMValue,
+                        DAG.getConstant(0x3, DL, GRLenVT));
+
+  // Build MachineInstr node for WRFCSR (pseudo for MOVGR2FCSR).
+  // WRFCSR takes (uimm2:$fcsr, GPR:$src).
+  SDValue FCSRNo = DAG.getTargetConstant(0, DL, GRLenVT); // FCSR0
+  MachineSDNode *RN = DAG.getMachineNode(LoongArch::WRFCSR, DL, MVT::Other,
+                                          FCSRNo, RMValue, Chain);
+  return SDValue(RN, 0);
+}
+
 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                      SelectionDAG &DAG) const {
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 2f2eda1e2c7d9..8d9ec6020478b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -236,6 +236,7 @@ class LoongArchTargetLowering : public TargetLowering {
   SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

>From 881f6eb14e7a14040e19ef93d4bbb960b0b5a256 Mon Sep 17 00:00:00 2001
From: tangyuan0821 <tangyuan0821 at email.cn>
Date: Mon, 22 Jun 2026 19:37:20 +0800
Subject: [PATCH 2/6] [libc] Fix corrupted header comment in fp_bits.h

---
 libc/shared/fp_bits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/shared/fp_bits.h b/libc/shared/fp_bits.h
index 1a7544be7519d..e6bb1e17b80c9 100644
--- a/libc/shared/fp_bits.h
+++ b/libc/shared/fp_bits.h
@@ -1,4 +1,4 @@
-comm//===-- Floating point number utils -----------------------------*- C++ -*-===//
+//===-- Floating point number utils -----------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

>From 9e422ffc2d5619cd8a829b5ba0bc902388058338 Mon Sep 17 00:00:00 2001
From: tangyuan0821 <tangyuan0821 at email.cn>
Date: Mon, 22 Jun 2026 21:10:27 +0800
Subject: [PATCH 3/6] [LoongArch] Fix SET_ROUNDING lowering: use FCSR3, add
 diagnostics, guard with hasBasicF()

---
 .../LoongArch/LoongArchISelLowering.cpp       | 24 ++++++++++++++-----
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 9438608fac82a..c95bfc5e5e7d9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -125,7 +125,6 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
 
-  setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 
@@ -256,6 +255,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
       }
     }
+
+    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
   }
 
   // Set operations for 'D' feature.
@@ -4025,21 +4026,32 @@ SDValue LoongArchTargetLowering::lowerSET_ROUNDING(SDValue Op,
   SDValue Chain = Op.getOperand(0);
   SDValue RMValue = Op.getOperand(1);
 
+  // LLVM rounding mode encoding (0=RNE, 1=RTZ, 2=RUP, 3=RDN).
+  if (auto *CVal = dyn_cast<ConstantSDNode>(RMValue)) {
+    uint64_t RM = CVal->getZExtValue();
+    if (RM > 3) {
+      DAG.getContext()->emitError("invalid rounding mode");
+      return Chain;
+    }
+  }
+
   // Zero-extend i32 rounding mode to GRLenVT.
   RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, GRLenVT, RMValue);
 
-  // The rounding mode in FCSR0 occupies bits 1:0.
-  // LLVM rounding mode encoding (0=RNE,1=RTZ,2=RDN,3=RUP) matches
-  // the LoongArch FCSR hardware encoding, so no translation needed.
+  // The rounding mode in FCSR occupies bits 1:0.
+  // LLVM rounding mode encoding matches the LoongArch FCSR hardware
+  // encoding, so no translation needed.
   // We mask to 2 bits to guard against invalid values.
   RMValue = DAG.getNode(ISD::AND, DL, GRLenVT, RMValue,
                         DAG.getConstant(0x3, DL, GRLenVT));
 
   // Build MachineInstr node for WRFCSR (pseudo for MOVGR2FCSR).
   // WRFCSR takes (uimm2:$fcsr, GPR:$src).
-  SDValue FCSRNo = DAG.getTargetConstant(0, DL, GRLenVT); // FCSR0
+  // FCSR3 is an alias of the RM field; writing it avoids clobbering
+  // unrelated fields in FCSR0.
+  SDValue FCSRNo = DAG.getTargetConstant(3, DL, GRLenVT);
   MachineSDNode *RN = DAG.getMachineNode(LoongArch::WRFCSR, DL, MVT::Other,
-                                          FCSRNo, RMValue, Chain);
+                                         FCSRNo, RMValue, Chain);
   return SDValue(RN, 0);
 }
 

>From ce94bcc19bd8bdd88839bf4c2aa66b31d53fb554 Mon Sep 17 00:00:00 2001
From: tangyuan0821 <tangyuan0821 at email.cn>
Date: Tue, 23 Jun 2026 12:07:19 +0800
Subject: [PATCH 4/6] [LoongArch] Fix SET_ROUNDING encoding translation for
 LLVM rounding modes

---
 .../LoongArch/LoongArchISelLowering.cpp       | 21 ++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index c95bfc5e5e7d9..62c0c7b4408ff 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4026,7 +4026,7 @@ SDValue LoongArchTargetLowering::lowerSET_ROUNDING(SDValue Op,
   SDValue Chain = Op.getOperand(0);
   SDValue RMValue = Op.getOperand(1);
 
-  // LLVM rounding mode encoding (0=RNE, 1=RTZ, 2=RUP, 3=RDN).
+  // LLVM rounding mode encoding: 0=RTZ, 1=RNE, 2=RUP, 3=RDN.
   if (auto *CVal = dyn_cast<ConstantSDNode>(RMValue)) {
     uint64_t RM = CVal->getZExtValue();
     if (RM > 3) {
@@ -4038,10 +4038,21 @@ SDValue LoongArchTargetLowering::lowerSET_ROUNDING(SDValue Op,
   // Zero-extend i32 rounding mode to GRLenVT.
   RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, GRLenVT, RMValue);
 
-  // The rounding mode in FCSR occupies bits 1:0.
-  // LLVM rounding mode encoding matches the LoongArch FCSR hardware
-  // encoding, so no translation needed.
-  // We mask to 2 bits to guard against invalid values.
+  // LLVM rounding mode encoding differs from LoongArch FCSR encoding:
+  //   LLVM: 0=RTZ, 1=RNE, 2=RUP, 3=RDN
+  //   FCSR: 0=RNE, 1=RZ,  2=RP,  3=RN
+  // Translate by swapping 0↔1 (XOR with 1 when RM < 2), keep 2 and 3.
+  //   0 (RTZ) → 1 (RZ), 1 (RNE) → 0 (RNE), 2 (RUP) → 2 (RP), 3 (RDN) → 3 (RN)
+  // Transformation: RM ^ (~(RM >> 1) & 1)
+  SDValue ShiftRight1 = DAG.getNode(ISD::SRL, DL, GRLenVT, RMValue,
+                                    DAG.getConstant(1, DL, GRLenVT));
+  SDValue SwapMask = DAG.getNode(ISD::AND, DL, GRLenVT,
+      DAG.getNode(ISD::XOR, DL, GRLenVT, ShiftRight1,
+                  DAG.getConstant(1, DL, GRLenVT)),
+      DAG.getConstant(1, DL, GRLenVT));
+  RMValue = DAG.getNode(ISD::XOR, DL, GRLenVT, RMValue, SwapMask);
+
+  // Mask to 2 bits to guard against invalid values.
   RMValue = DAG.getNode(ISD::AND, DL, GRLenVT, RMValue,
                         DAG.getConstant(0x3, DL, GRLenVT));
 

>From 785ee8760f04d1223d5ee1e85c5ed81c22a07eae Mon Sep 17 00:00:00 2001
From: tangyuan0821 <tangyuan0821 at email.cn>
Date: Tue, 23 Jun 2026 15:01:44 +0800
Subject: [PATCH 5/6] [LoongArch] Shift RMValue into FCSR bit position [9:8]
 before WRFCSR

---
 .../LoongArch/LoongArchISelLowering.cpp       | 11 ++-
 llvm/test/CodeGen/LoongArch/set-rounding.ll   | 74 +++++++++++++++++++
 2 files changed, 82 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/LoongArch/set-rounding.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 62c0c7b4408ff..ac40c2d8a230c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4047,15 +4047,20 @@ SDValue LoongArchTargetLowering::lowerSET_ROUNDING(SDValue Op,
   SDValue ShiftRight1 = DAG.getNode(ISD::SRL, DL, GRLenVT, RMValue,
                                     DAG.getConstant(1, DL, GRLenVT));
   SDValue SwapMask = DAG.getNode(ISD::AND, DL, GRLenVT,
-      DAG.getNode(ISD::XOR, DL, GRLenVT, ShiftRight1,
-                  DAG.getConstant(1, DL, GRLenVT)),
-      DAG.getConstant(1, DL, GRLenVT));
+                                 DAG.getNode(ISD::XOR, DL, GRLenVT, ShiftRight1,
+                                             DAG.getConstant(1, DL, GRLenVT)),
+                                 DAG.getConstant(1, DL, GRLenVT));
   RMValue = DAG.getNode(ISD::XOR, DL, GRLenVT, RMValue, SwapMask);
 
   // Mask to 2 bits to guard against invalid values.
   RMValue = DAG.getNode(ISD::AND, DL, GRLenVT, RMValue,
                         DAG.getConstant(0x3, DL, GRLenVT));
 
+  // The RM field in FCSR is at bits [9:8]. Shift the rounding mode value
+  // into position before writing via WRFCSR.
+  RMValue = DAG.getNode(ISD::SHL, DL, GRLenVT, RMValue,
+                        DAG.getConstant(8, DL, GRLenVT));
+
   // Build MachineInstr node for WRFCSR (pseudo for MOVGR2FCSR).
   // WRFCSR takes (uimm2:$fcsr, GPR:$src).
   // FCSR3 is an alias of the RM field; writing it avoids clobbering
diff --git a/llvm/test/CodeGen/LoongArch/set-rounding.ll b/llvm/test/CodeGen/LoongArch/set-rounding.ll
new file mode 100644
index 0000000000000..73f511c561445
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/set-rounding.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+f < %s | FileCheck %s --check-prefix=CHECK-F
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=CHECK-NOF
+; RUN: not llc --mtriple=loongarch64 --mattr=+f < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERR
+
+; LLVM rounding mode encoding: 0=RTZ, 1=RNE, 2=RUP, 3=RDN.
+; LoongArch FCSR encoding:      0=RNE, 1=RZ,  2=RP,  3=RN.
+; Translation: swap 0↔1, keep 2 and 3.
+
+define void @set_rounding_rne() nounwind {
+; CHECK-F-LABEL: set_rounding_rne:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    ori $a0, $zero, 0
+; CHECK-F-NEXT:    movgr2fcsr $fcsr3, $a0
+; CHECK-F-NEXT:    ret
+  tail call void @llvm.set.rounding(i32 1)
+  ret void
+}
+
+define void @set_rounding_rtz() nounwind {
+; CHECK-F-LABEL: set_rounding_rtz:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    ori $a0, $zero, 256
+; CHECK-F-NEXT:    movgr2fcsr $fcsr3, $a0
+; CHECK-F-NEXT:    ret
+  tail call void @llvm.set.rounding(i32 0)
+  ret void
+}
+
+define void @set_rounding_rup() nounwind {
+; CHECK-F-LABEL: set_rounding_rup:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    ori $a0, $zero, 512
+; CHECK-F-NEXT:    movgr2fcsr $fcsr3, $a0
+; CHECK-F-NEXT:    ret
+  tail call void @llvm.set.rounding(i32 2)
+  ret void
+}
+
+define void @set_rounding_rdn() nounwind {
+; CHECK-F-LABEL: set_rounding_rdn:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    ori $a0, $zero, 768
+; CHECK-F-NEXT:    movgr2fcsr $fcsr3, $a0
+; CHECK-F-NEXT:    ret
+  tail call void @llvm.set.rounding(i32 3)
+  ret void
+}
+
+; Test dynamic rounding mode argument.
+define void @set_rounding_dynamic(i32 %rm) nounwind {
+; CHECK-F-LABEL: set_rounding_dynamic:
+; CHECK-F:       movgr2fcsr $fcsr3
+; CHECK-F-NEXT:  ret
+  tail call void @llvm.set.rounding(i32 %rm)
+  ret void
+}
+
+; Test without FP feature - should not crash (no custom lowering).
+; CHECK-NOF-LABEL: set_rounding_no_fp:
+; CHECK-NOF:      ret
+define void @set_rounding_no_fp() nounwind {
+  tail call void @llvm.set.rounding(i32 0)
+  ret void
+}
+
+; Test invalid rounding mode >= 4 (constant) - should emit error.
+; CHECK-ERR: LLVM ERROR: invalid rounding mode
+define void @set_rounding_invalid() nounwind {
+  tail call void @llvm.set.rounding(i32 4)
+  ret void
+}
+
+declare void @llvm.set.rounding(i32)

>From 87f0d3b9bd9f85c7435a04690cc0cd1477bee75a Mon Sep 17 00:00:00 2001
From: tangyuan0821 <tangyuan0821 at email.cn>
Date: Tue, 23 Jun 2026 16:09:41 +0800
Subject: [PATCH 6/6] [LoongArch] Handle SET_ROUNDING without basic-F; fix test
 comments

---
 .../LoongArch/LoongArchISelLowering.cpp       |  7 ++-
 llvm/test/CodeGen/LoongArch/set-rounding.ll   | 46 +++++++++----------
 2 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ac40c2d8a230c..0b6691cc8fa59 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -207,6 +207,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
 
+  setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
+
   static const ISD::CondCode FPCCToExpand[] = {
       ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
       ISD::SETGE,  ISD::SETNE,  ISD::SETGT};
@@ -255,8 +257,6 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
       }
     }
-
-    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
   }
 
   // Set operations for 'D' feature.
@@ -4021,6 +4021,9 @@ SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
 
 SDValue LoongArchTargetLowering::lowerSET_ROUNDING(SDValue Op,
                                                    SelectionDAG &DAG) const {
+  if (!Subtarget.hasBasicF())
+    return Op.getOperand(0);
+
   MVT GRLenVT = Subtarget.getGRLenVT();
   SDLoc DL(Op);
   SDValue Chain = Op.getOperand(0);
diff --git a/llvm/test/CodeGen/LoongArch/set-rounding.ll b/llvm/test/CodeGen/LoongArch/set-rounding.ll
index 73f511c561445..83ad6a7c73f42 100644
--- a/llvm/test/CodeGen/LoongArch/set-rounding.ll
+++ b/llvm/test/CodeGen/LoongArch/set-rounding.ll
@@ -1,62 +1,58 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --mtriple=loongarch64 --mattr=+f < %s | FileCheck %s --check-prefix=CHECK-F
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=CHECK-NOF
+; RUN: llc --mtriple=loongarch64 --mattr=-f < %s | FileCheck %s --check-prefix=CHECK-NOF
 ; RUN: not llc --mtriple=loongarch64 --mattr=+f < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERR
 
-; LLVM rounding mode encoding: 0=RTZ, 1=RNE, 2=RUP, 3=RDN.
-; LoongArch FCSR encoding:      0=RNE, 1=RZ,  2=RP,  3=RN.
-; Translation: swap 0↔1, keep 2 and 3.
+;; LLVM rounding mode encoding: 0=RTZ, 1=RNE, 2=RUP, 3=RDN.
+;; LoongArch FCSR encoding:      0=RNE, 1=RZ,  2=RP,  3=RN.
+;; Translation: swap 0↔1, keep 2 and 3.
+;; The RM field in FCSR is at bits [9:8], so the value is shifted left by 8.
 
 define void @set_rounding_rne() nounwind {
 ; CHECK-F-LABEL: set_rounding_rne:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    ori $a0, $zero, 0
-; CHECK-F-NEXT:    movgr2fcsr $fcsr3, $a0
-; CHECK-F-NEXT:    ret
+; CHECK-F:       movgr2fcsr $fcsr3, $zero
+; CHECK-F-NEXT:  ret
   tail call void @llvm.set.rounding(i32 1)
   ret void
 }
 
 define void @set_rounding_rtz() nounwind {
 ; CHECK-F-LABEL: set_rounding_rtz:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    ori $a0, $zero, 256
-; CHECK-F-NEXT:    movgr2fcsr $fcsr3, $a0
-; CHECK-F-NEXT:    ret
+; CHECK-F:       ori $a0, $zero, 256
+; CHECK-F-NEXT:  movgr2fcsr $fcsr3, $a0
+; CHECK-F-NEXT:  ret
   tail call void @llvm.set.rounding(i32 0)
   ret void
 }
 
 define void @set_rounding_rup() nounwind {
 ; CHECK-F-LABEL: set_rounding_rup:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    ori $a0, $zero, 512
-; CHECK-F-NEXT:    movgr2fcsr $fcsr3, $a0
-; CHECK-F-NEXT:    ret
+; CHECK-F:       ori $a0, $zero, 512
+; CHECK-F-NEXT:  movgr2fcsr $fcsr3, $a0
+; CHECK-F-NEXT:  ret
   tail call void @llvm.set.rounding(i32 2)
   ret void
 }
 
 define void @set_rounding_rdn() nounwind {
 ; CHECK-F-LABEL: set_rounding_rdn:
-; CHECK-F:       # %bb.0:
-; CHECK-F-NEXT:    ori $a0, $zero, 768
-; CHECK-F-NEXT:    movgr2fcsr $fcsr3, $a0
-; CHECK-F-NEXT:    ret
+; CHECK-F:       ori $a0, $zero, 768
+; CHECK-F-NEXT:  movgr2fcsr $fcsr3, $a0
+; CHECK-F-NEXT:  ret
   tail call void @llvm.set.rounding(i32 3)
   ret void
 }
 
-; Test dynamic rounding mode argument.
+;; Test dynamic rounding mode argument.
 define void @set_rounding_dynamic(i32 %rm) nounwind {
 ; CHECK-F-LABEL: set_rounding_dynamic:
-; CHECK-F:       movgr2fcsr $fcsr3
+; CHECK-F:       movgr2fcsr $fcsr3, $a0
 ; CHECK-F-NEXT:  ret
   tail call void @llvm.set.rounding(i32 %rm)
   ret void
 }
 
-; Test without FP feature - should not crash (no custom lowering).
+;; Test without FP feature - should not emit movgr2fcsr.
 ; CHECK-NOF-LABEL: set_rounding_no_fp:
 ; CHECK-NOF:      ret
 define void @set_rounding_no_fp() nounwind {
@@ -64,8 +60,8 @@ define void @set_rounding_no_fp() nounwind {
   ret void
 }
 
-; Test invalid rounding mode >= 4 (constant) - should emit error.
-; CHECK-ERR: LLVM ERROR: invalid rounding mode
+;; Test invalid rounding mode >= 4 (constant) - should emit error.
+; CHECK-ERR: invalid rounding mode
 define void @set_rounding_invalid() nounwind {
   tail call void @llvm.set.rounding(i32 4)
   ret void