[llvm] [LoongArch] Fix broadcast load with extension. (PR #155960)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 28 19:03:56 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-loongarch

Author: None (tangaac)

<details>
<summary>Changes</summary>

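PR #<!-- -->135896 introduced lowering to the [x]vldrepl instructions without handling extending loads.
This patch fixes that by performing the broadcast-load lowering only when the source is a non-extending load.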

---
Full diff: https://github.com/llvm/llvm-project/pull/155960.diff


3 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+2-1) 
- (modified) llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll (+38-6) 
- (modified) llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll (+39-6) 


``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ffb6c2980026f..478c335c3f07e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2471,8 +2471,9 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
   if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
     return SDValue();
 
-  if (IsIdeneity) {
+  if (IsIdeneity && ISD::isNON_EXTLoad(IdentitySrc.getNode())) {
     auto *LN = cast<LoadSDNode>(IdentitySrc);
+
     SDVTList Tys =
         LN->isIndexed()
             ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
index 976924bdca686..89592a0886cc1 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
@@ -18,6 +18,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
   ret <4 x i64> %tmp2
 }
 
+define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = sext i8 %tmp to i16
+  %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %tmp3
+}
+
+define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.bu $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = zext i8 %tmp to i16
+  %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %tmp3
+}
+
 define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_d_unaligned_offset:
 ; CHECK:       # %bb.0:
@@ -34,7 +60,8 @@ define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
 define <32 x i8> @xvldrepl_b(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_b:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.b $xr0, $a0, 0
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i8, ptr %ptr
   %tmp1 = insertelement <32 x i8> zeroinitializer, i8 %tmp, i32 0
@@ -45,7 +72,8 @@ define <32 x i8> @xvldrepl_b(ptr %ptr) {
 define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_b_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.b $xr0, $a0, 33
+; CHECK-NEXT:    ld.b $a0, $a0, 33
+; CHECK-NEXT:    xvreplgr2vr.b $xr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i8, ptr %ptr, i64 33
   %tmp = load i8, ptr %p
@@ -58,7 +86,8 @@ define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
 define <16 x i16> @xvldrepl_h(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.h $xr0, $a0, 0
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i16, ptr %ptr
   %tmp1 = insertelement <16 x i16> zeroinitializer, i16 %tmp, i32 0
@@ -69,7 +98,8 @@ define <16 x i16> @xvldrepl_h(ptr %ptr) {
 define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_h_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.h $xr0, $a0, 66
+; CHECK-NEXT:    ld.h $a0, $a0, 66
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i16, ptr %ptr, i64 33
   %tmp = load i16, ptr %p
@@ -81,7 +111,8 @@ define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
 define <8 x i32> @xvldrepl_w(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_w:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.w $xr0, $a0, 0
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i32, ptr %ptr
   %tmp1 = insertelement <8 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -92,7 +123,8 @@ define <8 x i32> @xvldrepl_w(ptr %ptr) {
 define <8 x i32> @xvldrepl_w_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_w_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.w $xr0, $a0, 132
+; CHECK-NEXT:    ld.w $a0, $a0, 132
+; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i32, ptr %ptr, i64 33
   %tmp = load i32, ptr %p
diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
index c46747ef30509..a8cddbf9e6400 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
@@ -18,6 +18,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
   ret <2 x i64> %tmp2
 }
 
+define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = sext i8 %tmp to i16
+  %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.bu $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = zext i8 %tmp to i16
+  %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %tmp3
+}
+
 define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_d_unaligned_offset:
 ; CHECK:       # %bb.0:
@@ -34,7 +60,8 @@ define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
 define <16 x i8> @vldrepl_b(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_b:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.b $vr0, $a0, 0
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.b $vr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i8, ptr %ptr
   %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %tmp, i32 0
@@ -45,7 +72,8 @@ define <16 x i8> @vldrepl_b(ptr %ptr) {
 define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_b_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.b $vr0, $a0, 33
+; CHECK-NEXT:    ld.b $a0, $a0, 33
+; CHECK-NEXT:    vreplgr2vr.b $vr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i8, ptr %ptr, i64 33
   %tmp = load i8, ptr %p
@@ -58,7 +86,8 @@ define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
 define <8 x i16> @vldrepl_h(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.h $vr0, $a0, 0
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i16, ptr %ptr
   %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %tmp, i32 0
@@ -69,7 +98,8 @@ define <8 x i16> @vldrepl_h(ptr %ptr) {
 define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_h_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.h $vr0, $a0, 66
+; CHECK-NEXT:    ld.h $a0, $a0, 66
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i16, ptr %ptr, i64 33
   %tmp = load i16, ptr %p
@@ -81,7 +111,8 @@ define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
 define <4 x i32> @vldrepl_w(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_w:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.w $vr0, $a0, 0
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.w $vr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i32, ptr %ptr
   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -92,7 +123,8 @@ define <4 x i32> @vldrepl_w(ptr %ptr) {
 define <4 x i32> @vldrepl_w_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_w_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.w $vr0, $a0, 132
+; CHECK-NEXT:    ld.w $a0, $a0, 132
+; CHECK-NEXT:    vreplgr2vr.w $vr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i32, ptr %ptr, i64 33
   %tmp = load i32, ptr %p
@@ -169,3 +201,4 @@ define <2 x double> @vldrepl_d_dbl_offset(ptr %ptr) {
   %tmp2 = shufflevector <2 x double> %tmp1, <2 x double> poison, <2 x i32> zeroinitializer
   ret <2 x double> %tmp2
 }
+

``````````

</details>


https://github.com/llvm/llvm-project/pull/155960


More information about the llvm-commits mailing list