[llvm] [LoongArch] Fix broadcast load with extension. (PR #155960)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 1 01:12:45 PDT 2025
https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/155960
>From be1adeaeafafc059c26166bd25a25b067d0ea567 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Fri, 29 Aug 2025 09:45:33 +0800
Subject: [PATCH 1/3] Fix broadcast load with extension.
---
.../LoongArch/LoongArchISelLowering.cpp | 6 +++--
.../CodeGen/LoongArch/lasx/broadcast-load.ll | 26 +++++++++++++++++++
.../CodeGen/LoongArch/lsx/broadcast-load.ll | 26 +++++++++++++++++++
3 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index fe650a0b90263..3a965f1499cb1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2519,8 +2519,10 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
return SDValue();
- if (IsIdeneity) {
- auto *LN = cast<LoadSDNode>(IdentitySrc);
+ auto *LN = cast<LoadSDNode>(IdentitySrc);
+ auto ExtType = LN->getExtensionType();
+
+ if (IsIdeneity && (ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD)) {
SDVTList Tys =
LN->isIndexed()
? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
index 205e59a18bf9d..4aa2bd76ab80c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
@@ -33,6 +33,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
ret <4 x i64> %tmp2
}
+define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.b $a0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = sext i8 %tmp to i16
+ %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+ ret <16 x i16> %tmp3
+}
+
+define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.bu $a0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = zext i8 %tmp to i16
+ %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+ ret <16 x i16> %tmp3
+}
+
define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
; LA32-LABEL: xvldrepl_d_unaligned_offset:
; LA32: # %bb.0:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
index 12224f8d59b9f..eed149957f6ab 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
@@ -31,6 +31,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
ret <2 x i64> %tmp2
}
+define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.b $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = sext i8 %tmp to i16
+ %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.bu $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = zext i8 %tmp to i16
+ %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
; LA32-LABEL: vldrepl_d_unaligned_offset:
; LA32: # %bb.0:
>From f78f7d34c34528ce6686eeb69772c0bdb5a36bf1 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Fri, 29 Aug 2025 16:51:02 +0800
Subject: [PATCH 2/3] address comments
---
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 ++--
llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll | 2 --
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 3a965f1499cb1..099fa878c5f8d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2516,13 +2516,13 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
}
// make sure that this load is valid and only has one user.
- if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
+ if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
return SDValue();
auto *LN = cast<LoadSDNode>(IdentitySrc);
auto ExtType = LN->getExtensionType();
- if (IsIdeneity && (ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD)) {
+ if (ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) {
SDVTList Tys =
LN->isIndexed()
? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
index eed149957f6ab..349684ff22be2 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
@@ -2,8 +2,6 @@
; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
-; TODO: Load a element and splat it to a vector could be lowerd to vldrepl
-
; A load has more than one user shouldn't be lowered to vldrepl
define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
; LA32-LABEL: should_not_be_optimized:
>From dcaf590aad9355ff1d514fd8ac0752d8d95d3eb6 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Mon, 1 Sep 2025 15:59:14 +0800
Subject: [PATCH 3/3] address hev's comment
---
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 099fa878c5f8d..8313869c1016d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2522,7 +2522,8 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
auto *LN = cast<LoadSDNode>(IdentitySrc);
auto ExtType = LN->getExtensionType();
- if (ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) {
+ if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
+ VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
SDVTList Tys =
LN->isIndexed()
? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
More information about the llvm-commits mailing list