[llvm-branch-commits] [llvm] [LoongArch] Enable LoopTermFold pass (PR #168045)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 14 03:27:32 PST 2025
https://github.com/zhaoqi5 created https://github.com/llvm/llvm-project/pull/168045
(No pull request description was provided.)
>From d4893ec23b280a7db5c038491a6cd65006ceaa0a Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Fri, 14 Nov 2025 17:18:12 +0800
Subject: [PATCH] [LoongArch] Enable LoopTermFold pass
---
.../LoongArch/LoongArchTargetMachine.cpp | 1 +
llvm/test/CodeGen/LoongArch/opt-pipeline.ll | 1 +
.../CodeGen/LoongArch/preferred-alignments.ll | 6 +--
.../LoopStrengthReduce/LoongArch/lsr-insns.ll | 51 +++++++++++++++++--
4 files changed, 53 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 92a9388e5cb7b..02629daeeb2f7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -153,6 +153,7 @@ class LoongArchPassConfig : public TargetPassConfig {
LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
setEnableSinkAndFold(EnableSinkFold);
+ EnableLoopTermFold = true;
}
LoongArchTargetMachine &getLoongArchTargetMachine() const {
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 661f67d4989c4..1391c44e35443 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -44,6 +44,7 @@
; LAXX-NEXT: Canonicalize Freeze Instructions in Loops
; LAXX-NEXT: Induction Variable Users
; LAXX-NEXT: Loop Strength Reduction
+; LAXX-NEXT: Loop Terminator Folding
; LAXX-NEXT: Basic Alias Analysis (stateless AA impl)
; LAXX-NEXT: Function Alias Analysis Results
; LAXX-NEXT: Merge contiguous icmps into a memcmp
diff --git a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
index 0f81f860025df..7a0e0d77f0690 100644
--- a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
+++ b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
@@ -9,13 +9,13 @@ define signext i32 @sum(ptr noalias nocapture noundef readonly %0, i32 noundef s
; LA464-NEXT: blez $a1, .LBB0_3
; LA464-NEXT: # %bb.1:
; LA464-NEXT: bstrpick.d $a1, $a1, 31, 0
+; LA464-NEXT: alsl.d $a1, $a1, $a0, 2
; LA464-NEXT: .p2align 4, , 16
; LA464-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; LA464-NEXT: ld.w $a3, $a0, 0
-; LA464-NEXT: add.w $a2, $a3, $a2
-; LA464-NEXT: addi.d $a1, $a1, -1
; LA464-NEXT: addi.d $a0, $a0, 4
-; LA464-NEXT: bnez $a1, .LBB0_2
+; LA464-NEXT: add.w $a2, $a3, $a2
+; LA464-NEXT: bne $a0, $a1, .LBB0_2
; LA464-NEXT: .LBB0_3:
; LA464-NEXT: move $a0, $a2
; LA464-NEXT: ret
diff --git a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
index a09a9a37034e1..4a342dc921f12 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
@@ -1,12 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s --passes=loop-reduce --mtriple=loongarch64 -S | FileCheck %s -check-prefix=CHECK-OPT
+; RUN: opt < %s --passes=loop-reduce,loop-term-fold --mtriple=loongarch64 -S | FileCheck %s -check-prefix=CHECK-TF
; RUN: llc < %s --mtriple=loongarch64 | FileCheck %s -check-prefix=CHECK-LLC
define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture %q) nounwind {
; CHECK-LLC-LABEL: foo:
; CHECK-LLC: # %bb.0: # %entry
; CHECK-LLC-NEXT: move $a3, $zero
-; CHECK-LLC-NEXT: ori $a4, $zero, 1024
+; CHECK-LLC-NEXT: lu12i.w $a4, 1
; CHECK-LLC-NEXT: .p2align 4, , 16
; CHECK-LLC-NEXT: .LBB0_1: # %for.body
; CHECK-LLC-NEXT: # =>This Inner Loop Header: Depth=1
@@ -14,9 +15,8 @@ define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap
; CHECK-LLC-NEXT: ldx.w $a6, $a1, $a3
; CHECK-LLC-NEXT: add.d $a5, $a6, $a5
; CHECK-LLC-NEXT: stx.w $a5, $a2, $a3
-; CHECK-LLC-NEXT: addi.d $a4, $a4, -1
; CHECK-LLC-NEXT: addi.d $a3, $a3, 4
-; CHECK-LLC-NEXT: bnez $a4, .LBB0_1
+; CHECK-LLC-NEXT: bne $a3, $a4, .LBB0_1
; CHECK-LLC-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-LLC-NEXT: ret
; CHECK-OPT-LABEL: define void @foo(
@@ -40,6 +40,25 @@ define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap
; CHECK-OPT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-OPT-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
;
+; CHECK-TF-LABEL: define void @foo(
+; CHECK-TF-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], ptr captures(none) [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-TF-NEXT: [[ENTRY:.*]]:
+; CHECK-TF-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-TF: [[FOR_COND_CLEANUP:.*]]:
+; CHECK-TF-NEXT: ret void
+; CHECK-TF: [[FOR_BODY]]:
+; CHECK-TF-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
+; CHECK-TF-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[X]], i64 [[LSR_IV1]]
+; CHECK-TF-NEXT: [[LDTMP:%.*]] = load i32, ptr [[SCEVGEP4]], align 4
+; CHECK-TF-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[Y]], i64 [[LSR_IV1]]
+; CHECK-TF-NEXT: [[LDTMP1:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
+; CHECK-TF-NEXT: [[ADD:%.*]] = add nsw i32 [[LDTMP1]], [[LDTMP]]
+; CHECK-TF-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 [[LSR_IV1]]
+; CHECK-TF-NEXT: store i32 [[ADD]], ptr [[SCEVGEP]], align 4
+; CHECK-TF-NEXT: [[LSR_IV_NEXT2]] = add i64 [[LSR_IV1]], 4
+; CHECK-TF-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq i64 [[LSR_IV_NEXT2]], 4096
+; CHECK-TF-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
entry:
br label %for.body
@@ -106,6 +125,32 @@ define void @bar(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap
; CHECK-OPT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP0]], [[LSR_IV_NEXT]]
; CHECK-OPT-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]]
;
+; CHECK-TF-LABEL: define void @bar(
+; CHECK-TF-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], ptr captures(none) [[Q:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-TF-NEXT: [[ENTRY:.*:]]
+; CHECK-TF-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-TF-NEXT: br i1 [[CMP10]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK-TF: [[FOR_BODY_PREHEADER]]:
+; CHECK-TF-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-TF-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
+; CHECK-TF-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-TF: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-TF-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK-TF: [[FOR_COND_CLEANUP]]:
+; CHECK-TF-NEXT: ret void
+; CHECK-TF: [[FOR_BODY]]:
+; CHECK-TF-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-TF-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[X]], i64 [[LSR_IV]]
+; CHECK-TF-NEXT: [[LDTMP:%.*]] = load i32, ptr [[SCEVGEP2]], align 4
+; CHECK-TF-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[Y]], i64 [[LSR_IV]]
+; CHECK-TF-NEXT: [[LDTMP1:%.*]] = load i32, ptr [[SCEVGEP1]], align 4
+; CHECK-TF-NEXT: [[ADD:%.*]] = add nsw i32 [[LDTMP1]], [[LDTMP]]
+; CHECK-TF-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 [[LSR_IV]]
+; CHECK-TF-NEXT: store i32 [[ADD]], ptr [[SCEVGEP]], align 4
+; CHECK-TF-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
+; CHECK-TF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP0]], [[LSR_IV_NEXT]]
+; CHECK-TF-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]]
+;
entry:
%cmp10 = icmp sgt i32 %n, 0
br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
More information about the llvm-branch-commits mailing list