[llvm] [X86][GISEL] - Legalize G_FPTOUI & G_UITOFP for X87 (PR #155562)
Pawan Nirpal via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 25 01:06:36 PDT 2026
https://github.com/pawan-nirpal-031 updated https://github.com/llvm/llvm-project/pull/155562
>From ce6347d4e088fdc2941e1a2e267cf04b75d6276b Mon Sep 17 00:00:00 2001
From: Pawan Nirpal <pnirpal at qti.qualcomm.com>
Date: Tue, 16 Dec 2025 11:07:23 +0530
Subject: [PATCH] [AArch64] - Allow for aggressive unrolling, with non-zero
LoopMicroOpBufferSize for Oryon
---
llvm/lib/Target/AArch64/AArch64SchedOryon.td | 2 +-
.../aarch64-mcpu-oryon-runtime-unroll.ll | 152 ++++++++++++++++++
2 files changed, 153 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/aarch64-mcpu-oryon-runtime-unroll.ll
diff --git a/llvm/lib/Target/AArch64/AArch64SchedOryon.td b/llvm/lib/Target/AArch64/AArch64SchedOryon.td
index 5b597b91e7459..435eaf99c6175 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedOryon.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedOryon.td
@@ -19,7 +19,7 @@ def OryonModel : SchedMachineModel {
let MicroOpBufferSize = 376;
let LoadLatency = 4;
let MispredictPenalty = 13; // 13 cycles for mispredicted branch.
- let LoopMicroOpBufferSize = 0; // Do not have a LoopMicroOpBuffer
+ let LoopMicroOpBufferSize = 16; // Oryon-1 does not have a loop micro-op buffer; we set this pseudo value to allow aggressive unrolling based on the runtime trip count.
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mcpu-oryon-runtime-unroll.ll b/llvm/test/CodeGen/AArch64/aarch64-mcpu-oryon-runtime-unroll.ll
new file mode 100644
index 0000000000000..79136cf71c005
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-mcpu-oryon-runtime-unroll.ll
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes='loop-unroll' -S %s | FileCheck %s --check-prefix=UNROLLED
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @foo(ptr %mat, ptr %sharr, ptr %barr, i16 %rows, i16 %dimout) #0 {
+; UNROLLED-LABEL: define void @foo(
+; UNROLLED-SAME: ptr [[MAT:%.*]], ptr [[SHARR:%.*]], ptr [[BARR:%.*]], i16 [[ROWS:%.*]], i16 [[DIMOUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; UNROLLED-NEXT: [[ENTRY:.*:]]
+; UNROLLED-NEXT: [[CMP33:%.*]] = icmp sgt i16 [[DIMOUT]], 0
+; UNROLLED-NEXT: br i1 [[CMP33]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END22:.*]]
+; UNROLLED: [[FOR_BODY_LR_PH]]:
+; UNROLLED-NEXT: [[CMP631:%.*]] = icmp sgt i16 [[ROWS]], 0
+; UNROLLED-NEXT: br i1 [[CMP631]], label %[[FOR_BODY_US_PREHEADER:.*]], label %[[FOR_BODY_LR_PH_SPLIT:.*]]
+; UNROLLED: [[FOR_BODY_US_PREHEADER]]:
+; UNROLLED-NEXT: [[WIDE_TRIP_COUNT39:%.*]] = zext nneg i16 [[DIMOUT]] to i64
+; UNROLLED-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i16 [[ROWS]] to i64
+; UNROLLED-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
+; UNROLLED-NEXT: br label %[[FOR_BODY_US:.*]]
+; UNROLLED: [[FOR_BODY_US]]:
+; UNROLLED-NEXT: [[INDVARS_IV36:%.*]] = phi i64 [ 0, %[[FOR_BODY_US_PREHEADER]] ], [ [[INDVARS_IV_NEXT37:%.*]], %[[FOR_COND3_FOR_INC20_CRIT_EDGE_US:.*]] ]
+; UNROLLED-NEXT: store i8 0, ptr [[BARR]], align 1
+; UNROLLED-NEXT: [[INVARIANT_GEP_US:%.*]] = getelementptr i8, ptr [[MAT]], i64 [[INDVARS_IV36]]
+; UNROLLED-NEXT: [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 1
+; UNROLLED-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
+; UNROLLED-NEXT: br i1 [[TMP1]], label %[[FOR_BODY8_US_EPIL_PREHEADER:.*]], label %[[FOR_BODY_US_NEW:.*]]
+; UNROLLED: [[FOR_BODY_US_NEW]]:
+; UNROLLED-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[XTRAITER]]
+; UNROLLED-NEXT: br label %[[FOR_BODY8_US:.*]]
+; UNROLLED: [[FOR_BODY8_US]]:
+; UNROLLED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_US_NEW]] ], [ [[INDVARS_IV_NEXT_1:%.*]], %[[FOR_INC_US_1:.*]] ]
+; UNROLLED-NEXT: [[TMP2:%.*]] = phi i8 [ 0, %[[FOR_BODY_US_NEW]] ], [ [[TMP8:%.*]], %[[FOR_INC_US_1]] ]
+; UNROLLED-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_US_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[FOR_INC_US_1]] ]
+; UNROLLED-NEXT: [[GEP_US:%.*]] = getelementptr [2 x i8], ptr [[INVARIANT_GEP_US]], i64 [[INDVARS_IV]]
+; UNROLLED-NEXT: [[TMP3:%.*]] = load i8, ptr [[GEP_US]], align 1
+; UNROLLED-NEXT: [[TOBOOL_NOT_US:%.*]] = icmp eq i8 [[TMP3]], 0
+; UNROLLED-NEXT: br i1 [[TOBOOL_NOT_US]], label %[[FOR_INC_US:.*]], label %[[IF_THEN_US:.*]]
+; UNROLLED: [[IF_THEN_US]]:
+; UNROLLED-NEXT: [[ARRAYIDX14_US:%.*]] = getelementptr inbounds nuw i8, ptr [[SHARR]], i64 [[INDVARS_IV]]
+; UNROLLED-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX14_US]], align 1
+; UNROLLED-NEXT: [[XOR30_US:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; UNROLLED-NEXT: store i8 [[XOR30_US]], ptr [[BARR]], align 1
+; UNROLLED-NEXT: br label %[[FOR_INC_US]]
+; UNROLLED: [[FOR_INC_US]]:
+; UNROLLED-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP2]], %[[FOR_BODY8_US]] ], [ [[XOR30_US]], %[[IF_THEN_US]] ]
+; UNROLLED-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; UNROLLED-NEXT: [[GEP_US_1:%.*]] = getelementptr [2 x i8], ptr [[INVARIANT_GEP_US]], i64 [[INDVARS_IV_NEXT]]
+; UNROLLED-NEXT: [[TMP6:%.*]] = load i8, ptr [[GEP_US_1]], align 1
+; UNROLLED-NEXT: [[TOBOOL_NOT_US_1:%.*]] = icmp eq i8 [[TMP6]], 0
+; UNROLLED-NEXT: br i1 [[TOBOOL_NOT_US_1]], label %[[FOR_INC_US_1]], label %[[IF_THEN_US_1:.*]]
+; UNROLLED: [[IF_THEN_US_1]]:
+; UNROLLED-NEXT: [[ARRAYIDX14_US_1:%.*]] = getelementptr inbounds nuw i8, ptr [[SHARR]], i64 [[INDVARS_IV_NEXT]]
+; UNROLLED-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX14_US_1]], align 1
+; UNROLLED-NEXT: [[XOR30_US_1:%.*]] = xor i8 [[TMP5]], [[TMP7]]
+; UNROLLED-NEXT: store i8 [[XOR30_US_1]], ptr [[BARR]], align 1
+; UNROLLED-NEXT: br label %[[FOR_INC_US_1]]
+; UNROLLED: [[FOR_INC_US_1]]:
+; UNROLLED-NEXT: [[TMP8]] = phi i8 [ [[TMP5]], %[[FOR_INC_US]] ], [ [[XOR30_US_1]], %[[IF_THEN_US_1]] ]
+; UNROLLED-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV]], 2
+; UNROLLED-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
+; UNROLLED-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; UNROLLED-NEXT: br i1 [[NITER_NCMP_1]], label %[[FOR_COND3_FOR_INC20_CRIT_EDGE_US_UNR_LCSSA:.*]], label %[[FOR_BODY8_US]]
+; UNROLLED: [[FOR_COND3_FOR_INC20_CRIT_EDGE_US_UNR_LCSSA]]:
+; UNROLLED-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1]], %[[FOR_INC_US_1]] ]
+; UNROLLED-NEXT: [[DOTUNR:%.*]] = phi i8 [ [[TMP8]], %[[FOR_INC_US_1]] ]
+; UNROLLED-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLLED-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY8_US_EPIL_PREHEADER]], label %[[FOR_COND3_FOR_INC20_CRIT_EDGE_US]]
+; UNROLLED: [[FOR_BODY8_US_EPIL_PREHEADER]]:
+; UNROLLED-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[FOR_BODY_US]] ], [ [[INDVARS_IV_UNR]], %[[FOR_COND3_FOR_INC20_CRIT_EDGE_US_UNR_LCSSA]] ]
+; UNROLLED-NEXT: [[DOTEPIL_INIT:%.*]] = phi i8 [ 0, %[[FOR_BODY_US]] ], [ [[DOTUNR]], %[[FOR_COND3_FOR_INC20_CRIT_EDGE_US_UNR_LCSSA]] ]
+; UNROLLED-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLLED-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
+; UNROLLED-NEXT: br label %[[FOR_BODY8_US_EPIL:.*]]
+; UNROLLED: [[FOR_BODY8_US_EPIL]]:
+; UNROLLED-NEXT: [[GEP_US_EPIL:%.*]] = getelementptr [2 x i8], ptr [[INVARIANT_GEP_US]], i64 [[INDVARS_IV_EPIL_INIT]]
+; UNROLLED-NEXT: [[TMP9:%.*]] = load i8, ptr [[GEP_US_EPIL]], align 1
+; UNROLLED-NEXT: [[TOBOOL_NOT_US_EPIL:%.*]] = icmp eq i8 [[TMP9]], 0
+; UNROLLED-NEXT: br i1 [[TOBOOL_NOT_US_EPIL]], label %[[FOR_INC_US_EPIL:.*]], label %[[IF_THEN_US_EPIL:.*]]
+; UNROLLED: [[IF_THEN_US_EPIL]]:
+; UNROLLED-NEXT: [[ARRAYIDX14_US_EPIL:%.*]] = getelementptr inbounds nuw i8, ptr [[SHARR]], i64 [[INDVARS_IV_EPIL_INIT]]
+; UNROLLED-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX14_US_EPIL]], align 1
+; UNROLLED-NEXT: [[XOR30_US_EPIL:%.*]] = xor i8 [[DOTEPIL_INIT]], [[TMP10]]
+; UNROLLED-NEXT: store i8 [[XOR30_US_EPIL]], ptr [[BARR]], align 1
+; UNROLLED-NEXT: br label %[[FOR_INC_US_EPIL]]
+; UNROLLED: [[FOR_INC_US_EPIL]]:
+; UNROLLED-NEXT: br label %[[FOR_COND3_FOR_INC20_CRIT_EDGE_US]]
+; UNROLLED: [[FOR_COND3_FOR_INC20_CRIT_EDGE_US]]:
+; UNROLLED-NEXT: [[INDVARS_IV_NEXT37]] = add nuw nsw i64 [[INDVARS_IV36]], 1
+; UNROLLED-NEXT: [[EXITCOND40_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT37]], [[WIDE_TRIP_COUNT39]]
+; UNROLLED-NEXT: br i1 [[EXITCOND40_NOT]], label %[[FOR_END22_LOOPEXIT:.*]], label %[[FOR_BODY_US]]
+; UNROLLED: [[FOR_BODY_LR_PH_SPLIT]]:
+; UNROLLED-NEXT: store i8 0, ptr [[BARR]], align 1
+; UNROLLED-NEXT: br label %[[FOR_END22]]
+; UNROLLED: [[FOR_END22_LOOPEXIT]]:
+; UNROLLED-NEXT: br label %[[FOR_END22]]
+; UNROLLED: [[FOR_END22]]:
+; UNROLLED-NEXT: ret void
+;
+entry:
+ %cmp33 = icmp sgt i16 %dimout, 0
+ br i1 %cmp33, label %for.body.lr.ph, label %for.end22
+
+for.body.lr.ph: ; preds = %entry
+ %cmp631 = icmp sgt i16 %rows, 0
+ br i1 %cmp631, label %for.body.us.preheader, label %for.body.lr.ph.split
+
+for.body.us.preheader: ; preds = %for.body.lr.ph
+ %wide.trip.count39 = zext nneg i16 %dimout to i64
+ %wide.trip.count = zext nneg i16 %rows to i64
+ br label %for.body.us
+
+for.body.us: ; preds = %for.body.us.preheader, %for.cond3.for.inc20_crit_edge.us
+ %indvars.iv36 = phi i64 [ 0, %for.body.us.preheader ], [ %indvars.iv.next37, %for.cond3.for.inc20_crit_edge.us ]
+ store i8 0, ptr %barr, align 1
+ %invariant.gep.us = getelementptr i8, ptr %mat, i64 %indvars.iv36
+ br label %for.body8.us
+
+for.body8.us: ; preds = %for.body.us, %for.inc.us
+ %indvars.iv = phi i64 [ 0, %for.body.us ], [ %indvars.iv.next, %for.inc.us ]
+ %0 = phi i8 [ 0, %for.body.us ], [ %3, %for.inc.us ]
+ %gep.us = getelementptr [2 x i8], ptr %invariant.gep.us, i64 %indvars.iv
+ %1 = load i8, ptr %gep.us, align 1
+ %tobool.not.us = icmp eq i8 %1, 0
+ br i1 %tobool.not.us, label %for.inc.us, label %if.then.us
+
+if.then.us: ; preds = %for.body8.us
+ %arrayidx14.us = getelementptr inbounds nuw i8, ptr %sharr, i64 %indvars.iv
+ %2 = load i8, ptr %arrayidx14.us, align 1
+ %xor30.us = xor i8 %0, %2
+ store i8 %xor30.us, ptr %barr, align 1
+ br label %for.inc.us
+
+for.inc.us: ; preds = %if.then.us, %for.body8.us
+ %3 = phi i8 [ %0, %for.body8.us ], [ %xor30.us, %if.then.us ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.cond3.for.inc20_crit_edge.us, label %for.body8.us
+
+for.cond3.for.inc20_crit_edge.us: ; preds = %for.inc.us
+ %indvars.iv.next37 = add nuw nsw i64 %indvars.iv36, 1
+ %exitcond40.not = icmp eq i64 %indvars.iv.next37, %wide.trip.count39
+ br i1 %exitcond40.not, label %for.end22, label %for.body.us
+
+for.body.lr.ph.split: ; preds = %for.body.lr.ph
+ store i8 0, ptr %barr, align 1
+ br label %for.end22
+
+for.end22: ; preds = %for.cond3.for.inc20_crit_edge.us, %for.body.lr.ph.split, %entry
+ ret void
+}
+
+attributes #0 = { "target-cpu"="oryon-1" "target-features"="+neon,+sve" }
More information about the llvm-commits
mailing list