[llvm] [AggressiveInstCombine] Make cttz fold more resilient to non-array geps (PR #150896)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 28 00:46:53 PDT 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/150896
Similar to #150639, this fixes the AggressiveInstCombine fold that converts cttz lookup tables into cttz intrinsics so that it also handles geps whose source element type is not an array type, i.e. `gep i16 @glob, i64 %idx` instead of `gep [64 x i16] @glob, i64 0, i64 %idx`.
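For reference, here is a minimal sketch of the two GEP shapes involved, adapted from the ctz1 tests in this patch (the function name `@ctz_gep_shapes` and the combined return value are made up purely for illustration; the table contents and index computation are taken from the existing `@ctz1.table` test):

```llvm
@ctz1.table = internal constant [32 x i8] c"\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 16

define i32 @ctz_gep_shapes(i32 %x) {
entry:
  ; Isolate the lowest set bit and compute the de Bruijn table index.
  %sub = sub i32 0, %x
  %and = and i32 %sub, %x
  %mul = mul i32 %and, 125613361
  %shr = lshr i32 %mul, 27
  %idx = zext i32 %shr to i64

  ; Array-typed GEP with two indices: the shape the fold already handled.
  ; (inbounds implies nusw, which the fold checks via hasNoUnsignedSignedWrap.)
  %p.array = getelementptr inbounds [32 x i8], ptr @ctz1.table, i64 0, i64 %idx
  %v.array = load i8, ptr %p.array, align 1

  ; Single-index GEP over the element type: the shape this patch also accepts.
  %p.flat = getelementptr inbounds i8, ptr @ctz1.table, i64 %idx
  %v.flat = load i8, ptr %p.flat, align 1

  ; Both loads read the same table entry; combine them only to keep the IR valid.
  %sum = add i8 %v.array, %v.flat
  %conv = zext i8 %sum to i32
  ret i32 %conv
}
```

Both geps address the same element of the table, so the recognizer now normalizes the two-index array form down to the element type and index before matching the lowest-set-bit pattern.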
>From d05b6407c7b5e29549944826f3266d7985fab343 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 28 Jul 2025 08:29:52 +0100
Subject: [PATCH] [AggressiveInstCombine] Make cttz fold more resilient to
non-array geps
---
.../AggressiveInstCombine.cpp | 32 +++++++------
.../lower-table-based-cttz-basics.ll | 47 +++++++++++++++++++
.../PhaseOrdering/lower-table-based-cttz.ll | 42 +++++++++++++++--
3 files changed, 101 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 7af5ba4e0e103..975590214ffa6 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -547,14 +547,20 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
return false;
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
- if (!GEP || !GEP->hasNoUnsignedSignedWrap() || GEP->getNumIndices() != 2)
+ if (!GEP || !GEP->hasNoUnsignedSignedWrap())
return false;
- if (!GEP->getSourceElementType()->isArrayTy())
- return false;
-
- uint64_t ArraySize = GEP->getSourceElementType()->getArrayNumElements();
- if (ArraySize != 32 && ArraySize != 64)
+ Type *GEPSrcEltTy = GEP->getSourceElementType();
+ Value *GepIdx;
+ if (GEP->getNumIndices() == 2) {
+ if (!GEPSrcEltTy->isArrayTy() ||
+ !match(GEP->idx_begin()->get(), m_ZeroInt()))
+ return false;
+ GEPSrcEltTy = GEPSrcEltTy->getArrayElementType();
+ GepIdx = std::next(GEP->idx_begin())->get();
+ } else if (GEP->getNumIndices() == 1)
+ GepIdx = GEP->idx_begin()->get();
+ else
return false;
GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
@@ -563,21 +569,17 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
ConstantDataArray *ConstData =
dyn_cast<ConstantDataArray>(GVTable->getInitializer());
- if (!ConstData)
- return false;
-
- if (!match(GEP->idx_begin()->get(), m_ZeroInt()))
+ if (!ConstData || ConstData->getElementType() != GEPSrcEltTy)
return false;
- Value *Idx2 = std::next(GEP->idx_begin())->get();
Value *X1;
uint64_t MulConst, ShiftConst;
// FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
// probably fail for other (e.g. 32-bit) targets.
- if (!match(Idx2, m_ZExtOrSelf(
- m_LShr(m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
- m_ConstantInt(MulConst)),
- m_ConstantInt(ShiftConst)))))
+ if (!match(GepIdx, m_ZExtOrSelf(m_LShr(
+ m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
+ m_ConstantInt(MulConst)),
+ m_ConstantInt(ShiftConst)))))
return false;
unsigned InputBits = X1->getType()->getScalarSizeInBits();
diff --git a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll
index 4d571999df372..0bfa891a7887c 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll
@@ -276,3 +276,50 @@ entry:
%0 = load i32, ptr %arrayidx, align 4
ret i32 %0
}
+
+define i32 @ctz1_with_i8_gep(i32 %x) {
+; CHECK-LABEL: @ctz1_with_i8_gep(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+entry:
+ %sub = sub i32 0, %x
+ %and = and i32 %sub, %x
+ %mul = mul i32 %and, 125613361
+ %shr = lshr i32 %mul, 27
+ %idxprom = zext i32 %shr to i64
+ %arrayidx = getelementptr inbounds i8, ptr @ctz7.table, i64 %idxprom
+ %0 = load i8, ptr %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+define i32 @ctz2_with_i8_gep(i32 %x) {
+; CHECK-LABEL: @ctz2_with_i8_gep(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[X]]
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[AND]], 72416175
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 26
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[SHR]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i8], ptr @ctz2.table, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+entry:
+ %sub = sub i32 0, %x
+ %and = and i32 %sub, %x
+ %mul = mul i32 %and, 72416175
+ %shr = lshr i32 %mul, 26
+ %idxprom = zext i32 %shr to i64
+ %arrayidx = getelementptr inbounds [64 x i8], ptr @ctz2.table, i64 0, i64 %idxprom
+ %0 = load i16, ptr %arrayidx, align 2
+ %conv = sext i16 %0 to i32
+ ret i32 %conv
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/lower-table-based-cttz.ll b/llvm/test/Transforms/PhaseOrdering/lower-table-based-cttz.ll
index 19fbc1f1ae64e..4455016c3e4a4 100644
--- a/llvm/test/Transforms/PhaseOrdering/lower-table-based-cttz.ll
+++ b/llvm/test/Transforms/PhaseOrdering/lower-table-based-cttz.ll
@@ -1,3 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -O3 -S < %s | FileCheck %s
+
;; This tests lowering of the implementations of table-based ctz
;; algorithm to the llvm.cttz instruction in the -O3 case.
@@ -13,13 +16,17 @@
;; }
;; Compiled as: clang -O3 test.c -S -emit-llvm -Xclang -disable-llvm-optzns
-; RUN: opt -O3 -S < %s | FileCheck %s
-
-; CHECK: call range(i32 0, 33) i32 @llvm.cttz.i32
-
@ctz1.table = internal constant [32 x i8] c"\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 16
-define i32 @ctz1(i32 noundef %x) {
+define i32 @ctz(i32 noundef %x) {
+; CHECK-LABEL: define range(i32 0, 32) i32 @ctz(
+; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
+; CHECK-NEXT: ret i32 [[CONV]]
+;
entry:
%x.addr = alloca i32, align 4
store i32 %x, ptr %x.addr, align 4
@@ -35,3 +42,28 @@ entry:
%conv = sext i8 %2 to i32
ret i32 %conv
}
+
+define i32 @ctz_nonarraygep(i32 noundef %x) {
+; CHECK-LABEL: define range(i32 0, 32) i32 @ctz_nonarraygep(
+; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, ptr %x.addr, align 4
+ %0 = load i32, ptr %x.addr, align 4
+ %1 = load i32, ptr %x.addr, align 4
+ %sub = sub i32 0, %1
+ %and = and i32 %0, %sub
+ %mul = mul i32 %and, 125613361
+ %shr = lshr i32 %mul, 27
+ %idxprom = zext i32 %shr to i64
+ %arrayidx = getelementptr inbounds i8, ptr @ctz1.table, i64 %idxprom
+ %2 = load i8, ptr %arrayidx, align 1
+ %conv = sext i8 %2 to i32
+ ret i32 %conv
+}