[llvm] [llvm][SPIRV] Expose fast `popcnt` support for SPIR-V targets (PR #109845)
Alex Voicu via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 25 10:27:31 PDT 2024
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/109845
>From eca4879a992d90b73c7d739ad48a75cb85fa064e Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 24 Sep 2024 19:27:30 +0100
Subject: [PATCH 1/2] Advertise fast `popcnt` support for SPIR-V targets.
---
.../Target/SPIRV/SPIRVTargetTransformInfo.h | 10 ++
.../Transforms/LoopIdiom/AMDGPU/popcnt.ll | 1 +
.../Transforms/LoopIdiom/SPIRV/lit.local.cfg | 2 +
.../test/Transforms/LoopIdiom/SPIRV/popcnt.ll | 128 ++++++++++++++++++
4 files changed, 141 insertions(+)
create mode 100644 llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg
create mode 100644 llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index 2fbb4381da2637..7a64aa81218a89 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -24,6 +24,7 @@
namespace llvm {
class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
using BaseT = BasicTTIImplBase<SPIRVTTIImpl>;
+ using TTI = TargetTransformInfo;
friend BaseT;
@@ -37,6 +38,15 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
explicit SPIRVTTIImpl(const SPIRVTargetMachine *TM, const Function &F)
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
+ // SPIR-V natively supports OpBitcount, per 3.53.14 in the spec, as such it
+ // is reasonable to assume the Op is fast / preferable to the expanded loop.
+ // Furthermore, this prevents information being lost if transforms are
+ // applied to SPIR-V before lowering to a concrete target.
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ return TTI::PSK_FastHardware;
+ }
};
} // namespace llvm
diff --git a/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll b/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
index 5f49d6c05ae4ed..f030d188b890ef 100644
--- a/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
+++ b/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
@@ -1,4 +1,5 @@
; RUN: opt -passes=loop-idiom -mtriple=amdgcn-- -S < %s | FileCheck %s
+; RUN: opt -passes=loop-idiom -mtriple=spirv64-amd-amdhsa -S < %s | FileCheck %s
; Mostly copied from x86 version.
diff --git a/llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg b/llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg
new file mode 100644
index 00000000000000..78dd74cd6dc634
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "SPIRV" in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll b/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
new file mode 100644
index 00000000000000..661e40dd96ddf8
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
@@ -0,0 +1,128 @@
+; RUN: opt -passes=loop-idiom -mtriple=spirv32-- -S < %s | FileCheck %s
+; RUN: opt -passes=loop-idiom -mtriple=spirv64-- -S < %s | FileCheck %s
+
+; Mostly copied from x86 version.
+
+;To recognize this pattern:
+;int popcount(unsigned long long a) {
+; int c = 0;
+; while (a) {
+; c++;
+; a &= a - 1;
+; }
+; return c;
+;}
+;
+
+; CHECK-LABEL: @popcount_i64
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i64 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i64 %a.addr.04, -1
+ %and = and i64 %sub, %a.addr.04
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
+; CHECK-LABEL: @popcount_i32
+; CHECK: entry
+; CHECK: llvm.ctpop.i32
+; CHECK: ret
+define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i32 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i32 %a.addr.04, -1
+ %and = and i32 %sub, %a.addr.04
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
+; CHECK-LABEL: @popcount_i128
+; CHECK: entry
+; CHECK: llvm.ctpop.i128
+; CHECK: ret
+define i32 @popcount_i128(i128 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i128 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i128 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i128 %a.addr.04, -1
+ %and = and i128 %sub, %a.addr.04
+ %tobool = icmp eq i128 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
+; To recognize this pattern:
+;int popcount(unsigned long long a, int mydata1, int mydata2) {
+; int c = 0;
+; while (a) {
+; c++;
+; a &= a - 1;
+; mydata1 *= c;
+; mydata2 *= (int)a;
+; }
+; return c + mydata1 + mydata2;
+;}
+
+; CHECK-LABEL: @popcount2
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
+entry:
+ %tobool9 = icmp eq i64 %a, 0
+ br i1 %tobool9, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
+ %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
+ %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.013, 1
+ %sub = add i64 %a.addr.010, -1
+ %and = and i64 %sub, %a.addr.010
+ %mul = mul nsw i32 %inc, %mydata1.addr.011
+ %conv = trunc i64 %and to i32
+ %mul1 = mul nsw i32 %conv, %mydata2.addr.012
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
+ %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
+ %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
+ %add2 = add i32 %add, %c.0.lcssa
+ ret i32 %add2
+}
>From 82a9f7292f14436cac2b53c841e6f707761c034a Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Wed, 25 Sep 2024 00:42:17 +0100
Subject: [PATCH 2/2] Remove incorrect assertion.
---
llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index 7a64aa81218a89..dd459b15840c2e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -44,7 +44,8 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
// is reasonable to assume the Op is fast / preferable to the expanded loop.
// Furthermore, this prevents information being lost if transforms are
// applied to SPIR-V before lowering to a concrete target.
- assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ if (!isPowerOf2_32(TyWidth))
+ return TTI::PSK_Software; // Arbitrary bit-width INT is not core SPIR-V.
return TTI::PSK_FastHardware;
}
};
More information about the llvm-commits
mailing list