[llvm] [llvm][SPIRV] Expose fast `popcnt` support for SPIR-V targets (PR #109845)
Alex Voicu via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 19:47:04 PDT 2024
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/109845
>From eca4879a992d90b73c7d739ad48a75cb85fa064e Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 24 Sep 2024 19:27:30 +0100
Subject: [PATCH 1/6] Advertise fast `popcnt` support for SPIR-V targets.
---
.../Target/SPIRV/SPIRVTargetTransformInfo.h | 10 ++
.../Transforms/LoopIdiom/AMDGPU/popcnt.ll | 1 +
.../Transforms/LoopIdiom/SPIRV/lit.local.cfg | 2 +
.../test/Transforms/LoopIdiom/SPIRV/popcnt.ll | 128 ++++++++++++++++++
4 files changed, 141 insertions(+)
create mode 100644 llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg
create mode 100644 llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index 2fbb4381da2637..7a64aa81218a89 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -24,6 +24,7 @@
namespace llvm {
class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
using BaseT = BasicTTIImplBase<SPIRVTTIImpl>;
+ using TTI = TargetTransformInfo;
friend BaseT;
@@ -37,6 +38,15 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
explicit SPIRVTTIImpl(const SPIRVTargetMachine *TM, const Function &F)
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
+ // SPIR-V natively supports OpBitCount, per 3.53.14 in the spec; as such it
+ // is reasonable to assume the Op is fast / preferable to the expanded loop.
+ // Furthermore, this prevents information being lost if transforms are
+ // applied to SPIR-V before lowering to a concrete target.
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ return TTI::PSK_FastHardware;
+ }
};
} // namespace llvm
diff --git a/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll b/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
index 5f49d6c05ae4ed..f030d188b890ef 100644
--- a/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
+++ b/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
@@ -1,4 +1,5 @@
; RUN: opt -passes=loop-idiom -mtriple=amdgcn-- -S < %s | FileCheck %s
+; RUN: opt -passes=loop-idiom -mtriple=spirv64-amd-amdhsa -S < %s | FileCheck %s
; Mostly copied from x86 version.
diff --git a/llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg b/llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg
new file mode 100644
index 00000000000000..78dd74cd6dc634
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "SPIRV" in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll b/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
new file mode 100644
index 00000000000000..661e40dd96ddf8
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
@@ -0,0 +1,128 @@
+; RUN: opt -passes=loop-idiom -mtriple=spirv32-- -S < %s | FileCheck %s
+; RUN: opt -passes=loop-idiom -mtriple=spirv64-- -S < %s | FileCheck %s
+
+; Mostly copied from x86 version.
+
+;To recognize this pattern:
+;int popcount(unsigned long long a) {
+; int c = 0;
+; while (a) {
+; c++;
+; a &= a - 1;
+; }
+; return c;
+;}
+;
+
+; CHECK-LABEL: @popcount_i64
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i64 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i64 %a.addr.04, -1
+ %and = and i64 %sub, %a.addr.04
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
+; CHECK-LABEL: @popcount_i32
+; CHECK: entry
+; CHECK: llvm.ctpop.i32
+; CHECK: ret
+define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i32 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i32 %a.addr.04, -1
+ %and = and i32 %sub, %a.addr.04
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
+; CHECK-LABEL: @popcount_i128
+; CHECK: entry
+; CHECK: llvm.ctpop.i128
+; CHECK: ret
+define i32 @popcount_i128(i128 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i128 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i128 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i128 %a.addr.04, -1
+ %and = and i128 %sub, %a.addr.04
+ %tobool = icmp eq i128 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
+; To recognize this pattern:
+;int popcount(unsigned long long a, int mydata1, int mydata2) {
+; int c = 0;
+; while (a) {
+; c++;
+; a &= a - 1;
+; mydata1 *= c;
+; mydata2 *= (int)a;
+; }
+; return c + mydata1 + mydata2;
+;}
+
+; CHECK-LABEL: @popcount2
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
+entry:
+ %tobool9 = icmp eq i64 %a, 0
+ br i1 %tobool9, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
+ %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
+ %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.013, 1
+ %sub = add i64 %a.addr.010, -1
+ %and = and i64 %sub, %a.addr.010
+ %mul = mul nsw i32 %inc, %mydata1.addr.011
+ %conv = trunc i64 %and to i32
+ %mul1 = mul nsw i32 %conv, %mydata2.addr.012
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
+ %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
+ %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
+ %add2 = add i32 %add, %c.0.lcssa
+ ret i32 %add2
+}
>From 82a9f7292f14436cac2b53c841e6f707761c034a Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Wed, 25 Sep 2024 00:42:17 +0100
Subject: [PATCH 2/6] Remove incorrect assertion.
---
llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index 7a64aa81218a89..dd459b15840c2e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -44,7 +44,8 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
// is reasonable to assume the Op is fast / preferable to the expanded loop.
// Furthermore, this prevents information being lost if transforms are
// applied to SPIR-V before lowering to a concrete target.
- assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ if (!isPowerOf2_32(TyWidth))
+ return TTI::PSK_Software; // Arbitrary bit-width INT is not core SPIR-V.
return TTI::PSK_FastHardware;
}
};
>From db154ee2992f3d76a4848ffaf7e21665dc49127a Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Thu, 26 Sep 2024 18:33:24 +0100
Subject: [PATCH 3/6] SPIR-V doesn't handle integer bitwidths greater than 64.
---
.../Target/SPIRV/SPIRVTargetTransformInfo.h | 2 +-
.../test/Transforms/LoopIdiom/SPIRV/popcnt.ll | 23 -------------------
2 files changed, 1 insertion(+), 24 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index dd459b15840c2e..24047f31fab290 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -44,7 +44,7 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
// is reasonable to assume the Op is fast / preferable to the expanded loop.
// Furthermore, this prevents information being lost if transforms are
// applied to SPIR-V before lowering to a concrete target.
- if (!isPowerOf2_32(TyWidth))
+ if (!isPowerOf2_32(TyWidth) || TyWidth > 64)
return TTI::PSK_Software; // Arbitrary bit-width INT is not core SPIR-V.
return TTI::PSK_FastHardware;
}
diff --git a/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll b/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
index 661e40dd96ddf8..dd3a4d9699fdb1 100644
--- a/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
+++ b/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
@@ -60,29 +60,6 @@ while.end: ; preds = %while.body, %entry
ret i32 %c.0.lcssa
}
-; CHECK-LABEL: @popcount_i128
-; CHECK: entry
-; CHECK: llvm.ctpop.i128
-; CHECK: ret
-define i32 @popcount_i128(i128 %a) nounwind uwtable readnone ssp {
-entry:
- %tobool3 = icmp eq i128 %a, 0
- br i1 %tobool3, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
- %a.addr.04 = phi i128 [ %and, %while.body ], [ %a, %entry ]
- %inc = add nsw i32 %c.05, 1
- %sub = add i128 %a.addr.04, -1
- %and = and i128 %sub, %a.addr.04
- %tobool = icmp eq i128 %and, 0
- br i1 %tobool, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
- ret i32 %c.0.lcssa
-}
-
; To recognize this pattern:
;int popcount(unsigned long long a, int mydata1, int mydata2) {
; int c = 0;
>From ec6167f18e208894a052efa8b1b052fe77d69770 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Thu, 26 Sep 2024 18:33:45 +0100
Subject: [PATCH 4/6] Add opt->llc->SPIR-V test.
---
.../optimizations/recognize-popcnt-loop.ll | 114 ++++++++++++++++++
1 file changed, 114 insertions(+)
create mode 100644 llvm/test/CodeGen/SPIRV/optimizations/recognize-popcnt-loop.ll
diff --git a/llvm/test/CodeGen/SPIRV/optimizations/recognize-popcnt-loop.ll b/llvm/test/CodeGen/SPIRV/optimizations/recognize-popcnt-loop.ll
new file mode 100644
index 00000000000000..5ddc9bd898922d
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/optimizations/recognize-popcnt-loop.ll
@@ -0,0 +1,114 @@
+; RUN: opt -O3 -mtriple=spirv32-- %s -o - | llc -O3 -mtriple=spirv32-- -o - | FileCheck %s
+; RUN: %if spirv-tools %{ opt -O3 -mtriple=spirv32-- %s -o - | llc -O3 -mtriple=spirv32-- -o - -filetype=obj | spirv-val %}
+
+; RUN: opt -O3 -mtriple=spirv64-- %s -o - | llc -O3 -mtriple=spirv64-- -o - | FileCheck %s
+; RUN: %if spirv-tools %{ opt -O3 -mtriple=spirv64-- %s -o - | llc -O3 -mtriple=spirv64-- -o - -filetype=obj | spirv-val %}
+
+; Mostly copied from x86 version.
+
+;To recognize this pattern:
+;int popcount(unsigned long long a) {
+; int c = 0;
+; while (a) {
+; c++;
+; a &= a - 1;
+; }
+; return c;
+;}
+
+; CHECK-DAG: OpName %[[POPCNT64:.*]] "popcount_i64"
+; CHECK-DAG: OpName %[[POPCNT32:.*]] "popcount_i32"
+; CHECK-DAG: OpName %[[POPCNT2:.*]] "popcount2"
+; CHECK-DAG: %[[INT64:.*]] = OpTypeInt 64 0
+; CHECK-DAG: %[[INT32:.*]] = OpTypeInt 32 0
+
+define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i64 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i64 %a.addr.04, -1
+ %and = and i64 %sub, %a.addr.04
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+; CHECK-DAG: %[[POPCNT64]] = OpFunction
+; CHECK: %[[A:.*]] = OpFunctionParameter %[[INT64]]
+; CHECK-DAG: %{{.+}} = OpBitCount %[[INT64]] %[[A]]
+; CHECK-DAG: OpFunctionEnd
+
+
+define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i32 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i32 %a.addr.04, -1
+ %and = and i32 %sub, %a.addr.04
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+; CHECK: %[[POPCNT32]] = OpFunction
+; CHECK: %[[A:.*]] = OpFunctionParameter %[[INT32]]
+; CHECK-DAG: %{{.*}} = OpBitCount %[[INT32]] %[[A]]
+; CHECK-DAG: OpFunctionEnd
+
+; To recognize this pattern:
+;int popcount(unsigned long long a, int mydata1, int mydata2) {
+; int c = 0;
+; while (a) {
+; c++;
+; a &= a - 1;
+; mydata1 *= c;
+; mydata2 *= (int)a;
+; }
+; return c + mydata1 + mydata2;
+;}
+
+define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
+entry:
+ %tobool9 = icmp eq i64 %a, 0
+ br i1 %tobool9, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
+ %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
+ %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.013, 1
+ %sub = add i64 %a.addr.010, -1
+ %and = and i64 %sub, %a.addr.010
+ %mul = mul nsw i32 %inc, %mydata1.addr.011
+ %conv = trunc i64 %and to i32
+ %mul1 = mul nsw i32 %conv, %mydata2.addr.012
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
+ %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
+ %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
+ %add2 = add i32 %add, %c.0.lcssa
+ ret i32 %add2
+}
+; CHECK: %[[POPCNT2]] = OpFunction
+; CHECK: %[[A:.*]] = OpFunctionParameter %[[INT64]]
+; CHECK-DAG: %{{.*}} = OpBitCount %[[INT64]] %[[A]]
+; CHECK-DAG: OpFunctionEnd
\ No newline at end of file
>From 5476fcda96fd82b743aabe7de4f4f9eb49f1c28e Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 27 Sep 2024 03:17:54 +0100
Subject: [PATCH 5/6] Add missing whitespace.
---
llvm/test/CodeGen/SPIRV/optimizations/recognize-popcnt-loop.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/SPIRV/optimizations/recognize-popcnt-loop.ll b/llvm/test/CodeGen/SPIRV/optimizations/recognize-popcnt-loop.ll
index 5ddc9bd898922d..9e1f52bc6f9b0e 100644
--- a/llvm/test/CodeGen/SPIRV/optimizations/recognize-popcnt-loop.ll
+++ b/llvm/test/CodeGen/SPIRV/optimizations/recognize-popcnt-loop.ll
@@ -111,4 +111,4 @@ while.end: ; preds = %while.body, %entry
; CHECK: %[[POPCNT2]] = OpFunction
; CHECK: %[[A:.*]] = OpFunctionParameter %[[INT64]]
; CHECK-DAG: %{{.*}} = OpBitCount %[[INT64]] %[[A]]
-; CHECK-DAG: OpFunctionEnd
\ No newline at end of file
+; CHECK-DAG: OpFunctionEnd
>From f0112ffe8fba129d0847d9afb548ef804dfa820a Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 27 Sep 2024 03:46:50 +0100
Subject: [PATCH 6/6] No need to duplicate the same test under both AMDGPU and
SPIRV.
---
llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll b/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
index f030d188b890ef..5f49d6c05ae4ed 100644
--- a/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
+++ b/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
@@ -1,5 +1,4 @@
; RUN: opt -passes=loop-idiom -mtriple=amdgcn-- -S < %s | FileCheck %s
-; RUN: opt -passes=loop-idiom -mtriple=spirv64-amd-amdhsa -S < %s | FileCheck %s
; Mostly copied from x86 version.
More information about the llvm-commits
mailing list