[llvm] [llvm][SPIRV] Expose fast `popcnt` support for SPIR-V targets (PR #109845)

Alex Voicu via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 25 10:27:31 PDT 2024


https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/109845

>From eca4879a992d90b73c7d739ad48a75cb85fa064e Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 24 Sep 2024 19:27:30 +0100
Subject: [PATCH 1/2] Advertise fast `popcnt` support for SPIR-V targets.

---
 .../Target/SPIRV/SPIRVTargetTransformInfo.h   |  10 ++
 .../Transforms/LoopIdiom/AMDGPU/popcnt.ll     |   1 +
 .../Transforms/LoopIdiom/SPIRV/lit.local.cfg  |   2 +
 .../test/Transforms/LoopIdiom/SPIRV/popcnt.ll | 128 ++++++++++++++++++
 4 files changed, 141 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg
 create mode 100644 llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index 2fbb4381da2637..7a64aa81218a89 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -24,6 +24,7 @@
 namespace llvm {
 class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
   using BaseT = BasicTTIImplBase<SPIRVTTIImpl>;
+  using TTI = TargetTransformInfo;
 
   friend BaseT;
 
@@ -37,6 +38,15 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
   explicit SPIRVTTIImpl(const SPIRVTargetMachine *TM, const Function &F)
       : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}
+
+  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
+    // SPIR-V natively supports OpBitcount, per 3.53.14 in the spec, as such it
+    // is reasonable to assume the Op is fast / preferable to the expanded loop.
+    // Furthermore, this prevents information being lost if transforms are
+    // applied to SPIR-V before lowering to a concrete target.
+    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+    return TTI::PSK_FastHardware;
+  }
 };
 
 } // namespace llvm
diff --git a/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll b/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
index 5f49d6c05ae4ed..f030d188b890ef 100644
--- a/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
+++ b/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -passes=loop-idiom -mtriple=amdgcn-- -S < %s | FileCheck %s
+; RUN: opt -passes=loop-idiom -mtriple=spirv64-amd-amdhsa -S < %s | FileCheck %s
 
 ; Mostly copied from x86 version.
 
diff --git a/llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg b/llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg
new file mode 100644
index 00000000000000..78dd74cd6dc634
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "SPIRV" in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll b/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
new file mode 100644
index 00000000000000..661e40dd96ddf8
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
@@ -0,0 +1,128 @@
+; RUN: opt -passes=loop-idiom -mtriple=spirv32-- -S < %s | FileCheck %s
+; RUN: opt -passes=loop-idiom -mtriple=spirv64-- -S < %s | FileCheck %s
+
+; Mostly copied from x86 version.
+
+;To recognize this pattern:
+;int popcount(unsigned long long a) {
+;    int c = 0;
+;    while (a) {
+;        c++;
+;        a &= a - 1;
+;    }
+;    return c;
+;}
+;
+
+; CHECK-LABEL: @popcount_i64
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
+
+; CHECK-LABEL: @popcount_i32
+; CHECK: entry
+; CHECK: llvm.ctpop.i32
+; CHECK: ret
+define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i32 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i32 %a.addr.04, -1
+  %and = and i32 %sub, %a.addr.04
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
+
+; CHECK-LABEL: @popcount_i128
+; CHECK: entry
+; CHECK: llvm.ctpop.i128
+; CHECK: ret
+define i32 @popcount_i128(i128 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i128 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i128 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i128 %a.addr.04, -1
+  %and = and i128 %sub, %a.addr.04
+  %tobool = icmp eq i128 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
+
+; To recognize this pattern:
+;int popcount(unsigned long long a, int mydata1, int mydata2) {
+;    int c = 0;
+;    while (a) {
+;        c++;
+;        a &= a - 1;
+;        mydata1 *= c;
+;        mydata2 *= (int)a;
+;    }
+;    return c + mydata1 + mydata2;
+;}
+
+; CHECK-LABEL: @popcount2
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
+entry:
+  %tobool9 = icmp eq i64 %a, 0
+  br i1 %tobool9, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
+  %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
+  %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.013, 1
+  %sub = add i64 %a.addr.010, -1
+  %and = and i64 %sub, %a.addr.010
+  %mul = mul nsw i32 %inc, %mydata1.addr.011
+  %conv = trunc i64 %and to i32
+  %mul1 = mul nsw i32 %conv, %mydata2.addr.012
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
+  %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
+  %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
+  %add2 = add i32 %add, %c.0.lcssa
+  ret i32 %add2
+}

>From 82a9f7292f14436cac2b53c841e6f707761c034a Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Wed, 25 Sep 2024 00:42:17 +0100
Subject: [PATCH 2/2] Remove incorrect assertion.

---
 llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index 7a64aa81218a89..dd459b15840c2e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -44,7 +44,8 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
     // is reasonable to assume the Op is fast / preferable to the expanded loop.
     // Furthermore, this prevents information being lost if transforms are
     // applied to SPIR-V before lowering to a concrete target.
-    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+    if (!isPowerOf2_32(TyWidth))
+      return TTI::PSK_Software; // Arbitrary bit-width INT is not core SPIR-V.
     return TTI::PSK_FastHardware;
   }
 };



More information about the llvm-commits mailing list