[llvm] dadea96 - AMDGPU: Add range to wavefrontsize intrinsic declaration (#136303)

Fri Apr 25 01:19:50 PDT 2025

Author: Matt Arsenault
Date: 2025-04-25T10:19:47+02:00
New Revision: dadea967915db7ac33963c3874097e4b1a961ca8

URL: https://github.com/llvm/llvm-project/commit/dadea967915db7ac33963c3874097e4b1a961ca8
DIFF: https://github.com/llvm/llvm-project/commit/dadea967915db7ac33963c3874097e4b1a961ca8.diff

LOG: AMDGPU: Add range to wavefrontsize intrinsic declaration (#136303)

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsAMDGPU.td
    llvm/test/Assembler/amdgcn-intrinsic-attributes.ll
    llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 75068717d9a5f..a57eb4a6dba49 100644

--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -234,9 +234,11 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[],
 def int_amdgcn_init_whole_wave : Intrinsic<[llvm_i1_ty], [], [
     IntrHasSideEffects, IntrNoMem, IntrConvergent]>;
 
-def int_amdgcn_wavefrontsize :
-  ClangBuiltin<"__builtin_amdgcn_wavefrontsize">,
-  DefaultAttrsIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>, IntrNoMem, IntrSpeculatable]>;
+def int_amdgcn_wavefrontsize
+    : ClangBuiltin<"__builtin_amdgcn_wavefrontsize">,
+      DefaultAttrsIntrinsic<[llvm_i32_ty], [],
+                            [NoUndef<RetIndex>, Range<RetIndex, 32, 65>,
+                             IntrNoMem, IntrSpeculatable]>;
 
 // Represent a relocation constant.
 def int_amdgcn_reloc_constant : DefaultAttrsIntrinsic<

diff  --git a/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll b/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll
index bd5ce2ddda3e2..744c94ac85410 100644
--- a/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll
+++ b/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll
@@ -18,4 +18,12 @@ define i32 @ds_consume(ptr addrspace(3) %ptr) {
   ret i32 %ret
 }
 
+; Test assumed range
+; CHECK: declare noundef range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize() #1
+define i32 @wavefrontsize() {
+  %ret = call i32 @llvm.amdgcn.wavefrontsize()
+  ret i32 %ret
+}
+
 ; CHECK: attributes #0 = { convergent nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

diff  --git a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll
index 92f0af30b9e4d..e065d96ad0ba6 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll
@@ -39,7 +39,7 @@ define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) noca
 ; OPT-SAME: ptr addrspace(1) captures(none) [[ARG:%.*]]) {
 ; OPT-NEXT:  [[BB:.*:]]
 ; OPT-NEXT:    [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]]
-; OPT-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32
+; OPT-NEXT:    [[TMP1:%.*]] = icmp samesign ugt i32 [[TMP]], 32
 ; OPT-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 1
 ; OPT-NEXT:    store i32 [[TMP2]], ptr addrspace(1) [[ARG]], align 4
 ; OPT-NEXT:    ret void
@@ -69,7 +69,7 @@ define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) n
 ; OPT-SAME: ptr addrspace(1) captures(none) [[ARG:%.*]]) {
 ; OPT-NEXT:  [[BB:.*:]]
 ; OPT-NEXT:    [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]]
-; OPT-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32
+; OPT-NEXT:    [[TMP1:%.*]] = icmp samesign ugt i32 [[TMP]], 32
 ; OPT-NEXT:    br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
 ; OPT:       [[BB2]]:
 ; OPT-NEXT:    store i32 1, ptr addrspace(1) [[ARG]], align 4