[llvm] [AMDGPU][GlobalIsel] Use isRegType to check for legal types for G_FREEZE & G_IMPLICIT_DEF (PR #101331)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 06:09:20 PDT 2024
https://github.com/sstipanovic created https://github.com/llvm/llvm-project/pull/101331
G_FREEZE was legal for <13 x S32>, which caused an infinite loop in the combiner.
From d1d5b16068df5561492797a37cdb032fe72746e5 Mon Sep 17 00:00:00 2001
From: Stefan Stipanovic <Stefan.Stipanovic at amd.com>
Date: Wed, 31 Jul 2024 15:02:02 +0200
Subject: [PATCH] [AMDGPU][GlobalIsel] Use isRegType to check for legal types
for G_FREEZE & G_IMPLICIT_DEF
Change-Id: I9d5c52744b77b3820d755a6b2cd872730e6a99c7
---
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 10 +-
.../freeze_implicit_def_legalizer.ll | 120 ++++++++++++++++++
2 files changed, 128 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c6c4b8f930647..8f99ad6e393cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -889,10 +889,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(0, S16, S64);
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
- .legalIf(isRegisterType(0))
// s1 and s16 are special cases because they have legal operations on
// them, but don't really occupy registers in the normal way.
- .legalFor({S1, S16})
+ .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256})
+ .legalFor(AllS32Vectors)
+ .legalFor(AllS64Vectors)
+ .legalFor(AddrSpaces64)
+ .legalFor(AddrSpaces32)
+ .legalFor(AddrSpaces128)
+ .legalIf(isPointer(0))
+ .clampNumElements(0, V16S32, V32S32)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.clampScalarOrElt(0, S32, MaxScalar)
.widenScalarToNextPow2(0, 32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll
new file mode 100644
index 0000000000000..2fcc6e9571022
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p32:32:32-p64:32:32-p65:32:32"
+target triple = "amdgcn--amdpal"
+
+define amdgpu_cs void @_amdgpu_cs_main(i64 %0) {
+; GFX10-LABEL: _amdgpu_cs_main:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:32
+; GFX10-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(1)
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v5
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_cmp_gt_f32_e64 s0, 0, v7
+; GFX10-NEXT: s_and_b32 s0, vcc_lo, s0
+; GFX10-NEXT: s_and_saveexec_b32 s1, s0
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: _amdgpu_cs_main:
+; GFX11: ; %bb.0: ; %.entry
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off offset:32
+; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(1)
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v5
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0, v1
+; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
+; GFX11-NEXT: s_and_saveexec_b32 s1, s0
+; GFX11-NEXT: s_endpgm
+.entry:
+ %1 = inttoptr i64 %0 to ptr addrspace(1)
+ %2 = load float, ptr addrspace(1) %1, align 4
+ %3 = call float @llvm.fabs.f32(float %2)
+ %4 = fcmp olt float %3, 1.000000e+00
+ %5 = getelementptr i8, ptr addrspace(1) %1, i64 4
+ %6 = load float, ptr addrspace(1) %5, align 4
+ %.fr.i0 = freeze float %6
+ %7 = getelementptr i8, ptr addrspace(1) %1, i64 16
+ %8 = load float, ptr addrspace(1) %7, align 4
+ %.fr123.i0 = freeze float %8
+ %9 = fadd float %.fr123.i0, 0.000000e+00
+ %10 = call float @llvm.fabs.f32(float %9)
+ %11 = and i1 false, %4
+ %12 = getelementptr i8, ptr addrspace(1) %1, i64 20
+ %13 = load float, ptr addrspace(1) %12, align 4
+ %14 = call float @llvm.fabs.f32(float %13)
+ %15 = fcmp olt float %14, 1.000000e+00
+ %16 = and i1 %15, false
+ %17 = getelementptr i8, ptr addrspace(1) %1, i64 24
+ %18 = load float, ptr addrspace(1) %17, align 4
+ %19 = call float @llvm.fabs.f32(float %18)
+ %20 = fcmp olt float %19, 1.000000e+00
+ %21 = and i1 %20, false
+ %22 = getelementptr i8, ptr addrspace(1) %1, i64 28
+ %23 = load float, ptr addrspace(1) %22, align 4
+ %.fr128.i0 = freeze float %23
+ %24 = fadd float %.fr128.i0, 0.000000e+00
+ %25 = call float @llvm.fabs.f32(float %24)
+ %26 = getelementptr i8, ptr addrspace(1) %1, i64 36
+ %27 = load float, ptr addrspace(1) %26, align 4
+ %28 = call float @llvm.fabs.f32(float %27)
+ %29 = fcmp olt float %28, 1.000000e+00
+ %30 = and i1 %29, false
+ %31 = getelementptr i8, ptr addrspace(1) %1, i64 40
+ %32 = load float, ptr addrspace(1) %31, align 4
+ %.fr133.i0 = freeze float %32
+ %33 = fadd float %.fr133.i0, 0.000000e+00
+ %34 = call float @llvm.fabs.f32(float %33)
+ %35 = getelementptr i8, ptr addrspace(1) %1, i64 44
+ %36 = load float, ptr addrspace(1) %35, align 4
+ %37 = fcmp olt float %36, 0.000000e+00
+ %.i112 = getelementptr i8, ptr addrspace(1) %1, i64 8
+ %.ii1 = load float, ptr addrspace(1) %.i112, align 4
+ %.i213 = getelementptr i8, ptr addrspace(1) %1, i64 12
+ %.ii2 = load float, ptr addrspace(1) %.i213, align 4
+ %.fr.i1 = freeze float %.ii1
+ %.fr.i2 = freeze float %.ii2
+ %38 = fcmp olt float %.fr.i0, 0.000000e+00
+ %39 = fadd float %.fr.i1, 0.000000e+00
+ %40 = call float @llvm.fabs.f32(float %39)
+ %41 = fadd float %.fr.i2, 0.000000e+00
+ %42 = call float @llvm.fabs.f32(float %41)
+ %43 = and i1 %37, %38
+ %.i124 = getelementptr i8, ptr addrspace(1) %1, i64 32
+ %.ii125 = load float, ptr addrspace(1) %.i124, align 4
+ %.fr128.i1 = freeze float %.ii125
+ %44 = fadd float %.fr128.i1, 0.000000e+00
+ %45 = call float @llvm.fabs.f32(float %44)
+ %.i234 = getelementptr i8, ptr addrspace(1) %1, i64 48
+ %.ii235 = load float, ptr addrspace(1) %.i234, align 4
+ %.fr133.i2 = freeze float %.ii235
+ %46 = fadd float %.fr133.i2, 0.000000e+00
+ %47 = call float @llvm.fabs.f32(float %46)
+ br i1 %43, label %48, label %53
+
+48: ; preds = %.entry
+ %49 = call i64 @llvm.amdgcn.s.getpc()
+ %50 = and i64 %49, 1
+ %51 = inttoptr i64 %50 to ptr addrspace(4)
+ %52 = load <4 x i32>, ptr addrspace(4) %51, align 16
+ br label %53
+
+53: ; preds = %48, %.entry
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.fabs.f32(float) #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i64 @llvm.amdgcn.s.getpc() #0
+
+; uselistorder directives
+uselistorder ptr @llvm.fabs.f32, { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
More information about the llvm-commits mailing list