[llvm] [NFC][AMDGPU] Add D16 test for multiple fptrunc image sample (PR #141771)
Harrison Hao via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 09:15:37 PDT 2025
https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/141771
From 6deaf2381f43b1f35cf2ee244990c800533d1627 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Wed, 28 May 2025 14:22:08 +0000
Subject: [PATCH] [NFC][AMDGPU] Add a new test for image-d16
---
.../InstCombine/AMDGPU/image-d16.ll | 44 +++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
index 30431ad724843..9e6c1f0166bac 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
@@ -3,6 +3,7 @@
; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx810 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1100 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX7-LABEL: @image_sample_2d_fptrunc_to_d16(
@@ -121,6 +122,49 @@ main_body:
ret half %addf_sum.2
}
+define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v, ptr addrspace(7) %out) {
+; GFX7-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX7-NEXT: main_body:
+; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX7-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half
+; GFX7-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half
+; GFX7-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
+; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
+; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
+; GFX7-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX7-NEXT: ret void
+;
+; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX81PLUS-NEXT: main_body:
+; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX81PLUS-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX81PLUS-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half
+; GFX81PLUS-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX81PLUS-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half
+; GFX81PLUS-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX81PLUS-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
+; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
+; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
+; GFX81PLUS-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX81PLUS-NEXT: ret void
+;
+main_body:
+ %sample = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+ %e0 = extractelement <4 x float> %sample, i32 0
+ %h0 = fptrunc float %e0 to half
+ %e1 = extractelement <4 x float> %sample, i32 1
+ %h1 = fptrunc float %e1 to half
+ %e2 = extractelement <4 x float> %sample, i32 2
+ %h2 = fptrunc float %e2 to half
+ %mul = fmul half %h0, %h1
+ %res = fadd half %mul, %h2
+ store half %res, ptr addrspace(7) %out, align 2
+ ret void
+}
+
define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX7-LABEL: @image_gather4_2d_v4f32(
; GFX7-NEXT: main_body:
More information about the llvm-commits mailing list