[llvm] 9c8c31e - Revert "[AMDGPU] Trim zero components from buffer and image stores"
Mateja Marjanovic via llvm-commits
llvm-commits at lists.llvm.org
Thu May 18 08:11:49 PDT 2023
Author: Mateja Marjanovic
Date: 2023-05-18T17:02:01+02:00
New Revision: 9c8c31eea439253f3addf9e25fb9a242c77c9587
URL: https://github.com/llvm/llvm-project/commit/9c8c31eea439253f3addf9e25fb9a242c77c9587
DIFF: https://github.com/llvm/llvm-project/commit/9c8c31eea439253f3addf9e25fb9a242c77c9587.diff
LOG: Revert "[AMDGPU] Trim zero components from buffer and image stores"
This reverts commit 3181a6e3e7dae9292782216a55c5e1f0583c1668.
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Removed:
llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 6bc751a939d4..48960ee26e66 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -872,12 +872,10 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
"STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
- [IntrWriteMem, IntrWillReturn], [SDNPMemOperand]>,
- AMDGPUImageDMaskIntrinsic;
+ [IntrWriteMem, IntrWillReturn], [SDNPMemOperand]>;
defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<
"STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
- [IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>,
- AMDGPUImageDMaskIntrinsic;
+ [IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>;
//////////////////////////////////////////////////////////////////////////
// MSAA intrinsics
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 6e8878e7aa2a..116574c289dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -376,36 +376,6 @@ static bool matchFPExtFromF16(Value *Arg, Value *&FPExtSrc) {
return false;
}
-// Trim all zero components from the end of the vector \p UseV and return
-// an appropriate bitset with known elements.
-static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
- Instruction *I) {
- auto *VTy = cast<FixedVectorType>(UseV->getType());
- unsigned VWidth = VTy->getNumElements();
- APInt DemandedElts = APInt::getAllOnes(VWidth);
-
- for (int i = VWidth - 1; i >= 0; --i) {
- APInt DemandOneElt = APInt::getOneBitSet(VWidth, i);
- KnownFPClass KnownFPClass =
- computeKnownFPClass(UseV, DemandOneElt, IC.getDataLayout(),
- /*InterestedClasses=*/fcAllFlags,
- /*Depth=*/0, &IC.getTargetLibraryInfo(),
- &IC.getAssumptionCache(), I,
- &IC.getDominatorTree(),
- &IC.getOptimizationRemarkEmitter());
- if (KnownFPClass.KnownFPClasses != fcPosZero)
- break;
- DemandedElts.clearBit(i);
- }
- return DemandedElts;
-}
-
-static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
- IntrinsicInst &II,
- APInt DemandedElts,
- int DMaskIdx = -1,
- bool IsLoad = true);
-
std::optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Intrinsic::ID IID = II.getIntrinsicID();
@@ -1120,65 +1090,26 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
break;
}
- case Intrinsic::amdgcn_buffer_store:
- case Intrinsic::amdgcn_buffer_store_format:
- case Intrinsic::amdgcn_raw_buffer_store:
- case Intrinsic::amdgcn_raw_buffer_store_format:
- case Intrinsic::amdgcn_raw_tbuffer_store:
- case Intrinsic::amdgcn_struct_buffer_store:
- case Intrinsic::amdgcn_struct_buffer_store_format:
- case Intrinsic::amdgcn_struct_tbuffer_store:
- case Intrinsic::amdgcn_tbuffer_store:
- case Intrinsic::amdgcn_image_store_1d:
- case Intrinsic::amdgcn_image_store_1darray:
- case Intrinsic::amdgcn_image_store_2d:
- case Intrinsic::amdgcn_image_store_2darray:
- case Intrinsic::amdgcn_image_store_2darraymsaa:
- case Intrinsic::amdgcn_image_store_2dmsaa:
- case Intrinsic::amdgcn_image_store_3d:
- case Intrinsic::amdgcn_image_store_cube:
- case Intrinsic::amdgcn_image_store_mip_1d:
- case Intrinsic::amdgcn_image_store_mip_1darray:
- case Intrinsic::amdgcn_image_store_mip_2d:
- case Intrinsic::amdgcn_image_store_mip_2darray:
- case Intrinsic::amdgcn_image_store_mip_3d:
- case Intrinsic::amdgcn_image_store_mip_cube: {
- if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
- break;
-
- APInt DemandedElts =
- trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
-
- int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
- if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,
- false)) {
- return IC.eraseInstFromFunction(II);
+ default: {
+ if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+ AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
+ return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
}
-
- break;
- }
}
- if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
- AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
- return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
}
return std::nullopt;
}
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
///
-/// The result of simplifying amdgcn image and buffer store intrinsics is updating
-/// definitions of the intrinsics vector argument, not Uses of the result like
-/// image and buffer loads.
/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
/// struct returns.
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
IntrinsicInst &II,
APInt DemandedElts,
- int DMaskIdx, bool IsLoad) {
+ int DMaskIdx = -1) {
- auto *IIVTy = cast<FixedVectorType>(IsLoad ? II.getType()
- : II.getOperand(0)->getType());
+ auto *IIVTy = cast<FixedVectorType>(II.getType());
unsigned VWidth = IIVTy->getNumElements();
if (VWidth == 1)
return nullptr;
@@ -1249,13 +1180,13 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
unsigned NewDMaskVal = 0;
- unsigned OrigLdStIdx = 0;
+ unsigned OrigLoadIdx = 0;
for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
const unsigned Bit = 1 << SrcIdx;
if (!!(DMaskVal & Bit)) {
- if (!!DemandedElts[OrigLdStIdx])
+ if (!!DemandedElts[OrigLoadIdx])
NewDMaskVal |= Bit;
- OrigLdStIdx++;
+ OrigLoadIdx++;
}
}
@@ -1283,45 +1214,29 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
(NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
OverloadTys[0] = NewTy;
- if (!IsLoad) {
- SmallVector<int, 8> EltMask;
- for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
- if (DemandedElts[OrigStoreIdx])
- EltMask.push_back(OrigStoreIdx);
-
- if (NewNumElts == 1)
- Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]);
- else
- Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
- }
-
Function *NewIntrin = Intrinsic::getDeclaration(
II.getModule(), II.getIntrinsicID(), OverloadTys);
CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
NewCall->takeName(&II);
NewCall->copyMetadata(II);
- if (IsLoad) {
- if (NewNumElts == 1) {
- return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall,
- DemandedElts.countr_zero());
- }
-
- SmallVector<int, 8> EltMask;
- unsigned NewLoadIdx = 0;
- for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
- if (!!DemandedElts[OrigLoadIdx])
- EltMask.push_back(NewLoadIdx++);
- else
- EltMask.push_back(NewNumElts);
- }
-
- auto *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
+ if (NewNumElts == 1) {
+ return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall,
+ DemandedElts.countr_zero());
+ }
- return Shuffle;
+ SmallVector<int, 8> EltMask;
+ unsigned NewLoadIdx = 0;
+ for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
+ if (!!DemandedElts[OrigLoadIdx])
+ EltMask.push_back(NewLoadIdx++);
+ else
+ EltMask.push_back(NewNumElts);
}
- return NewCall;
+ Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
+
+ return Shuffle;
}
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index ae7d5414649a..34bd96c9bde0 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -66,7 +66,7 @@ define double @test_constant_fold_rcp_f64_43() nounwind {
define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR13:[0-9]+]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR14:[0-9]+]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
@@ -107,7 +107,7 @@ define double @test_constant_fold_sqrt_f64_undef() nounwind {
define half @test_constant_fold_sqrt_f16_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f16_0(
-; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH0000) #[[ATTR14:[0-9]+]]
+; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH0000) #[[ATTR15:[0-9]+]]
; CHECK-NEXT: ret half [[VAL]]
;
%val = call half @llvm.amdgcn.sqrt.f16(half 0.0) nounwind readnone
@@ -116,7 +116,7 @@ define half @test_constant_fold_sqrt_f16_0() nounwind {
define float @test_constant_fold_sqrt_f32_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f32_0(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR14]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone
@@ -125,7 +125,7 @@ define float @test_constant_fold_sqrt_f32_0() nounwind {
define double @test_constant_fold_sqrt_f64_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f64_0(
-; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR14]]
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR15]]
; CHECK-NEXT: ret double [[VAL]]
;
%val = call double @llvm.amdgcn.sqrt.f64(double 0.0) nounwind readnone
@@ -134,7 +134,7 @@ define double @test_constant_fold_sqrt_f64_0() nounwind {
define half @test_constant_fold_sqrt_f16_neg0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f16_neg0(
-; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH8000) #[[ATTR14]]
+; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH8000) #[[ATTR15]]
; CHECK-NEXT: ret half [[VAL]]
;
%val = call half @llvm.amdgcn.sqrt.f16(half -0.0) nounwind readnone
@@ -143,7 +143,7 @@ define half @test_constant_fold_sqrt_f16_neg0() nounwind {
define float @test_constant_fold_sqrt_f32_neg0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f32_neg0(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR14]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR15]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.sqrt.f32(float -0.0) nounwind readnone
@@ -152,7 +152,7 @@ define float @test_constant_fold_sqrt_f32_neg0() nounwind {
define double @test_constant_fold_sqrt_f64_neg0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f64_neg0(
-; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR14]]
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR15]]
; CHECK-NEXT: ret double [[VAL]]
;
%val = call double @llvm.amdgcn.sqrt.f64(double -0.0) nounwind readnone
@@ -644,7 +644,7 @@ define i1 @test_class_isnan_f32(float %x) nounwind {
define i1 @test_class_isnan_f32_strict(float %x) nounwind {
; CHECK-LABEL: @test_class_isnan_f32_strict(
-; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 3) #[[ATTR15:[0-9]+]]
+; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 3) #[[ATTR16:[0-9]+]]
; CHECK-NEXT: ret i1 [[VAL]]
;
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) strictfp
@@ -662,7 +662,7 @@ define i1 @test_class_is_p0_n0_f32(float %x) nounwind {
define i1 @test_class_is_p0_n0_f32_strict(float %x) nounwind {
; CHECK-LABEL: @test_class_is_p0_n0_f32_strict(
-; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 96) #[[ATTR15]]
+; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 96) #[[ATTR16]]
; CHECK-NEXT: ret i1 [[VAL]]
;
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96) strictfp
@@ -1275,8 +1275,8 @@ define i32 @ubfe_offset_0_width_0(i32 %src) {
define i32 @ubfe_offset_0_width_3(i32 %src) {
; CHECK-LABEL: @ubfe_offset_0_width_3(
-; CHECK-NEXT: [[BFE:%.*]] = and i32 [[SRC:%.*]], 7
-; CHECK-NEXT: ret i32 [[BFE]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SRC:%.*]], 7
+; CHECK-NEXT: ret i32 [[TMP1]]
;
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
ret i32 %bfe
@@ -1793,7 +1793,7 @@ define i64 @icmp_constant_inputs_false() {
define i64 @icmp_constant_inputs_true() {
; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR16:[0-9]+]]
+; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR17:[0-9]+]]
; CHECK-NEXT: ret i64 [[RESULT]]
;
%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
@@ -2500,7 +2500,7 @@ define i64 @fcmp_constant_inputs_false() {
define i64 @fcmp_constant_inputs_true() {
; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR16]]
+; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR17]]
; CHECK-NEXT: ret i64 [[RESULT]]
;
%result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
@@ -2542,7 +2542,7 @@ define i64 @ballot_zero_64() {
define i64 @ballot_one_64() {
; CHECK-LABEL: @ballot_one_64(
-; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR16]]
+; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR17]]
; CHECK-NEXT: ret i64 [[B]]
;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
@@ -2568,7 +2568,7 @@ define i32 @ballot_zero_32() {
define i32 @ballot_one_32() {
; CHECK-LABEL: @ballot_one_32(
-; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR16]]
+; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR17]]
; CHECK-NEXT: ret i32 [[B]]
;
%b = call i32 @llvm.amdgcn.ballot.i32(i1 1)
@@ -5586,7 +5586,7 @@ define double @trig_preop_constfold() {
define double @trig_preop_constfold_strictfp() {
; CHECK-LABEL: @trig_preop_constfold_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR15]]
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR16]]
; CHECK-NEXT: ret double [[VAL]]
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) strictfp
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll
deleted file mode 100644
index 7f88108c1263..000000000000
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll
+++ /dev/null
@@ -1,104 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mcpu=gfx900 -S -passes=instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck -check-prefixes=GCN %s
-; RUN: opt -mcpu=gfx1010 -S -passes=instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck -check-prefixes=GCN %s
-; RUN: opt -mcpu=gfx1100 -S -passes=instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck -check-prefixes=GCN %s
-
-define amdgpu_ps void @image_store_1d_store_insert_zeros_at_end(<8 x i32> inreg %rsrc, float %vdata1, i32 %s) #0 {
-; GCN-LABEL: @image_store_1d_store_insert_zeros_at_end(
-; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.f32.i32(float [[VDATA1:%.*]], i32 1, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
-; GCN-NEXT: ret void
-;
- %newvdata1 = insertelement <4 x float> undef, float %vdata1, i32 0
- %newvdata2 = insertelement <4 x float> %newvdata1, float 0.0, i32 1
- %newvdata3 = insertelement <4 x float> %newvdata2, float 0.0, i32 2
- %newvdata4 = insertelement <4 x float> %newvdata3, float 0.0, i32 3
- call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %newvdata4, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
- ret void
-}
-
-define amdgpu_ps void @image_store_mip_1d_store_insert_zeros_at_end(<8 x i32> inreg %rsrc, float %vdata1, float %vdata2, i32 %s, i32 %mip) #0 {
-; GCN-LABEL: @image_store_mip_1d_store_insert_zeros_at_end(
-; GCN-NEXT: [[TMP1:%.*]] = insertelement <3 x float> <float 0.000000e+00, float poison, float poison>, float [[VDATA1:%.*]], i64 1
-; GCN-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[VDATA2:%.*]], i64 2
-; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.v3f32.i32(<3 x float> [[TMP2]], i32 7, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
-; GCN-NEXT: ret void
-;
- %newvdata1 = insertelement <4 x float> undef, float 0.0, i32 0
- %newvdata2 = insertelement <4 x float> %newvdata1, float %vdata1, i32 1
- %newvdata3 = insertelement <4 x float> %newvdata2, float %vdata2, i32 2
- %newvdata4 = insertelement <4 x float> %newvdata3, float 0.0, i32 3
- call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %newvdata4, i32 7, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
- ret void
-}
-
-define amdgpu_ps void @buffer_store_insert_zeros_at_end(<4 x i32> inreg %a, float %vdata1, i32 %b) {
-; GCN-LABEL: @buffer_store_insert_zeros_at_end(
-; GCN-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[VDATA1:%.*]], i64 0
-; GCN-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
-; GCN-NEXT: call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> [[TMP2]], <4 x i32> [[A:%.*]], i32 [[B:%.*]], i32 0, i1 false, i1 false)
-; GCN-NEXT: ret void
-;
- %newvdata1 = insertelement <4 x float> undef, float %vdata1, i32 0
- %newvdata2 = insertelement <4 x float> %newvdata1, float %vdata1, i32 1
- %newvdata3 = insertelement <4 x float> %newvdata2, float 0.0, i32 2
- %newvdata4 = insertelement <4 x float> %newvdata3, float 0.0, i32 3
- call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %newvdata4, <4 x i32> %a, i32 %b, i32 0, i1 0, i1 0)
- ret void
-}
-
-define amdgpu_ps void @struct_buffer_store_insert_zeros(<4 x i32> inreg %a, float %vdata1, i32 %b) {
-; GCN-LABEL: @struct_buffer_store_insert_zeros(
-; GCN-NEXT: [[TMP1:%.*]] = insertelement <3 x float> <float poison, float 0.000000e+00, float poison>, float [[VDATA1:%.*]], i64 0
-; GCN-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[VDATA1]], i64 2
-; GCN-NEXT: call void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float> [[TMP2]], <4 x i32> [[A:%.*]], i32 [[B:%.*]], i32 0, i32 42, i32 0)
-; GCN-NEXT: ret void
-;
- %newvdata1 = insertelement <4 x float> undef, float %vdata1, i32 0
- %newvdata2 = insertelement <4 x float> %newvdata1, float 0.0, i32 1
- %newvdata3 = insertelement <4 x float> %newvdata2, float %vdata1, i32 2
- %newvdata4 = insertelement <4 x float> %newvdata3, float 0.0, i32 3
- call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %newvdata4, <4 x i32> %a, i32 %b, i32 0, i32 42, i32 0)
- ret void
-}
-
-define amdgpu_ps void @struct_tbuffer_store_insert_zeros_at_beginning(<4 x i32> inreg %a, float %vdata1, i32 %b) {
-; GCN-LABEL: @struct_tbuffer_store_insert_zeros_at_beginning(
-; GCN-NEXT: [[NEWVDATA4:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>, float [[VDATA1:%.*]], i64 3
-; GCN-NEXT: call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> [[NEWVDATA4]], <4 x i32> [[A:%.*]], i32 [[B:%.*]], i32 0, i32 42, i32 0, i32 15)
-; GCN-NEXT: ret void
-;
- %newvdata1 = insertelement <4 x float> undef, float 0.0, i32 0
- %newvdata2 = insertelement <4 x float> %newvdata1, float 0.0, i32 1
- %newvdata3 = insertelement <4 x float> %newvdata2, float 0.0, i32 2
- %newvdata4 = insertelement <4 x float> %newvdata3, float %vdata1, i32 3
- call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %newvdata4, <4 x i32> %a, i32 %b, i32 0, i32 42, i32 0, i32 15)
- ret void
-}
-
-declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
-declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
-declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2
-declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2
-declare void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind writeonly }
-attributes #2 = { nounwind }
More information about the llvm-commits mailing list