[llvm] [VPlan] Preserve IR flags when widening casts (PR #115373)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 18:02:48 PST 2024
https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/115373
>From 79de03674f5424eac1a71ad7846d0b21a251608a Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Thu, 7 Nov 2024 14:35:41 -0600
Subject: [PATCH 1/3] [LV][X86] Add test for preserving flags when widening
casts; NFC
---
.../LoopVectorize/X86/uitofp-preserve-nneg.ll | 78 +++++++++++++++++++
1 file changed, 78 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll b/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
new file mode 100644
index 00000000000000..d28aeee6e2a817
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes="default<O3>,loop-vectorize" -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @uitofp_preserve_nneg(ptr nocapture noundef writeonly %result, i32 noundef %size, float noundef %y, float noundef %delta) {
+; CHECK-LABEL: @uitofp_preserve_nneg(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[SIZE]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER4:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[DELTA:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = uitofp <4 x i32> [[VEC_IND]] to <4 x float>
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[TMP0]], <4 x float> [[BROADCAST_SPLAT3]])
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[RESULT:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER4]]
+; CHECK: for.body.preheader4:
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER4]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[CONV:%.*]] = uitofp nneg i32 [[TMP4]] to float
+; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.fmuladd.f32(float [[DELTA]], float [[CONV]], float [[Y]])
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[RESULT]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store float [[TMP5]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp4 = icmp sgt i32 %size, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %t.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %conv = uitofp nneg i32 %t.05 to float
+ %0 = tail call float @llvm.fmuladd.f32(float %delta, float %conv, float %y)
+ %idxprom = zext nneg i32 %t.05 to i64
+ %arrayidx = getelementptr inbounds float, ptr %result, i64 %idxprom
+ store float %0, ptr %arrayidx, align 4
+ %inc = add nuw nsw i32 %t.05, 1
+ %cmp = icmp slt i32 %inc, %size
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
>From 6eee306821c78fd3282754b5f36d5c155af55f38 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Thu, 7 Nov 2024 14:36:36 -0600
Subject: [PATCH 2/3] [VPlan] Preserve IR flags when widening casts
We have `nneg` for both `zext` and `uitofp`.
Fixes #114856
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 ++
.../CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll | 2 ++
.../CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll | 1 +
.../AMDGPU/machine-function-info-long-branch-reg-debug.ll | 1 +
.../MIR/AMDGPU/machine-function-info-long-branch-reg.ll | 1 +
.../CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir | 8 ++++++++
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll | 6 +++++-
.../Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll | 2 +-
.../mir/preserve-machine-function-info-amdgpu.mir | 2 ++
9 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6254ea15191819..ef2ca9af7268d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1484,6 +1484,8 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
State.set(this, Cast);
State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
+ if (auto *CastOp = dyn_cast<Instruction>(Cast))
+ setFlags(CastOp);
}
InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
index 0f7a5f8e0941ad..0ae51c602a8d98 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
@@ -38,6 +38,7 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 5
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: scavengeFI: '%stack.0'
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
@@ -304,6 +305,7 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 5
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: scavengeFI: '%stack.0'
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
index 7759501ea42268..07b933cdb6583c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
@@ -38,6 +38,7 @@
; AFTER-PEI-NEXT: fp64-fp16-output-denormals: true
; AFTER-PEI-NEXT: highBitsOf32BitAddress: 0
; AFTER-PEI-NEXT: occupancy: 5
+; AFTER-PEI-NEXT: numPhysicalVGPRSpillLanes: 0
; AFTER-PEI-NEXT: scavengeFI: '%stack.3'
; AFTER-PEI-NEXT: vgprForAGPRCopy: ''
; AFTER-PEI-NEXT: sgprForEXECCopy: ''
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
index 4545c8bbeb3e6c..ea61ec9cb512ca 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
@@ -39,6 +39,7 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: BitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
index 8215ba834170f2..0a689df49258c1 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
@@ -39,6 +39,7 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: BitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 51795a4fea515e..b430488987e03c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -48,6 +48,7 @@
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -81,6 +82,7 @@
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
+# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
@@ -152,6 +154,7 @@ body: |
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -174,6 +177,7 @@ body: |
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
+# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: no_mfi
@@ -227,6 +231,7 @@ body: |
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -249,6 +254,7 @@ body: |
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
+# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: empty_mfi
@@ -303,6 +309,7 @@ body: |
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -326,6 +333,7 @@ body: |
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
+# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: empty_mfi_entry_func
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index b69ede6f24f0f1..3fa4977a98e734 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -48,10 +48,11 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
-; CHECK-NEXT: hasInitWholeWave: false
+; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
%gep = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0
@@ -94,6 +95,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
@@ -164,6 +166,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
@@ -216,6 +219,7 @@ define void @function() {
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll b/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
index d28aeee6e2a817..b093f35159fc71 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
@@ -23,7 +23,7 @@ define dso_local void @uitofp_preserve_nneg(ptr nocapture noundef writeonly %res
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = uitofp <4 x i32> [[VEC_IND]] to <4 x float>
+; CHECK-NEXT: [[TMP0:%.*]] = uitofp nneg <4 x i32> [[VEC_IND]] to <4 x float>
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[TMP0]], <4 x float> [[BROADCAST_SPLAT3]])
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[RESULT:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[TMP2]], align 4
diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
index 73e75fc0f7ef5b..303ebaabd34410 100644
--- a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
+++ b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
@@ -52,10 +52,12 @@
# RESULT-NEXT: fp64-fp16-output-denormals: false
# RESULT-NEXT: highBitsOf32BitAddress: 4276993775
# RESULT-NEXT: occupancy: 8
+# RESULT-NEXT: numPhysicalVGPRSpillLanes: 0
# RESULT-NEXT: wwmReservedRegs:
# RESULT-NEXT: - '$vgpr2'
# RESULT-NEXT: - '$vgpr3'
# RESULT-NEXT: vgprForAGPRCopy: '$vgpr33'
+# RESULT-NEXT: body:
# RESULT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51
# RESULT: S_NOP 0, implicit $vgpr33
>From b21f82bc0a7e38d24e004a9725845b63541e21ba Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Thu, 7 Nov 2024 17:48:34 -0600
Subject: [PATCH 3/3] Remove unrelated tests
---
.../CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll | 2 --
.../CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll | 1 -
.../AMDGPU/machine-function-info-long-branch-reg-debug.ll | 1 -
.../MIR/AMDGPU/machine-function-info-long-branch-reg.ll | 1 -
.../CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir | 8 --------
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll | 6 +-----
.../mir/preserve-machine-function-info-amdgpu.mir | 2 --
7 files changed, 1 insertion(+), 20 deletions(-)
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
index 0ae51c602a8d98..0f7a5f8e0941ad 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
@@ -38,7 +38,6 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 5
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: scavengeFI: '%stack.0'
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
@@ -305,7 +304,6 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 5
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: scavengeFI: '%stack.0'
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
index 07b933cdb6583c..7759501ea42268 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
@@ -38,7 +38,6 @@
; AFTER-PEI-NEXT: fp64-fp16-output-denormals: true
; AFTER-PEI-NEXT: highBitsOf32BitAddress: 0
; AFTER-PEI-NEXT: occupancy: 5
-; AFTER-PEI-NEXT: numPhysicalVGPRSpillLanes: 0
; AFTER-PEI-NEXT: scavengeFI: '%stack.3'
; AFTER-PEI-NEXT: vgprForAGPRCopy: ''
; AFTER-PEI-NEXT: sgprForEXECCopy: ''
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
index ea61ec9cb512ca..4545c8bbeb3e6c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
@@ -39,7 +39,6 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: BitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
index 0a689df49258c1..8215ba834170f2 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
@@ -39,7 +39,6 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: BitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index b430488987e03c..51795a4fea515e 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -48,7 +48,6 @@
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
-# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -82,7 +81,6 @@
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
-# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
@@ -154,7 +152,6 @@ body: |
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
-# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -177,7 +174,6 @@ body: |
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
-# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: no_mfi
@@ -231,7 +227,6 @@ body: |
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
-# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -254,7 +249,6 @@ body: |
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
-# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: empty_mfi
@@ -309,7 +303,6 @@ body: |
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
-# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -333,7 +326,6 @@ body: |
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
-# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: empty_mfi_entry_func
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index 3fa4977a98e734..b69ede6f24f0f1 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -48,11 +48,10 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
-; CHECK-NEXT: hasInitWholeWave: false
+; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
%gep = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0
@@ -95,7 +94,6 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
@@ -166,7 +164,6 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
@@ -219,7 +216,6 @@ define void @function() {
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
index 303ebaabd34410..73e75fc0f7ef5b 100644
--- a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
+++ b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
@@ -52,12 +52,10 @@
# RESULT-NEXT: fp64-fp16-output-denormals: false
# RESULT-NEXT: highBitsOf32BitAddress: 4276993775
# RESULT-NEXT: occupancy: 8
-# RESULT-NEXT: numPhysicalVGPRSpillLanes: 0
# RESULT-NEXT: wwmReservedRegs:
# RESULT-NEXT: - '$vgpr2'
# RESULT-NEXT: - '$vgpr3'
# RESULT-NEXT: vgprForAGPRCopy: '$vgpr33'
-# RESULT-NEXT: body:
# RESULT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51
# RESULT: S_NOP 0, implicit $vgpr33
More information about the llvm-commits
mailing list