[llvm] [VPlan] Preserve IR flags when widening casts (PR #115373)

Thu Nov 7 18:02:48 PST 2024

https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/115373

>From 79de03674f5424eac1a71ad7846d0b21a251608a Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Thu, 7 Nov 2024 14:35:41 -0600
Subject: [PATCH 1/3] [LV][X86] Add test for preserving flags when widening
 casts; NFC

---
 .../LoopVectorize/X86/uitofp-preserve-nneg.ll | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll

diff --git a/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll b/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
new file mode 100644
index 00000000000000..d28aeee6e2a817
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes="default<O3>,loop-vectorize" -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @uitofp_preserve_nneg(ptr nocapture noundef writeonly %result, i32 noundef %size, float noundef %y, float noundef %delta) {
+; CHECK-LABEL: @uitofp_preserve_nneg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[SIZE]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER4:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[DELTA:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = uitofp <4 x i32> [[VEC_IND]] to <4 x float>
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[TMP0]], <4 x float> [[BROADCAST_SPLAT3]])
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[RESULT:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER4]]
+; CHECK:       for.body.preheader4:
+; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER4]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[CONV:%.*]] = uitofp nneg i32 [[TMP4]] to float
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call float @llvm.fmuladd.f32(float [[DELTA]], float [[CONV]], float [[Y]])
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[RESULT]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[TMP5]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp4 = icmp sgt i32 %size, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %t.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %conv = uitofp nneg i32 %t.05 to float
+  %0 = tail call float @llvm.fmuladd.f32(float %delta, float %conv, float %y)
+  %idxprom = zext nneg i32 %t.05 to i64
+  %arrayidx = getelementptr inbounds float, ptr %result, i64 %idxprom
+  store float %0, ptr %arrayidx, align 4
+  %inc = add nuw nsw i32 %t.05, 1
+  %cmp = icmp slt i32 %inc, %size
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}

>From 6eee306821c78fd3282754b5f36d5c155af55f38 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Thu, 7 Nov 2024 14:36:36 -0600
Subject: [PATCH 2/3] [VPlan] Preserve IR flags when widening casts

The `nneg` flag is valid on both `zext` and `uitofp`; propagate it (and any other IR flags) to the widened cast.

Fixes #114856
---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp            | 2 ++
 .../CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll   | 2 ++
 .../CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll | 1 +
 .../AMDGPU/machine-function-info-long-branch-reg-debug.ll | 1 +
 .../MIR/AMDGPU/machine-function-info-long-branch-reg.ll   | 1 +
 .../CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir    | 8 ++++++++
 llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll     | 6 +++++-
 .../Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll  | 2 +-
 .../mir/preserve-machine-function-info-amdgpu.mir         | 2 ++
 9 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6254ea15191819..ef2ca9af7268d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1484,6 +1484,8 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
   Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
   State.set(this, Cast);
   State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
+  if (auto *CastOp = dyn_cast<Instruction>(Cast))
+    setFlags(CastOp);
 }
 
 InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
index 0f7a5f8e0941ad..0ae51c602a8d98 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
@@ -38,6 +38,7 @@
 ; CHECK-NEXT:     fp64-fp16-output-denormals: true
 ; CHECK-NEXT:   highBitsOf32BitAddress: 0
 ; CHECK-NEXT:   occupancy:       5
+; CHECK-NEXT:   numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT:   scavengeFI:      '%stack.0'
 ; CHECK-NEXT:   vgprForAGPRCopy: ''
 ; CHECK-NEXT:   sgprForEXECCopy: '$sgpr100_sgpr101'
@@ -304,6 +305,7 @@
 ; CHECK-NEXT:     fp64-fp16-output-denormals: true
 ; CHECK-NEXT:   highBitsOf32BitAddress: 0
 ; CHECK-NEXT:   occupancy:       5
+; CHECK-NEXT:   numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT:   scavengeFI:      '%stack.0'
 ; CHECK-NEXT:   vgprForAGPRCopy: ''
 ; CHECK-NEXT:   sgprForEXECCopy: '$sgpr100_sgpr101'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
index 7759501ea42268..07b933cdb6583c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
@@ -38,6 +38,7 @@
 ; AFTER-PEI-NEXT:   fp64-fp16-output-denormals: true
 ; AFTER-PEI-NEXT: highBitsOf32BitAddress: 0
 ; AFTER-PEI-NEXT: occupancy: 5
+; AFTER-PEI-NEXT: numPhysicalVGPRSpillLanes: 0
 ; AFTER-PEI-NEXT: scavengeFI: '%stack.3'
 ; AFTER-PEI-NEXT: vgprForAGPRCopy: ''
 ; AFTER-PEI-NEXT: sgprForEXECCopy: ''
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
index 4545c8bbeb3e6c..ea61ec9cb512ca 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
@@ -39,6 +39,7 @@
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: BitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy:       8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
index 8215ba834170f2..0a689df49258c1 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
@@ -39,6 +39,7 @@
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: BitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy:       8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 51795a4fea515e..b430488987e03c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -48,6 +48,7 @@
 # FULL-NEXT: fp64-fp16-output-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT:  occupancy: 8
+# FULL-NEXT:  numPhysicalVGPRSpillLanes: 0
 # FULL-NEXT:  vgprForAGPRCopy: ''
 # FULL-NEXT:  sgprForEXECCopy: ''
 # FULL-NEXT:  longBranchReservedReg: ''
@@ -81,6 +82,7 @@
 # SIMPLE-NEXT: workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
 # SIMPLE-NEXT: workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
 # SIMPLE-NEXT: occupancy: 8
+# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
 # SIMPLE-NEXT: body:
 name: kernel0
 machineFunctionInfo:
@@ -152,6 +154,7 @@ body:             |
 # FULL-NEXT: fp64-fp16-output-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT:  occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
 # FULL-NEXT: vgprForAGPRCopy: ''
 # FULL-NEXT: sgprForEXECCopy: ''
 # FULL-NEXT: longBranchReservedReg: ''
@@ -174,6 +177,7 @@ body:             |
 # SIMPLE-NEXT: workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
 # SIMPLE-NEXT: workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
 # SIMPLE-NEXT:  occupancy: 8
+# SIMPLE-NEXT:  numPhysicalVGPRSpillLanes: 0
 # SIMPLE-NEXT: body:
 
 name: no_mfi
@@ -227,6 +231,7 @@ body:             |
 # FULL-NEXT: fp64-fp16-output-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT:  occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
 # FULL-NEXT: vgprForAGPRCopy: ''
 # FULL-NEXT: sgprForEXECCopy: ''
 # FULL-NEXT: longBranchReservedReg: ''
@@ -249,6 +254,7 @@ body:             |
 # SIMPLE-NEXT: workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
 # SIMPLE-NEXT: workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
 # SIMPLE-NEXT:  occupancy: 8
+# SIMPLE-NEXT:  numPhysicalVGPRSpillLanes: 0
 # SIMPLE-NEXT: body:
 
 name: empty_mfi
@@ -303,6 +309,7 @@ body:             |
 # FULL-NEXT: fp64-fp16-output-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT:  occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
 # FULL-NEXT: vgprForAGPRCopy: ''
 # FULL-NEXT: sgprForEXECCopy: ''
 # FULL-NEXT: longBranchReservedReg: ''
@@ -326,6 +333,7 @@ body:             |
 # SIMPLE-NEXT: workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
 # SIMPLE-NEXT: workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
 # SIMPLE-NEXT: occupancy: 8
+# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
 # SIMPLE-NEXT: body:
 
 name: empty_mfi_entry_func
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index b69ede6f24f0f1..3fa4977a98e734 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -48,10 +48,11 @@
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: ''
-; CHECK-NEXT: hasInitWholeWave: false
+; CHECK-NEXT: hasInitWholeWave: false    
 ; CHECK-NEXT: body:
 define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
   %gep = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0
@@ -94,6 +95,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy: 10
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: ''
@@ -164,6 +166,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: ''
@@ -216,6 +219,7 @@ define void @function() {
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: ''
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll b/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
index d28aeee6e2a817..b093f35159fc71 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
@@ -23,7 +23,7 @@ define dso_local void @uitofp_preserve_nneg(ptr nocapture noundef writeonly %res
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = uitofp <4 x i32> [[VEC_IND]] to <4 x float>
+; CHECK-NEXT:    [[TMP0:%.*]] = uitofp nneg <4 x i32> [[VEC_IND]] to <4 x float>
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[TMP0]], <4 x float> [[BROADCAST_SPLAT3]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[RESULT:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[TMP2]], align 4
diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
index 73e75fc0f7ef5b..303ebaabd34410 100644
--- a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
+++ b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
@@ -52,10 +52,12 @@
 # RESULT-NEXT: fp64-fp16-output-denormals: false
 # RESULT-NEXT: highBitsOf32BitAddress: 4276993775
 # RESULT-NEXT: occupancy:       8
+# RESULT-NEXT: numPhysicalVGPRSpillLanes: 0
 # RESULT-NEXT: wwmReservedRegs:
 # RESULT-NEXT: - '$vgpr2'
 # RESULT-NEXT: - '$vgpr3'
 # RESULT-NEXT: vgprForAGPRCopy: '$vgpr33'
+# RESULT-NEXT: body:
 
 # RESULT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51
 # RESULT: S_NOP 0, implicit $vgpr33

>From b21f82bc0a7e38d24e004a9725845b63541e21ba Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Thu, 7 Nov 2024 17:48:34 -0600
Subject: [PATCH 3/3] Remove unrelated tests

---
 .../CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll   | 2 --
 .../CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll | 1 -
 .../AMDGPU/machine-function-info-long-branch-reg-debug.ll | 1 -
 .../MIR/AMDGPU/machine-function-info-long-branch-reg.ll   | 1 -
 .../CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir    | 8 --------
 llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll     | 6 +-----
 .../mir/preserve-machine-function-info-amdgpu.mir         | 2 --
 7 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
index 0ae51c602a8d98..0f7a5f8e0941ad 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
@@ -38,7 +38,6 @@
 ; CHECK-NEXT:     fp64-fp16-output-denormals: true
 ; CHECK-NEXT:   highBitsOf32BitAddress: 0
 ; CHECK-NEXT:   occupancy:       5
-; CHECK-NEXT:   numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT:   scavengeFI:      '%stack.0'
 ; CHECK-NEXT:   vgprForAGPRCopy: ''
 ; CHECK-NEXT:   sgprForEXECCopy: '$sgpr100_sgpr101'
@@ -305,7 +304,6 @@
 ; CHECK-NEXT:     fp64-fp16-output-denormals: true
 ; CHECK-NEXT:   highBitsOf32BitAddress: 0
 ; CHECK-NEXT:   occupancy:       5
-; CHECK-NEXT:   numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT:   scavengeFI:      '%stack.0'
 ; CHECK-NEXT:   vgprForAGPRCopy: ''
 ; CHECK-NEXT:   sgprForEXECCopy: '$sgpr100_sgpr101'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
index 07b933cdb6583c..7759501ea42268 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
@@ -38,7 +38,6 @@
 ; AFTER-PEI-NEXT:   fp64-fp16-output-denormals: true
 ; AFTER-PEI-NEXT: highBitsOf32BitAddress: 0
 ; AFTER-PEI-NEXT: occupancy: 5
-; AFTER-PEI-NEXT: numPhysicalVGPRSpillLanes: 0
 ; AFTER-PEI-NEXT: scavengeFI: '%stack.3'
 ; AFTER-PEI-NEXT: vgprForAGPRCopy: ''
 ; AFTER-PEI-NEXT: sgprForEXECCopy: ''
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
index ea61ec9cb512ca..4545c8bbeb3e6c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
@@ -39,7 +39,6 @@
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: BitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy:       8
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
index 0a689df49258c1..8215ba834170f2 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
@@ -39,7 +39,6 @@
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: BitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy:       8
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index b430488987e03c..51795a4fea515e 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -48,7 +48,6 @@
 # FULL-NEXT: fp64-fp16-output-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT:  occupancy: 8
-# FULL-NEXT:  numPhysicalVGPRSpillLanes: 0
 # FULL-NEXT:  vgprForAGPRCopy: ''
 # FULL-NEXT:  sgprForEXECCopy: ''
 # FULL-NEXT:  longBranchReservedReg: ''
@@ -82,7 +81,6 @@
 # SIMPLE-NEXT: workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
 # SIMPLE-NEXT: workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
 # SIMPLE-NEXT: occupancy: 8
-# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
 # SIMPLE-NEXT: body:
 name: kernel0
 machineFunctionInfo:
@@ -154,7 +152,6 @@ body:             |
 # FULL-NEXT: fp64-fp16-output-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT:  occupancy: 8
-# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
 # FULL-NEXT: vgprForAGPRCopy: ''
 # FULL-NEXT: sgprForEXECCopy: ''
 # FULL-NEXT: longBranchReservedReg: ''
@@ -177,7 +174,6 @@ body:             |
 # SIMPLE-NEXT: workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
 # SIMPLE-NEXT: workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
 # SIMPLE-NEXT:  occupancy: 8
-# SIMPLE-NEXT:  numPhysicalVGPRSpillLanes: 0
 # SIMPLE-NEXT: body:
 
 name: no_mfi
@@ -231,7 +227,6 @@ body:             |
 # FULL-NEXT: fp64-fp16-output-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT:  occupancy: 8
-# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
 # FULL-NEXT: vgprForAGPRCopy: ''
 # FULL-NEXT: sgprForEXECCopy: ''
 # FULL-NEXT: longBranchReservedReg: ''
@@ -254,7 +249,6 @@ body:             |
 # SIMPLE-NEXT: workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
 # SIMPLE-NEXT: workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
 # SIMPLE-NEXT:  occupancy: 8
-# SIMPLE-NEXT:  numPhysicalVGPRSpillLanes: 0
 # SIMPLE-NEXT: body:
 
 name: empty_mfi
@@ -309,7 +303,6 @@ body:             |
 # FULL-NEXT: fp64-fp16-output-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT:  occupancy: 8
-# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
 # FULL-NEXT: vgprForAGPRCopy: ''
 # FULL-NEXT: sgprForEXECCopy: ''
 # FULL-NEXT: longBranchReservedReg: ''
@@ -333,7 +326,6 @@ body:             |
 # SIMPLE-NEXT: workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
 # SIMPLE-NEXT: workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
 # SIMPLE-NEXT: occupancy: 8
-# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
 # SIMPLE-NEXT: body:
 
 name: empty_mfi_entry_func
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index 3fa4977a98e734..b69ede6f24f0f1 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -48,11 +48,10 @@
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy: 8
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: ''
-; CHECK-NEXT: hasInitWholeWave: false    
+; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: body:
 define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
   %gep = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0
@@ -95,7 +94,6 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy: 10
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: ''
@@ -166,7 +164,6 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy: 8
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: ''
@@ -219,7 +216,6 @@ define void @function() {
 ; CHECK-NEXT: fp64-fp16-output-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: occupancy: 8
-; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
 ; CHECK-NEXT: vgprForAGPRCopy: ''
 ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
 ; CHECK-NEXT: longBranchReservedReg: ''
diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
index 303ebaabd34410..73e75fc0f7ef5b 100644
--- a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
+++ b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
@@ -52,12 +52,10 @@
 # RESULT-NEXT: fp64-fp16-output-denormals: false
 # RESULT-NEXT: highBitsOf32BitAddress: 4276993775
 # RESULT-NEXT: occupancy:       8
-# RESULT-NEXT: numPhysicalVGPRSpillLanes: 0
 # RESULT-NEXT: wwmReservedRegs:
 # RESULT-NEXT: - '$vgpr2'
 # RESULT-NEXT: - '$vgpr3'
 # RESULT-NEXT: vgprForAGPRCopy: '$vgpr33'
-# RESULT-NEXT: body:
 
 # RESULT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51
 # RESULT: S_NOP 0, implicit $vgpr33