[llvm] [VPlan] Preserve IR flags when widening casts (PR #115373)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 13:44:46 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-amdgpu
Author: None (goldsteinn)
<details>
<summary>Changes</summary>
We have `nneg` for both `sext` and `uitofp`.
Fixes #<!-- -->114856
---
Full diff: https://github.com/llvm/llvm-project/pull/115373.diff
10 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+2)
- (modified) llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll (+2)
- (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll (+1)
- (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll (+1)
- (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll (+1)
- (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir (+8)
- (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll (+5-1)
- (added) llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll (+78)
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll (+1-1)
- (modified) llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir (+2)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6254ea15191819..ef2ca9af7268d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1484,6 +1484,8 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
State.set(this, Cast);
State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
+ if (auto *CastOp = dyn_cast<Instruction>(Cast))
+ setFlags(CastOp);
}
InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
index 0f7a5f8e0941ad..0ae51c602a8d98 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
@@ -38,6 +38,7 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 5
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: scavengeFI: '%stack.0'
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
@@ -304,6 +305,7 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 5
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: scavengeFI: '%stack.0'
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
index 7759501ea42268..07b933cdb6583c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
@@ -38,6 +38,7 @@
; AFTER-PEI-NEXT: fp64-fp16-output-denormals: true
; AFTER-PEI-NEXT: highBitsOf32BitAddress: 0
; AFTER-PEI-NEXT: occupancy: 5
+; AFTER-PEI-NEXT: numPhysicalVGPRSpillLanes: 0
; AFTER-PEI-NEXT: scavengeFI: '%stack.3'
; AFTER-PEI-NEXT: vgprForAGPRCopy: ''
; AFTER-PEI-NEXT: sgprForEXECCopy: ''
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
index 4545c8bbeb3e6c..ea61ec9cb512ca 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
@@ -39,6 +39,7 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: BitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
index 8215ba834170f2..0a689df49258c1 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
@@ -39,6 +39,7 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: BitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 51795a4fea515e..b430488987e03c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -48,6 +48,7 @@
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -81,6 +82,7 @@
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
+# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
@@ -152,6 +154,7 @@ body: |
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -174,6 +177,7 @@ body: |
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
+# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: no_mfi
@@ -227,6 +231,7 @@ body: |
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -249,6 +254,7 @@ body: |
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
+# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: empty_mfi
@@ -303,6 +309,7 @@ body: |
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 8
+# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: sgprForEXECCopy: ''
# FULL-NEXT: longBranchReservedReg: ''
@@ -326,6 +333,7 @@ body: |
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 8
+# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
# SIMPLE-NEXT: body:
name: empty_mfi_entry_func
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index b69ede6f24f0f1..3fa4977a98e734 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -48,10 +48,11 @@
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
-; CHECK-NEXT: hasInitWholeWave: false
+; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
%gep = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0
@@ -94,6 +95,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
@@ -164,6 +166,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
@@ -216,6 +219,7 @@ define void @function() {
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
+; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll b/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
new file mode 100644
index 00000000000000..b093f35159fc71
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes="default<O3>,loop-vectorize" -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @uitofp_preserve_nneg(ptr nocapture noundef writeonly %result, i32 noundef %size, float noundef %y, float noundef %delta) {
+; CHECK-LABEL: @uitofp_preserve_nneg(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[SIZE]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER4:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[DELTA:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = uitofp nneg <4 x i32> [[VEC_IND]] to <4 x float>
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[TMP0]], <4 x float> [[BROADCAST_SPLAT3]])
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[RESULT:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER4]]
+; CHECK: for.body.preheader4:
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER4]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[CONV:%.*]] = uitofp nneg i32 [[TMP4]] to float
+; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.fmuladd.f32(float [[DELTA]], float [[CONV]], float [[Y]])
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[RESULT]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store float [[TMP5]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp4 = icmp sgt i32 %size, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %t.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %conv = uitofp nneg i32 %t.05 to float
+ %0 = tail call float @llvm.fmuladd.f32(float %delta, float %conv, float %y)
+ %idxprom = zext nneg i32 %t.05 to i64
+ %arrayidx = getelementptr inbounds float, ptr %result, i64 %idxprom
+ store float %0, ptr %arrayidx, align 4
+ %inc = add nuw nsw i32 %t.05, 1
+ %cmp = icmp slt i32 %inc, %size
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
index e4dbc5829f6115..b312688b7932dc 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
@@ -109,7 +109,7 @@ define void @test3(float %0) {
; CHECK: for.body:
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr null, align 4
; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> <i1 true, i1 true>, i64 0)
+; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0)
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2)
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
index 73e75fc0f7ef5b..303ebaabd34410 100644
--- a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
+++ b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
@@ -52,10 +52,12 @@
# RESULT-NEXT: fp64-fp16-output-denormals: false
# RESULT-NEXT: highBitsOf32BitAddress: 4276993775
# RESULT-NEXT: occupancy: 8
+# RESULT-NEXT: numPhysicalVGPRSpillLanes: 0
# RESULT-NEXT: wwmReservedRegs:
# RESULT-NEXT: - '$vgpr2'
# RESULT-NEXT: - '$vgpr3'
# RESULT-NEXT: vgprForAGPRCopy: '$vgpr33'
+# RESULT-NEXT: body:
# RESULT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51
# RESULT: S_NOP 0, implicit $vgpr33
``````````
</details>
https://github.com/llvm/llvm-project/pull/115373
More information about the llvm-commits
mailing list