[llvm] 25f4bd8 - AMDGPU: Clear kill flags after FoldZeroHighBits (#99582)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 19 00:46:41 PDT 2024
Author: Changpeng Fang
Date: 2024-07-19T00:46:37-07:00
New Revision: 25f4bd8872b2c15c641c8e915afb54b39d119d8a
URL: https://github.com/llvm/llvm-project/commit/25f4bd8872b2c15c641c8e915afb54b39d119d8a
DIFF: https://github.com/llvm/llvm-project/commit/25f4bd8872b2c15c641c8e915afb54b39d119d8a.diff
LOG: AMDGPU: Clear kill flags after FoldZeroHighBits (#99582)
After folding, all uses of the result register are going to be replaced
by the operand register. The kill flags on the uses of the result and
operand registers are no longer valid after the replacement, and need to
be cleared.
The only exception is, however, if the kill flag is set for the operand
register, we are sure the last use of the result register is the new
last use of the operand register, and thus we are safe to keep the kill
flags.
Added:
llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 0e8c96625b221..9b2cab2eb73a3 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1361,7 +1361,9 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
return false;
Register Dst = MI.getOperand(0).getReg();
- MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg());
+ MRI->replaceRegWith(Dst, Src1);
+ if (!MI.getOperand(2).isKill())
+ MRI->clearKillFlags(Src1);
MI.eraseFromParent();
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir
new file mode 100644
index 0000000000000..baaca76bfd8a8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir
@@ -0,0 +1,54 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+---
+name: fold_zero_high_bits_src1_alive
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; GCN-LABEL: name: fold_zero_high_bits_src1_alive
+ ; GCN: liveins: $vgpr0, $vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], 1, 0, implicit $exec
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[V_ADD_U16_e64_]], 1, 0, implicit $exec
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[V_ADD_U16_e64_]], [[COPY1]], 0, implicit $exec
+ %0:vgpr_32 = COPY $vgpr0
+ %1:sreg_32 = S_MOV_B32 1
+ %2:vgpr_32 = V_ADD_U16_e64 %0:vgpr_32, %1:sreg_32, 0, implicit $exec
+ %3:sreg_32 = S_MOV_B32 65535
+ %4:vgpr_32 = V_AND_B32_e64 %3:sreg_32, %2:vgpr_32, implicit $exec
+ %5:vgpr_32 = V_MUL_U32_U24_e64 killed %4:vgpr_32, %1:sreg_32, 0, implicit $exec
+ %6:vgpr_32 = COPY $vgpr1
+ %7:vgpr_32 = V_SUB_U16_e64 %2:vgpr_32, %6:vgpr_32, 0, implicit $exec
+...
+
+---
+name: fold_zero_high_bits_src1_killed
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; GCN-LABEL: name: fold_zero_high_bits_src1_killed
+ ; GCN: liveins: $vgpr0, $vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], 1, 0, implicit $exec
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[V_ADD_U16_e64_]], [[COPY1]], 0, implicit $exec
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 killed [[V_ADD_U16_e64_]], 1, 0, implicit $exec
+ %0:vgpr_32 = COPY $vgpr0
+ %1:sreg_32 = S_MOV_B32 1
+ %2:vgpr_32 = V_ADD_U16_e64 %0:vgpr_32, %1:sreg_32, 0, implicit $exec
+ %6:vgpr_32 = COPY $vgpr1
+ %7:vgpr_32 = V_SUB_U16_e64 %2:vgpr_32, %6:vgpr_32, 0, implicit $exec
+ %3:sreg_32 = S_MOV_B32 65535
+ %4:vgpr_32 = V_AND_B32_e64 %3:sreg_32, killed %2:vgpr_32, implicit $exec
+ %5:vgpr_32 = V_MUL_U32_U24_e64 killed %4:vgpr_32, %1:sreg_32, 0, implicit $exec
+...
More information about the llvm-commits
mailing list