[PATCH] D138050: AMDGPU/GlobalISel: Insert freeze when splitting vector G_SEXT_INREG
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 11:34:00 PST 2022
arsenm created this revision.
arsenm added reviewers: AMDGPU, foad, Pierre-vh, Petar.Avramovic.
Herald added subscribers: kosarev, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
This transform is broken for undef or poison inputs without a freeze.
This is also broken in lots of other places where shifts are split
into 32-bit pieces.
Amt < 32 case:
; Broken: https://alive2.llvm.org/ce/z/7bb4vc
; Freezing the low half of the bits makes it correct
; Fixed: https://alive2.llvm.org/ce/z/zJAZFr
define i64 @src(i64 %val) {
%shl = shl i64 %val, 55
%shr = ashr i64 %shl, 55
ret i64 %shr
}
define i64 @tgt(i64 %val) {
%lo32 = trunc i64 %val to i32
%shr.half = lshr i64 %val, 32
%hi32 = trunc i64 %shr.half to i32
%inreg.0 = shl i32 %lo32, 23
%new.lo = ashr i32 %inreg.0, 23
%new.hi = ashr i32 %new.lo, 31
%zext.lo = zext i32 %new.lo to i64
%zext.hi = zext i32 %new.hi to i64
%hi.ins = shl i64 %zext.hi, 32
%or = or i64 %hi.ins, %zext.lo
ret i64 %or
}
Amt == 32 case:
Broken: https://alive2.llvm.org/ce/z/5f4qwQ
Fixed: https://alive2.llvm.org/ce/z/A2hWWF
This one times out alive; works if argument is made undef or scaled down
to a smaller bitwidth.
define i64 @src(i64 %val) {
%shl = shl i64 %val, 32
%shr = ashr i64 %shl, 32
ret i64 %shr
}
define i64 @tgt(i64 %val) {
%lo32 = trunc i64 %val to i32
%shr.half = lshr i64 %val, 32
%hi32 = trunc i64 %shr.half to i32
%new.hi = ashr i32 %lo32, 31
%zext.lo = zext i32 %lo32 to i64
%zext.hi = zext i32 %new.hi to i64
%hi.ins = shl i64 %zext.hi, 32
%or = or i64 %hi.ins, %zext.lo
ret i64 %or
}
Amt > 32 case:
; Correct: https://alive2.llvm.org/ce/z/tvrhPf
define i64 @src(i64 %val) {
%shl = shl i64 %val, 9
%shr = ashr i64 %shl, 9
ret i64 %shr
}
define i64 @tgt(i64 %val) {
%lo32 = trunc i64 %val to i32
%lshr = lshr i64 %val, 32
%hi32 = trunc i64 %lshr to i32
%inreg.0 = shl i32 %hi32, 9
%new.hi = ashr i32 %inreg.0, 9
%zext.lo = zext i32 %lo32 to i64
%zext.hi = zext i32 %new.hi to i64
%hi.ins = shl i64 %zext.hi, 32
%or = or i64 %hi.ins, %zext.lo
ret i64 %or
}
https://reviews.llvm.org/D138050
Files:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
@@ -135,7 +135,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
- ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 31
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[UV]]
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[FREEZE]], 31
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
@@ -159,7 +160,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
- ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 1
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[UV]]
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[FREEZE]], 1
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
@@ -183,10 +185,10 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[UV]]
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
- ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
- ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[FREEZE]], [[C]](s32)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[FREEZE]](s32), [[ASHR]](s32)
; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_SEXT_INREG %0, 32
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2449,15 +2449,17 @@
if (Amt <= 32) {
if (Amt == 32) {
// The low bits are unchanged.
- B.buildCopy(DstRegs[0], SrcRegs[0]);
+ B.buildFreeze(DstRegs[0], SrcRegs[0]);
} else {
+ auto Freeze = B.buildFreeze(S32, SrcRegs[0]);
// Extend in the low bits and propagate the sign bit to the high half.
- B.buildSExtInReg(DstRegs[0], SrcRegs[0], 64 - (32 + Amt));
+ B.buildSExtInReg(DstRegs[0], Freeze, 64 - (32 + Amt));
}
B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));
} else {
// The low bits are unchanged, and extend in the high bits.
+ // No freeze required
B.buildCopy(DstRegs[0], SrcRegs[0]);
B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D138050.475540.patch
Type: text/x-patch
Size: 3541 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221115/da7e8525/attachment.bin>
More information about the llvm-commits
mailing list