[PATCH] D89316: [AMDGPU][GlobalISel] Compute known bits for zero-extending loads
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 13 06:51:08 PDT 2020
foad created this revision.
foad added reviewers: arsenm, Petar.Avramovic, mbrkusanin.
Herald added subscribers: llvm-commits, kerbowa, hiraditya, t-tye, tpr, dstuttard, rovka, yaxunl, nhaehnle, jvesely, kzhuravl.
Herald added a project: LLVM.
foad requested review of this revision.
Herald added a subscriber: wdng.
Implement computeKnownBitsForTargetInstr for G_AMDGPU_BUFFER_LOAD_UBYTE
and G_AMDGPU_BUFFER_LOAD_USHORT. This allows generic combines to remove
some unnecessary G_ANDs.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D89316
Files:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
@@ -228,10 +228,7 @@
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "TargetCustom7", addrspace 4)
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
- ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], [[COPY7]], implicit $exec
- ; CHECK: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
%ext = zext i8 %val to i32
@@ -276,10 +273,7 @@
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], [[COPY7]], implicit $exec
- ; CHECK: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
%ext = zext i16 %val to i32
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
@@ -387,10 +387,7 @@
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "TargetCustom7", addrspace 4)
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
- ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[BUFFER_LOAD_UBYTE_OFFEN]], [[COPY6]], implicit $exec
- ; CHECK: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
%zext = zext i8 %val to i32
@@ -497,10 +494,7 @@
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
- ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[BUFFER_LOAD_UBYTE_OFFEN]], [[COPY8]], implicit $exec
- ; CHECK: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
%zext = zext i8 %val to i32
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11594,10 +11594,15 @@
Known.Zero.setHighBits(countLeadingZeros(getSubtarget()->getLocalMemorySize()));
break;
}
- default:
- break;
}
+ break;
}
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
+ Known.Zero.setHighBits(24);
+ break;
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
+ Known.Zero.setHighBits(16);
+ break;
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D89316.297847.patch
Type: text/x-patch
Size: 4991 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201013/79991e0c/attachment.bin>
More information about the llvm-commits
mailing list