[PATCH] D48133: [AMDGPU] Corrected computeKnownBits for V_PERM_B32
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 13 09:32:28 PDT 2018
rampitec created this revision.
rampitec added a reviewer: msearles.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl, arsenm.
https://reviews.llvm.org/D48133
Files:
lib/Target/AMDGPU/AMDGPUISelLowering.cpp
test/CodeGen/AMDGPU/permute.ll
Index: test/CodeGen/AMDGPU/permute.ll
===================================================================
--- test/CodeGen/AMDGPU/permute.ll
+++ test/CodeGen/AMDGPU/permute.ll
@@ -196,4 +196,26 @@
ret void
}
+; GCN-LABEL: {{^}}known_ffff8004:
+; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
+; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
+; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+bb:
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
+ %load = load i32, i32 addrspace(1)* %gep, align 4
+ %mask1 = or i32 %arg1, 4
+ %mask2 = or i32 %load, 32768 ; 0x8000
+ %and = and i32 %mask1, 16711935 ; 0x00ff00ff
+ %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
+ %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
+ %tmp3 = or i32 %tmp2, %and
+ store i32 %tmp3, i32 addrspace(1)* %gep, align 4
+ %v = and i32 %tmp3, 4294934532 ; 0xffff8004
+ store i32 %v, i32 addrspace(1)* %arg, align 4
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x()
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4386,18 +4386,19 @@
unsigned Sel = CMask->getZExtValue();
for (unsigned I = 0; I < 32; I += 8) {
- unsigned ByteMask = 0xff << I;
unsigned SelBits = Sel & 0xff;
if (SelBits < 4) {
- Known.One |= RHSKnown.One & ByteMask;
- Known.Zero |= RHSKnown.Zero & ByteMask;
+ SelBits *= 8;
+ Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
+ Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
} else if (SelBits < 7) {
- Known.One |= LHSKnown.One & ByteMask;
- Known.Zero |= LHSKnown.Zero & ByteMask;
+ SelBits = (SelBits & 3) * 8;
+ Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
+ Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
} else if (SelBits == 0x0c) {
- Known.Zero |= ByteMask;
+ Known.Zero |= 0xff << I;
} else if (SelBits > 0x0c) {
- Known.One |= ByteMask;
+ Known.One |= 0xff << I;
}
Sel >>= 8;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D48133.151188.patch
Type: text/x-patch
Size: 2463 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180613/9d9951b5/attachment.bin>
More information about the llvm-commits mailing list