[llvm] r334640 - [AMDGPU] Corrected computeKnownBits for V_PERM_B32

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 13 11:52:54 PDT 2018


Author: rampitec
Date: Wed Jun 13 11:52:54 2018
New Revision: 334640

URL: http://llvm.org/viewvc/llvm-project?rev=334640&view=rev
Log:
[AMDGPU] Corrected computeKnownBits for V_PERM_B32

Differential Revision: https://reviews.llvm.org/D48133

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/permute.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=334640&r1=334639&r2=334640&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Wed Jun 13 11:52:54 2018
@@ -4317,18 +4317,19 @@ void AMDGPUTargetLowering::computeKnownB
     unsigned Sel = CMask->getZExtValue();
 
     for (unsigned I = 0; I < 32; I += 8) {
-      unsigned ByteMask = 0xff << I;
       unsigned SelBits = Sel & 0xff;
       if (SelBits < 4) {
-        Known.One |= RHSKnown.One & ByteMask;
-        Known.Zero |= RHSKnown.Zero & ByteMask;
+        SelBits *= 8;
+        Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
+        Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
       } else if (SelBits < 7) {
-        Known.One |= LHSKnown.One & ByteMask;
-        Known.Zero |= LHSKnown.Zero & ByteMask;
+        SelBits = (SelBits & 3) * 8;
+        Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
+        Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
       } else if (SelBits == 0x0c) {
-        Known.Zero |= ByteMask;
+        Known.Zero |= 0xff << I;
       } else if (SelBits > 0x0c) {
-        Known.One |= ByteMask;
+        Known.One |= 0xff << I;
       }
       Sel >>= 8;
     }

Modified: llvm/trunk/test/CodeGen/AMDGPU/permute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/permute.ll?rev=334640&r1=334639&r2=334640&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/permute.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/permute.ll Wed Jun 13 11:52:54 2018
@@ -196,4 +196,26 @@ bb:
   ret void
 }
 
+; GCN-LABEL: {{^}}known_ffff8004:
+; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
+; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
+; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+bb:
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
+  %load = load i32, i32 addrspace(1)* %gep, align 4
+  %mask1 = or i32 %arg1, 4
+  %mask2 = or i32 %load, 32768 ; 0x8000
+  %and = and i32 %mask1, 16711935     ; 0x00ff00ff
+  %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
+  %tmp2 = or i32 %tmp1, 4294901760   ; 0xffff0000
+  %tmp3 = or i32 %tmp2, %and
+  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
+  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
+  store i32 %v, i32 addrspace(1)* %arg, align 4
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x()




More information about the llvm-commits mailing list