[llvm] 295bbea - AMDGPU/GlobalISel: Fix non-power-of-2 G_SITOFP/G_UITOFP

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 16 20:06:35 PST 2020


Author: Matt Arsenault
Date: 2020-02-16T22:48:57-05:00
New Revision: 295bbea3ede470b8abb8db753350581f7865c0fc

URL: https://github.com/llvm/llvm-project/commit/295bbea3ede470b8abb8db753350581f7865c0fc
DIFF: https://github.com/llvm/llvm-project/commit/295bbea3ede470b8abb8db753350581f7865c0fc.diff

LOG: AMDGPU/GlobalISel: Fix non-power-of-2 G_SITOFP/G_UITOFP

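This wouldn't work for s33-s63 sources: clampScalar(1, S32, S64) leaves any source width already between 32 and 64 bits unchanged, so these non-power-of-2 types also need widenScalarToNextPow2(1) to be widened to s64.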

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index aef9aaa86466..8b6097d3a408 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -494,7 +494,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   if (ST.has16BitInsts())
     IToFP.legalFor({{S16, S16}});
   IToFP.clampScalar(1, S32, S64)
-       .scalarize(0);
+       .scalarize(0)
+       .widenScalarToNextPow2(1);
 
   auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
     .legalFor({{S32, S32}, {S32, S64}, {S32, S16}})

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
index 8a4b23ff3d1e..0d82e12476de 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
@@ -443,3 +443,103 @@ body: |
     %2:_(s64) = G_SITOFP %1
     $vgpr0_vgpr1 = COPY %2
 ...
+
+---
+name: test_sitofp_s33_to_s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX6-LABEL: name: test_sitofp_s33_to_s32
+    ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64)
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]]
+    ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775
+    ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C6]](s32)
+    ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32)
+    ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]]
+    ; GFX6: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]]
+    ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR]](s64)
+    ; GFX6: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UITOFP]]
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[UITOFP]]
+    ; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[SEXT_INREG]](s64)
+    ; GFX6: $vgpr0 = COPY [[SITOFP]](s32)
+    ; GFX8-LABEL: name: test_sitofp_s33_to_s32
+    ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64)
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32)
+    ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]]
+    ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775
+    ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C6]](s32)
+    ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32)
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]]
+    ; GFX8: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]]
+    ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR]](s64)
+    ; GFX8: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UITOFP]]
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[UITOFP]]
+    ; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[SEXT_INREG]](s64)
+    ; GFX8: $vgpr0 = COPY [[SITOFP]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s33) = G_TRUNC %0
+    %2:_(s32) = G_SITOFP %1
+    $vgpr0 = COPY %2
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
index 0b18f4264ff5..69a7ff9a32cd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
@@ -405,3 +405,81 @@ body: |
     %2:_(s64) = G_UITOFP %1
     $vgpr0_vgpr1 = COPY %2
 ...
+
+---
+name: test_uitofp_s33_to_s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX6-LABEL: name: test_uitofp_s33_to_s32
+    ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s64)
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND]](s64), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[CTLZ_ZERO_UNDEF]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]]
+    ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775
+    ; GFX6: [[AND2:%[0-9]+]]:_(s64) = G_AND [[AND1]], [[C5]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[C6]](s32)
+    ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32)
+    ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]]
+    ; GFX6: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND2]](s64), [[C8]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s64), [[C8]]
+    ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND3]], [[C1]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64)
+    ; GFX6: $vgpr0 = COPY [[UITOFP]](s32)
+    ; GFX8-LABEL: name: test_uitofp_s33_to_s32
+    ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s64)
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND]](s64), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[CTLZ_ZERO_UNDEF]](s32)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]]
+    ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775
+    ; GFX8: [[AND2:%[0-9]+]]:_(s64) = G_AND [[AND1]], [[C5]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[C6]](s32)
+    ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32)
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]]
+    ; GFX8: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND2]](s64), [[C8]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s64), [[C8]]
+    ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND3]], [[C1]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64)
+    ; GFX8: $vgpr0 = COPY [[UITOFP]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s33) = G_TRUNC %0
+    %2:_(s32) = G_UITOFP %1
+    $vgpr0 = COPY %2
+...


        


More information about the llvm-commits mailing list