[llvm] 6a570dc - AMDGPU/GlobalISel: Fix non-pow-2 add/sub/mul for 16-bit insts

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 6 18:53:11 PST 2020


Author: Matt Arsenault
Date: 2020-02-06T21:43:54-05:00
New Revision: 6a570dc548078af92a3cc0dda0d2ad1f371b0280

URL: https://github.com/llvm/llvm-project/commit/6a570dc548078af92a3cc0dda0d2ad1f371b0280
DIFF: https://github.com/llvm/llvm-project/commit/6a570dc548078af92a3cc0dda0d2ad1f371b0280.diff

LOG: AMDGPU/GlobalISel: Fix non-pow-2 add/sub/mul for 16-bit insts

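Scalar G_ADD/G_SUB/G_MUL operations whose bit width lies strictly between
16 and 32 (e.g. s24) failed to legalize on targets with 16-bit
instructions. Such types are now widened to the next power of two (s32).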

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 149e871d2083..78eaab44b05c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -284,7 +284,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
       .legalFor({S32, S16})
       .clampScalar(0, S16, S32)
-      .scalarize(0);
+      .scalarize(0)
+      .widenScalarToNextPow2(0, 32);
   } else {
     getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
       .legalFor({S32})

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir
index d6913a9d741c..e43c541a94db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir
@@ -266,3 +266,58 @@ body: |
     %5:_(s32) = G_ZEXT %4
     $vgpr0 = COPY %5
 ...
+
+---
+name: test_add_s24
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: test_add_s24
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX8-LABEL: name: test_add_s24
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+    ; GFX8: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX9-LABEL: name: test_add_s24
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s24) = G_TRUNC %0
+    %3:_(s24) = G_TRUNC %1
+    %4:_(s24) = G_ADD %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+# FIXME
+# ---
+# name: test_add_s33
+# body: |
+#   bb.0:
+#     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+#     %0:_(s64) = COPY $vgpr0_vgpr1
+#     %1:_(s64) = COPY $vgpr2_vgpr3
+#     %2:_(s33) = G_TRUNC %0
+#     %3:_(s33) = G_TRUNC %1
+#     %4:_(s33) = G_ADD %2, %3
+#     %5:_(s64) = G_ANYEXT %4
+#     $vgpr0_vgpr1 = COPY %5
+# ...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
index 9987f81b67ef..bcb999735a7e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
@@ -320,3 +320,58 @@ body: |
     %2:_(<2 x s16>) = G_MUL %0, %1
     $vgpr0 = COPY %2
 ...
+
+---
+name: test_mul_s24
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: test_mul_s24
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY3]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[MUL]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX8-LABEL: name: test_mul_s24
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY3]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[MUL]](s32)
+    ; GFX8: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX9-LABEL: name: test_mul_s24
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY3]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[MUL]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s24) = G_TRUNC %0
+    %3:_(s24) = G_TRUNC %1
+    %4:_(s24) = G_MUL %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+# FIXME:
+# ---
+# name: test_mul_s33
+# body: |
+#   bb.0:
+#     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+#     %0:_(s64) = COPY $vgpr0_vgpr1
+#     %1:_(s64) = COPY $vgpr2_vgpr3
+#     %2:_(s33) = G_TRUNC %0
+#     %3:_(s33) = G_TRUNC %1
+#     %4:_(s33) = G_MUL %2, %3
+#     %5:_(s64) = G_ANYEXT %4
+#     $vgpr0_vgpr1 = COPY %5
+# ...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir
index 9563ace78e06..8da7c0fbf6db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir
@@ -182,3 +182,136 @@ body: |
     %2:_(<2 x s16>) = G_SUB %0, %1
     $vgpr0 = COPY %2
 ...
+
+---
+name: test_sub_s64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX6-LABEL: name: test_sub_s64
+    ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX8-LABEL: name: test_sub_s64
+    ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-LABEL: name: test_sub_s64
+    ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    %2:_(s64) = G_SUB %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: test_sub_s7
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: test_sub_s7
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY3]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX8-LABEL: name: test_sub_s7
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC1]]
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16)
+    ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: test_sub_s7
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s7) = G_TRUNC %0
+    %3:_(s7) = G_TRUNC %1
+    %4:_(s7) = G_SUB %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_sub_s24
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: test_sub_s24
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY3]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX8-LABEL: name: test_sub_s24
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY3]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+    ; GFX8: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX9-LABEL: name: test_sub_s24
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY3]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s24) = G_TRUNC %0
+    %3:_(s24) = G_TRUNC %1
+    %4:_(s24) = G_SUB %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+# FIXME
+# ---
+# name: test_sub_s33
+# body: |
+#   bb.0:
+#     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+#     %0:_(s64) = COPY $vgpr0_vgpr1
+#     %1:_(s64) = COPY $vgpr2_vgpr3
+#     %2:_(s33) = G_TRUNC %0
+#     %3:_(s33) = G_TRUNC %1
+#     %4:_(s33) = G_SUB %2, %3
+#     %5:_(s64) = G_ANYEXT %4
+#     $vgpr0_vgpr1 = COPY %5
+# ...


        


More information about the llvm-commits mailing list