[llvm] r353438 - AMDGPU/GlobalISel: Legalize fsqrt

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 7 10:14:39 PST 2019


Author: arsenm
Date: Thu Feb  7 10:14:39 2019
New Revision: 353438

URL: http://llvm.org/viewvc/llvm-project?rev=353438&view=rev
Log:
AMDGPU/GlobalISel: Legalize fsqrt

Added:
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=353438&r1=353437&r2=353438&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Thu Feb  7 10:14:39 2019
@@ -161,6 +161,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
     .scalarize(0)
     .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
 
+  if (ST.has16BitInsts()) {
+    getActionDefinitionsBuilder(G_FSQRT)
+      .legalFor({S32, S64, S16})
+      .scalarize(0)
+      .clampScalar(0, S16, S64);
+  } else {
+    getActionDefinitionsBuilder(G_FSQRT)
+      .legalFor({S32, S64})
+      .scalarize(0)
+      .clampScalar(0, S32, S64);
+  }
+
   getActionDefinitionsBuilder(G_FPTRUNC)
     .legalFor({{S32, S64}, {S16, S32}})
     .scalarize(0);

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=353438&r1=353437&r2=353438&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Thu Feb  7 10:14:39 2019
@@ -598,6 +598,7 @@ AMDGPURegisterBankInfo::getInstrMapping(
   case AMDGPU::G_FPTOUI:
   case AMDGPU::G_FMUL:
   case AMDGPU::G_FMA:
+  case AMDGPU::G_FSQRT:
   case AMDGPU::G_SITOFP:
   case AMDGPU::G_UITOFP:
   case AMDGPU::G_FPTRUNC:

Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir?rev=353438&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir Thu Feb  7 10:14:39 2019
@@ -0,0 +1,288 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI  %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9  %s
+
+---
+name: test_fsqrt_s32
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; SI-LABEL: name: test_fsqrt_s32
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI-LABEL: name: test_fsqrt_s32
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9-LABEL: name: test_fsqrt_s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FSQRT %0
+...
+---
+name: test_fsqrt_s64
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; SI-LABEL: name: test_fsqrt_s64
+    ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; VI-LABEL: name: test_fsqrt_s64
+    ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX9-LABEL: name: test_fsqrt_s64
+    ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_FSQRT %0
+...
+---
+name: test_fsqrt_s16
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; SI-LABEL: name: test_fsqrt_s16
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+    ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]]
+    ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-LABEL: name: test_fsqrt_s16
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: test_fsqrt_s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s16) = G_FSQRT %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_fsqrt_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; SI-LABEL: name: test_fsqrt_v2s32
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]]
+    ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-LABEL: name: test_fsqrt_v2s32
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; VI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]]
+    ; VI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-LABEL: name: test_fsqrt_v2s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]]
+    ; GFX9: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s32>) = G_FSQRT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_fsqrt_v3s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; SI-LABEL: name: test_fsqrt_v3s32
+    ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]]
+    ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]]
+    ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-LABEL: name: test_fsqrt_v3s32
+    ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; VI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]]
+    ; VI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]]
+    ; VI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-LABEL: name: test_fsqrt_v3s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; GFX9: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]]
+    ; GFX9: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]]
+    ; GFX9: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x  s32>) = G_FSQRT %0
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+...
+
+---
+name: test_fsqrt_v2s64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; SI-LABEL: name: test_fsqrt_v2s64
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; SI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]]
+    ; SI: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-LABEL: name: test_fsqrt_v2s64
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; VI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]]
+    ; VI: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-LABEL: name: test_fsqrt_v2s64
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX9: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]]
+    ; GFX9: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x s64>) = G_FSQRT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_fsqrt_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; SI-LABEL: name: test_fsqrt_v2s16
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
+    ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]]
+    ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32)
+    ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
+    ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]]
+    ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16)
+    ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; VI-LABEL: name: test_fsqrt_v2s16
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]]
+    ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16)
+    ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX9-LABEL: name: test_fsqrt_v2s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]]
+    ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16)
+    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = G_FSQRT %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: test_fsqrt_v3s16
+body: |
+  bb.0:
+
+    ; SI-LABEL: name: test_fsqrt_v3s16
+    ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
+    ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
+    ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]]
+    ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32)
+    ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
+    ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]]
+    ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32)
+    ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
+    ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]]
+    ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    ; VI-LABEL: name: test_fsqrt_v3s16
+    ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
+    ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]]
+    ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]]
+    ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    ; GFX9-LABEL: name: test_fsqrt_v3s16
+    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
+    ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]]
+    ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]]
+    ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    %0:_(<3 x s16>) = G_IMPLICIT_DEF
+    %1:_(<3 x s16>) = G_FSQRT %0
+    S_NOP 0, implicit %1
+...
+
+---
+name: test_fsqrt_v4s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; SI-LABEL: name: test_fsqrt_v4s16
+    ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
+    ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]]
+    ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32)
+    ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
+    ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]]
+    ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32)
+    ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
+    ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]]
+    ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32)
+    ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
+    ; SI: [[FSQRT3:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT3]]
+    ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT3]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; VI-LABEL: name: test_fsqrt_v4s16
+    ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]]
+    ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]]
+    ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]]
+    ; VI: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[UV3]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16), [[FSQRT3]](s16)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; GFX9-LABEL: name: test_fsqrt_v4s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]]
+    ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]]
+    ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]]
+    ; GFX9: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[UV3]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16), [[FSQRT3]](s16)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    %1:_(<4 x s16>) = G_FSQRT %0
+    $vgpr0_vgpr1 = COPY %1
+...

Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir?rev=353438&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir Thu Feb  7 10:14:39 2019
@@ -0,0 +1,35 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+
+---
+name: fsqrt_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: fsqrt_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[FSQRT:%[0-9]+]]:vgpr(s32) = G_FSQRT [[COPY]]
+    ; CHECK: $vgpr0 = COPY [[FSQRT]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = G_FSQRT %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: fsqrt_v
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: fsqrt_v
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[FSQRT:%[0-9]+]]:vgpr(s32) = G_FSQRT [[COPY]]
+    ; CHECK: $vgpr0 = COPY [[FSQRT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FSQRT %0
+    $vgpr0 = COPY %1
+...




More information about the llvm-commits mailing list