[llvm] b38940d - TTI: Fix vectorization cost for bswap

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 14 10:14:19 PST 2020


Author: Matt Arsenault
Date: 2020-02-14T10:14:07-08:00
New Revision: b38940dfb97d924bee1498ec2baf1eec04a48308

URL: https://github.com/llvm/llvm-project/commit/b38940dfb97d924bee1498ec2baf1eec04a48308
DIFF: https://github.com/llvm/llvm-project/commit/b38940dfb97d924bee1498ec2baf1eec04a48308.diff

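LOG: TTI: Fix vectorization cost for bswap

Note: besides mapping Intrinsic::bswap to ISD::BSWAP, the patch also maps
Intrinsic::bitreverse to ISD::BITREVERSE in the same switch.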

Added: 
    llvm/test/Transforms/SLPVectorizer/AMDGPU/bswap.ll

Modified: 
    llvm/include/llvm/CodeGen/BasicTTIImpl.h

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 0bdb49edc983..d3f8896eca16 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1471,6 +1471,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       SingleCallCost = TargetTransformInfo::TCC_Expensive;
       break;
     // FIXME: ctlz, cttz, ...
+    case Intrinsic::bswap:
+      ISDs.push_back(ISD::BSWAP);
+      break;
+    case Intrinsic::bitreverse:
+      ISDs.push_back(ISD::BITREVERSE);
+      break;
     }
 
     const TargetLoweringBase *TLI = getTLI();

diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/bswap.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/bswap.ll
new file mode 100644
index 000000000000..bf42d2f5ff64
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/bswap.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
+
+; GCN-LABEL: @bswap_v2i16(
+; GFX7: call i16 @llvm.bswap.i16(
+; GFX7: call i16 @llvm.bswap.i16(
+
+; GFX8: call <2 x i16> @llvm.bswap.v2i16(
+define <2 x i16> @bswap_v2i16(<2 x i16> %arg) {
+bb:
+  %tmp = extractelement <2 x i16> %arg, i64 0
+  %tmp1 = tail call i16 @llvm.bswap.i16(i16 %tmp)
+  %tmp2 = insertelement <2 x i16> undef, i16 %tmp1, i64 0
+  %tmp3 = extractelement <2 x i16> %arg, i64 1
+  %tmp4 = tail call i16 @llvm.bswap.i16(i16 %tmp3)
+  %tmp5 = insertelement <2 x i16> %tmp2, i16 %tmp4, i64 1
+  ret <2 x i16> %tmp5
+}
+
+; GCN-LABEL: @bswap_v2i32(
+; GCN: call i32 @llvm.bswap.i32
+; GCN: call i32 @llvm.bswap.i32
+define <2 x i32> @bswap_v2i32(<2 x i32> %arg) {
+bb:
+  %tmp = extractelement <2 x i32> %arg, i64 0
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %tmp)
+  %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i64 0
+  %tmp3 = extractelement <2 x i32> %arg, i64 1
+  %tmp4 = tail call i32 @llvm.bswap.i32(i32 %tmp3)
+  %tmp5 = insertelement <2 x i32> %tmp2, i32 %tmp4, i64 1
+  ret <2 x i32> %tmp5
+}
+
+declare i16 @llvm.bswap.i16(i16) #0
+declare i32 @llvm.bswap.i32(i32) #0
+
+attributes #0 = { nounwind readnone speculatable willreturn }


        


More information about the llvm-commits mailing list