[llvm] r283415 - [AMDGPU] Promote uniform i16 bitreverse intrinsic to i32

Konstantin Zhuravlyov via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 5 19:20:46 PDT 2016


Author: kzhuravl
Date: Wed Oct  5 21:20:46 2016
New Revision: 283415

URL: http://llvm.org/viewvc/llvm-project?rev=283415&view=rev
Log:
[AMDGPU] Promote uniform i16 bitreverse intrinsic to i32

Differential Revision: https://reviews.llvm.org/D25121

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
    llvm/trunk/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp?rev=283415&r1=283414&r2=283415&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp Wed Oct  5 21:20:46 2016
@@ -77,14 +77,14 @@ class AMDGPUCodeGenPrepare : public Func
   ///
   /// \returns True if 16 bit binary operation is promoted to equivalent 32 bit
   /// binary operation, false otherwise.
-  bool promoteUniformI16OpToI32Op(BinaryOperator &I) const;
+  bool promoteUniformI16OpToI32(BinaryOperator &I) const;
 
   /// \brief Promotes uniform 16 bit 'icmp' operation \p I to 32 bit 'icmp'
   /// operation by sign or zero extending operands to 32 bits, and replacing 16
   /// bit operation with 32 bit operation.
   ///
   /// \returns True.
-  bool promoteUniformI16OpToI32Op(ICmpInst &I) const;
+  bool promoteUniformI16OpToI32(ICmpInst &I) const;
 
   /// \brief Promotes uniform 16 bit 'select' operation \p I to 32 bit 'select'
   /// operation by sign or zero extending operands to 32 bits, replacing 16 bit
@@ -92,7 +92,16 @@ class AMDGPUCodeGenPrepare : public Func
   /// operation back to 16 bits.
   ///
   /// \returns True.
-  bool promoteUniformI16OpToI32Op(SelectInst &I) const;
+  bool promoteUniformI16OpToI32(SelectInst &I) const;
+
+  /// \brief Promotes uniform 16 bit 'bitreverse' intrinsic \p I to 32 bit
+  /// 'bitreverse' intrinsic by zero extending operand to 32 bits, replacing 16
+  /// bit intrinsic with 32 bit intrinsic, shifting the result of 32 bit
+  /// intrinsic 16 bits to the right with zero fill, and truncating the result
+  /// of shift operation back to 16 bits.
+  ///
+  /// \returns True.
+  bool promoteUniformI16BitreverseIntrinsicToI32(IntrinsicInst &I) const;
 
 public:
   static char ID;
@@ -111,6 +120,9 @@ public:
   bool visitICmpInst(ICmpInst &I);
   bool visitSelectInst(SelectInst &I);
 
+  bool visitIntrinsicInst(IntrinsicInst &I);
+  bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
+
   bool doInitialization(Module &M) override;
   bool runOnFunction(Function &F) override;
 
@@ -181,8 +193,8 @@ bool AMDGPUCodeGenPrepare::isSigned(cons
       cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
 }
 
-bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(BinaryOperator &I) const {
-  assert(isI16Ty(I.getType()) && "Op must be 16 bits");
+bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32(BinaryOperator &I) const {
+  assert(isI16Ty(I.getType()) && "I must be 16 bits");
 
   if (I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::UDiv)
     return false;
@@ -212,7 +224,7 @@ bool AMDGPUCodeGenPrepare::promoteUnifor
   return true;
 }
 
-bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(ICmpInst &I) const {
+bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32(ICmpInst &I) const {
   assert(isI16Ty(I.getOperand(0)->getType()) && "Op0 must be 16 bits");
   assert(isI16Ty(I.getOperand(1)->getType()) && "Op1 must be 16 bits");
 
@@ -240,8 +252,8 @@ bool AMDGPUCodeGenPrepare::promoteUnifor
   return true;
 }
 
-bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(SelectInst &I) const {
-  assert(isI16Ty(I.getType()) && "Op must be 16 bits");
+bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32(SelectInst &I) const {
+  assert(isI16Ty(I.getType()) && "I must be 16 bits");
 
   IRBuilder<> Builder(&I);
   Builder.SetCurrentDebugLocation(I.getDebugLoc());
@@ -268,6 +280,29 @@ bool AMDGPUCodeGenPrepare::promoteUnifor
   return true;
 }
 
+bool AMDGPUCodeGenPrepare::promoteUniformI16BitreverseIntrinsicToI32(
+    IntrinsicInst &I) const {
+  assert(I.getIntrinsicID() == Intrinsic::bitreverse && "I must be bitreverse");
+  assert(isI16Ty(I.getType()) && "I must be 16 bits");
+
+  IRBuilder<> Builder(&I);
+  Builder.SetCurrentDebugLocation(I.getDebugLoc());
+  // The i32 reverse leaves the bits in the upper half; lshr moves them down.
+  Type *I32Ty = getI32Ty(Builder, I.getType());
+  Function *I32 =
+      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });
+  Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
+  Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
+  Value *LShrOp = Builder.CreateLShr(ExtRes, 16);
+  Value *TruncRes =
+      Builder.CreateTrunc(LShrOp, getI16Ty(Builder, ExtRes->getType()));
+
+  I.replaceAllUsesWith(TruncRes);
+  I.eraseFromParent();
+
+  return true;
+}
+
 static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
   const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
   if (!CNum)
@@ -357,7 +392,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOp
 
   // TODO: Should we promote smaller types that will be legalized to i16?
   if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
-    Changed |= promoteUniformI16OpToI32Op(I);
+    Changed |= promoteUniformI16OpToI32(I);
 
   return Changed;
 }
@@ -368,7 +403,7 @@ bool AMDGPUCodeGenPrepare::visitICmpInst
   // TODO: Should we promote smaller types that will be legalized to i16?
   if (ST->has16BitInsts() && isI16Ty(I.getOperand(0)->getType()) &&
           isI16Ty(I.getOperand(1)->getType()) && DA->isUniform(&I))
-    Changed |= promoteUniformI16OpToI32Op(I);
+    Changed |= promoteUniformI16OpToI32(I);
 
   return Changed;
 }
@@ -378,7 +413,26 @@ bool AMDGPUCodeGenPrepare::visitSelectIn
 
   // TODO: Should we promote smaller types that will be legalized to i16?
   if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
-    Changed |= promoteUniformI16OpToI32Op(I);
+    Changed |= promoteUniformI16OpToI32(I);
+
+  return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
+  // Dispatch on the intrinsic ID; only bitreverse has a handler so far.
+  if (I.getIntrinsicID() == Intrinsic::bitreverse)
+    return visitBitreverseIntrinsicInst(I);
+
+  // Every other intrinsic is left untouched.
+  return false;
+}
+
+bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
+  bool Changed = false;
+
+  // TODO: Should we promote smaller types that will be legalized to i16?
+  if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
+    Changed |= promoteUniformI16BitreverseIntrinsicToI32(I);
 
   return Changed;
 }

Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll?rev=283415&r1=283414&r2=283415&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll Wed Oct  5 21:20:46 2016
@@ -1,856 +1,1040 @@
-; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
-; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-; SI-NOT: zext
-; SI-NOT: sext
-; SI-NOT: trunc
-
-; VI-LABEL: @add_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = add i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @add_i16(
+; SI: %r = add i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = add i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @add_i16(i16 %a, i16 %b) {
   %r = add i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @add_nsw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = add nsw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @add_nsw_i16(
+; SI: %r = add nsw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = add nsw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @add_nsw_i16(i16 %a, i16 %b) {
   %r = add nsw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @add_nuw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = add nuw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @add_nuw_i16(
+; SI: %r = add nuw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = add nuw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @add_nuw_i16(i16 %a, i16 %b) {
   %r = add nuw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @add_nuw_nsw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @add_nuw_nsw_i16(
+; SI: %r = add nuw nsw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @add_nuw_nsw_i16(i16 %a, i16 %b) {
   %r = add nuw nsw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @sub_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = sub i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @sub_i16(
+; SI: %r = sub i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = sub i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @sub_i16(i16 %a, i16 %b) {
   %r = sub i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @sub_nsw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @sub_nsw_i16(
+; SI: %r = sub nsw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @sub_nsw_i16(i16 %a, i16 %b) {
   %r = sub nsw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @sub_nuw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = sub nuw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @sub_nuw_i16(
+; SI: %r = sub nuw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @sub_nuw_i16(i16 %a, i16 %b) {
   %r = sub nuw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @sub_nuw_nsw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @sub_nuw_nsw_i16(
+; SI: %r = sub nuw nsw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @sub_nuw_nsw_i16(i16 %a, i16 %b) {
   %r = sub nuw nsw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @mul_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = mul i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @mul_i16(
+; SI: %r = mul i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = mul i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @mul_i16(i16 %a, i16 %b) {
   %r = mul i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @mul_nsw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = mul nsw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @mul_nsw_i16(
+; SI: %r = mul nsw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = mul nsw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @mul_nsw_i16(i16 %a, i16 %b) {
   %r = mul nsw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @mul_nuw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @mul_nuw_i16(
+; SI: %r = mul nuw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @mul_nuw_i16(i16 %a, i16 %b) {
   %r = mul nuw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @mul_nuw_nsw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @mul_nuw_nsw_i16(
+; SI: %r = mul nuw nsw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @mul_nuw_nsw_i16(i16 %a, i16 %b) {
   %r = mul nuw nsw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @urem_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @urem_i16(
+; SI: %r = urem i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @urem_i16(i16 %a, i16 %b) {
   %r = urem i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @srem_i16(
+; GCN-LABEL: @srem_i16(
+; SI: %r = srem i16 %a, %b
+; SI-NEXT: ret i16 %r
 ; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = sext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @srem_i16(i16 %a, i16 %b) {
   %r = srem i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @shl_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = shl i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @shl_i16(
+; SI: %r = shl i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = shl i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @shl_i16(i16 %a, i16 %b) {
   %r = shl i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @shl_nsw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = shl nsw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @shl_nsw_i16(
+; SI: %r = shl nsw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = shl nsw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @shl_nsw_i16(i16 %a, i16 %b) {
   %r = shl nsw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @shl_nuw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = shl nuw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @shl_nuw_i16(
+; SI: %r = shl nuw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @shl_nuw_i16(i16 %a, i16 %b) {
   %r = shl nuw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @shl_nuw_nsw_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @shl_nuw_nsw_i16(
+; SI: %r = shl nuw nsw i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @shl_nuw_nsw_i16(i16 %a, i16 %b) {
   %r = shl nuw nsw i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @lshr_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @lshr_i16(
+; SI: %r = lshr i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @lshr_i16(i16 %a, i16 %b) {
   %r = lshr i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @lshr_exact_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @lshr_exact_i16(
+; SI: %r = lshr exact i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @lshr_exact_i16(i16 %a, i16 %b) {
   %r = lshr exact i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @ashr_i16(
+; GCN-LABEL: @ashr_i16(
+; SI: %r = ashr i16 %a, %b
+; SI-NEXT: ret i16 %r
 ; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = sext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @ashr_i16(i16 %a, i16 %b) {
   %r = ashr i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @ashr_exact_i16(
+; GCN-LABEL: @ashr_exact_i16(
+; SI: %r = ashr exact i16 %a, %b
+; SI-NEXT: ret i16 %r
 ; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = sext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @ashr_exact_i16(i16 %a, i16 %b) {
   %r = ashr exact i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @and_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @and_i16(
+; SI: %r = and i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @and_i16(i16 %a, i16 %b) {
   %r = and i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @or_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @or_i16(
+; SI: %r = or i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @or_i16(i16 %a, i16 %b) {
   %r = or i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @xor_i16(
-; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
-; VI: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
-; VI: ret i16 %[[R_16]]
+; GCN-LABEL: @xor_i16(
+; SI: %r = xor i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
 define i16 @xor_i16(i16 %a, i16 %b) {
   %r = xor i16 %a, %b
   ret i16 %r
 }
 
-; VI-LABEL: @select_eq_i16(
+; GCN-LABEL: @select_eq_i16(
+; SI: %cmp = icmp eq i16 %a, %b
+; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
+; SI-NEXT: ret i16 %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
-; VI: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
-; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI: ret i16 %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
+; VI-NEXT: ret i16 %[[SEL_16]]
 define i16 @select_eq_i16(i16 %a, i16 %b) {
   %cmp = icmp eq i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
-; VI-LABEL: @select_ne_i16(
+; GCN-LABEL: @select_ne_i16(
+; SI: %cmp = icmp ne i16 %a, %b
+; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
+; SI-NEXT: ret i16 %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
-; VI: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
-; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI: ret i16 %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
+; VI-NEXT: ret i16 %[[SEL_16]]
 define i16 @select_ne_i16(i16 %a, i16 %b) {
   %cmp = icmp ne i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
-; VI-LABEL: @select_ugt_i16(
+; GCN-LABEL: @select_ugt_i16(
+; SI: %cmp = icmp ugt i16 %a, %b
+; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
+; SI-NEXT: ret i16 %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
-; VI: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
-; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI: ret i16 %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
+; VI-NEXT: ret i16 %[[SEL_16]]
 define i16 @select_ugt_i16(i16 %a, i16 %b) {
   %cmp = icmp ugt i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
-; VI-LABEL: @select_uge_i16(
+; GCN-LABEL: @select_uge_i16(
+; SI: %cmp = icmp uge i16 %a, %b
+; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
+; SI-NEXT: ret i16 %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
-; VI: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
-; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI: ret i16 %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
+; VI-NEXT: ret i16 %[[SEL_16]]
 define i16 @select_uge_i16(i16 %a, i16 %b) {
   %cmp = icmp uge i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
-; VI-LABEL: @select_ult_i16(
+; GCN-LABEL: @select_ult_i16(
+; SI: %cmp = icmp ult i16 %a, %b
+; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
+; SI-NEXT: ret i16 %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
-; VI: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
-; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI: ret i16 %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
+; VI-NEXT: ret i16 %[[SEL_16]]
 define i16 @select_ult_i16(i16 %a, i16 %b) {
   %cmp = icmp ult i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
-; VI-LABEL: @select_ule_i16(
+; GCN-LABEL: @select_ule_i16(
+; SI: %cmp = icmp ule i16 %a, %b
+; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
+; SI-NEXT: ret i16 %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
-; VI: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
-; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
-; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI: ret i16 %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
+; VI-NEXT: ret i16 %[[SEL_16]]
 define i16 @select_ule_i16(i16 %a, i16 %b) {
   %cmp = icmp ule i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
-; VI-LABEL: @select_sgt_i16(
+; GCN-LABEL: @select_sgt_i16(
+; SI: %cmp = icmp sgt i16 %a, %b
+; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
+; SI-NEXT: ret i16 %sel
 ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
-; VI: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
-; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI: ret i16 %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
+; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
+; VI-NEXT: ret i16 %[[SEL_16]]
 define i16 @select_sgt_i16(i16 %a, i16 %b) {
   %cmp = icmp sgt i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
-; VI-LABEL: @select_sge_i16(
+; GCN-LABEL: @select_sge_i16(
+; SI: %cmp = icmp sge i16 %a, %b
+; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
+; SI-NEXT: ret i16 %sel
 ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
-; VI: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
-; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI: ret i16 %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
+; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
+; VI-NEXT: ret i16 %[[SEL_16]]
 define i16 @select_sge_i16(i16 %a, i16 %b) {
   %cmp = icmp sge i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
-; VI-LABEL: @select_slt_i16(
+; GCN-LABEL: @select_slt_i16(
+; SI: %cmp = icmp slt i16 %a, %b
+; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
+; SI-NEXT: ret i16 %sel
 ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
-; VI: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
-; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI: ret i16 %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
+; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
+; VI-NEXT: ret i16 %[[SEL_16]]
 define i16 @select_slt_i16(i16 %a, i16 %b) {
   %cmp = icmp slt i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
-; VI-LABEL: @select_sle_i16(
+; GCN-LABEL: @select_sle_i16(
+; SI: %cmp = icmp sle i16 %a, %b
+; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
+; SI-NEXT: ret i16 %sel
 ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
-; VI: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
-; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
-; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
-; VI: ret i16 %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
+; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
+; VI-NEXT: ret i16 %[[SEL_16]]
 define i16 @select_sle_i16(i16 %a, i16 %b) {
   %cmp = icmp sle i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
-; VI-LABEL: @add_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = add <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+declare i16 @llvm.bitreverse.i16(i16)
+; GCN-LABEL: @bitreverse_i16(
+; SI: %brev = call i16 @llvm.bitreverse.i16(i16 %a)
+; SI-NEXT: ret i16 %brev
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]])
+; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 16
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[S_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
+define i16 @bitreverse_i16(i16 %a) {
+  %brev = call i16 @llvm.bitreverse.i16(i16 %a)
+  ret i16 %brev
+}
+
+; GCN-LABEL: @add_3xi16(
+; SI: %r = add <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = add <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = add <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @add_nsw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = add nsw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @add_nsw_3xi16(
+; SI: %r = add nsw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = add nsw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = add nsw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @add_nuw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = add nuw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @add_nuw_3xi16(
+; SI: %r = add nuw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = add nuw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = add nuw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @add_nuw_nsw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @add_nuw_nsw_3xi16(
+; SI: %r = add nuw nsw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = add nuw nsw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @sub_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = sub <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @sub_3xi16(
+; SI: %r = sub <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = sub <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = sub <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @sub_nsw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @sub_nsw_3xi16(
+; SI: %r = sub nsw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = sub nsw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @sub_nuw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = sub nuw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @sub_nuw_3xi16(
+; SI: %r = sub nuw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = sub nuw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @sub_nuw_nsw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @sub_nuw_nsw_3xi16(
+; SI: %r = sub nuw nsw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = sub nuw nsw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @mul_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = mul <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @mul_3xi16(
+; SI: %r = mul <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = mul <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = mul <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @mul_nsw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = mul nsw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @mul_nsw_3xi16(
+; SI: %r = mul nsw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = mul nsw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = mul nsw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @mul_nuw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @mul_nuw_3xi16(
+; SI: %r = mul nuw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = mul nuw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @mul_nuw_nsw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @mul_nuw_nsw_3xi16(
+; SI: %r = mul nuw nsw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = mul nuw nsw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @urem_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @urem_3xi16(
+; SI: %r = urem <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @urem_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = urem <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @srem_3xi16(
+; GCN-LABEL: @srem_3xi16(
+; SI: %r = srem <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
 ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @srem_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = srem <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @shl_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = shl <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @shl_3xi16(
+; SI: %r = shl <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = shl <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = shl <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @shl_nsw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = shl nsw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @shl_nsw_3xi16(
+; SI: %r = shl nsw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = shl nsw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = shl nsw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @shl_nuw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = shl nuw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @shl_nuw_3xi16(
+; SI: %r = shl nuw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = shl nuw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @shl_nuw_nsw_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @shl_nuw_nsw_3xi16(
+; SI: %r = shl nuw nsw <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = shl nuw nsw <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @lshr_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @lshr_3xi16(
+; SI: %r = lshr <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = lshr <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @lshr_exact_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @lshr_exact_3xi16(
+; SI: %r = lshr exact <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = lshr exact <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @ashr_3xi16(
+; GCN-LABEL: @ashr_3xi16(
+; SI: %r = ashr <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
 ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = ashr <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @ashr_exact_3xi16(
+; GCN-LABEL: @ashr_exact_3xi16(
+; SI: %r = ashr exact <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
 ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = ashr exact <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @and_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @and_3xi16(
+; SI: %r = and <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = and <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @or_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @or_3xi16(
+; SI: %r = or <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = or <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @xor_3xi16(
-; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]]
-; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[R_16]]
+; GCN-LABEL: @xor_3xi16(
+; SI: %r = xor <3 x i16> %a, %b
+; SI-NEXT: ret <3 x i16> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
 define <3 x i16> @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %r = xor <3 x i16> %a, %b
   ret <3 x i16> %r
 }
 
-; VI-LABEL: @select_eq_3xi16(
+; GCN-LABEL: @select_eq_3xi16(
+; SI: %cmp = icmp eq <3 x i16> %a, %b
+; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
+; SI-NEXT: ret <3 x i16> %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[SEL_16]]
 define <3 x i16> @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %cmp = icmp eq <3 x i16> %a, %b
   %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
   ret <3 x i16> %sel
 }
 
-; VI-LABEL: @select_ne_3xi16(
+; GCN-LABEL: @select_ne_3xi16(
+; SI: %cmp = icmp ne <3 x i16> %a, %b
+; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
+; SI-NEXT: ret <3 x i16> %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[SEL_16]]
 define <3 x i16> @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %cmp = icmp ne <3 x i16> %a, %b
   %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
   ret <3 x i16> %sel
 }
 
-; VI-LABEL: @select_ugt_3xi16(
+; GCN-LABEL: @select_ugt_3xi16(
+; SI: %cmp = icmp ugt <3 x i16> %a, %b
+; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
+; SI-NEXT: ret <3 x i16> %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[SEL_16]]
 define <3 x i16> @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %cmp = icmp ugt <3 x i16> %a, %b
   %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
   ret <3 x i16> %sel
 }
 
-; VI-LABEL: @select_uge_3xi16(
+; GCN-LABEL: @select_uge_3xi16(
+; SI: %cmp = icmp uge <3 x i16> %a, %b
+; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
+; SI-NEXT: ret <3 x i16> %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[SEL_16]]
 define <3 x i16> @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %cmp = icmp uge <3 x i16> %a, %b
   %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
   ret <3 x i16> %sel
 }
 
-; VI-LABEL: @select_ult_3xi16(
+; GCN-LABEL: @select_ult_3xi16(
+; SI: %cmp = icmp ult <3 x i16> %a, %b
+; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
+; SI-NEXT: ret <3 x i16> %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[SEL_16]]
 define <3 x i16> @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %cmp = icmp ult <3 x i16> %a, %b
   %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
   ret <3 x i16> %sel
 }
 
-; VI-LABEL: @select_ule_3xi16(
+; GCN-LABEL: @select_ule_3xi16(
+; SI: %cmp = icmp ule <3 x i16> %a, %b
+; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
+; SI-NEXT: ret <3 x i16> %sel
 ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
-; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[SEL_16]]
 define <3 x i16> @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %cmp = icmp ule <3 x i16> %a, %b
   %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
   ret <3 x i16> %sel
 }
 
-; VI-LABEL: @select_sgt_3xi16(
+; GCN-LABEL: @select_sgt_3xi16(
+; SI: %cmp = icmp sgt <3 x i16> %a, %b
+; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
+; SI-NEXT: ret <3 x i16> %sel
 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[SEL_16]]
 define <3 x i16> @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %cmp = icmp sgt <3 x i16> %a, %b
   %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
   ret <3 x i16> %sel
 }
 
-; VI-LABEL: @select_sge_3xi16(
+; GCN-LABEL: @select_sge_3xi16(
+; SI: %cmp = icmp sge <3 x i16> %a, %b
+; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
+; SI-NEXT: ret <3 x i16> %sel
 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[SEL_16]]
 define <3 x i16> @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %cmp = icmp sge <3 x i16> %a, %b
   %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
   ret <3 x i16> %sel
 }
 
-; VI-LABEL: @select_slt_3xi16(
+; GCN-LABEL: @select_slt_3xi16(
+; SI: %cmp = icmp slt <3 x i16> %a, %b
+; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
+; SI-NEXT: ret <3 x i16> %sel
 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[SEL_16]]
 define <3 x i16> @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %cmp = icmp slt <3 x i16> %a, %b
   %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
   ret <3 x i16> %sel
 }
 
-; VI-LABEL: @select_sle_3xi16(
+; GCN-LABEL: @select_sle_3xi16(
+; SI: %cmp = icmp sle <3 x i16> %a, %b
+; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
+; SI-NEXT: ret <3 x i16> %sel
 ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]]
-; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
-; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
-; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
-; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
-; VI: ret <3 x i16> %[[SEL_16]]
+; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]]
+; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
+; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
+; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[SEL_16]]
 define <3 x i16> @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
   %cmp = icmp sle <3 x i16> %a, %b
   %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
   ret <3 x i16> %sel
 }
+
+declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>)
+; GCN-LABEL: @bitreverse_3xi16(
+; SI: %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
+; SI-NEXT: ret <3 x i16> %brev
+; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]])
+; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], <i32 16, i32 16, i32 16>
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i16>
+; VI-NEXT: ret <3 x i16> %[[R_16]]
+define <3 x i16> @bitreverse_3xi16(<3 x i16> %a) {
+  %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
+  ret <3 x i16> %brev
+}




More information about the llvm-commits mailing list