[llvm] [DirectX] Add lowering support for `llvm.fsh[l|r].*` (PR #170570)
Finn Plummer via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 13:18:29 PST 2025
https://github.com/inbelic updated https://github.com/llvm/llvm-project/pull/170570
>From 5441b451d1e8f35e994ca3513f4896de58a719e0 Mon Sep 17 00:00:00 2001
From: Finn Plummer <mail at inbelic.dev>
Date: Wed, 3 Dec 2025 15:05:14 -0800
Subject: [PATCH 1/6] [DirectX] Add lowering for fshl
---
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 28 +++++++
llvm/test/CodeGen/DirectX/fshl.ll | 82 +++++++++++++++++++
2 files changed, 110 insertions(+)
create mode 100644 llvm/test/CodeGen/DirectX/fshl.ll
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index e0d2dbde92150..b02e03411b47f 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -200,6 +201,7 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::assume:
case Intrinsic::abs:
case Intrinsic::atan2:
+ case Intrinsic::fshl:
case Intrinsic::exp:
case Intrinsic::is_fpclass:
case Intrinsic::log:
@@ -656,6 +658,29 @@ static Value *expandAtan2Intrinsic(CallInst *Orig) {
return Result;
}
+static Value *expandFunnelShiftIntrinsic(CallInst *Orig) {
+ Type *Ty = Orig->getType();
+ Value *A = Orig->getOperand(0);
+ Value *B = Orig->getOperand(1);
+ Value *Shift = Orig->getOperand(2);
+
+ IRBuilder<> Builder(Orig);
+
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ Constant *Mask = ConstantInt::get(Ty, BitWidth - 1);
+ Constant *Size = ConstantInt::get(Ty, BitWidth);
+
+ // The shift is not required to be masked as DXIL op will do so automatically
+ Value *Left = Builder.CreateShl(A, Shift);
+
+ Value *MaskedShift = Builder.CreateAnd(Shift, Mask);
+ Value *InverseShift = Builder.CreateSub(Size, MaskedShift);
+ Value *Right = Builder.CreateLShr(B, InverseShift);
+
+ Value *Result = Builder.CreateOr(Left, Right);
+ return Result;
+}
+
static Value *expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId) {
Value *X = Orig->getOperand(0);
@@ -995,6 +1020,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::atan2:
Result = expandAtan2Intrinsic(Orig);
break;
+ case Intrinsic::fshl:
+ Result = expandFunnelShiftIntrinsic(Orig);
+ break;
case Intrinsic::exp:
Result = expandExpIntrinsic(Orig);
break;
diff --git a/llvm/test/CodeGen/DirectX/fshl.ll b/llvm/test/CodeGen/DirectX/fshl.ll
new file mode 100644
index 0000000000000..0b542525faea6
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/fshl.ll
@@ -0,0 +1,82 @@
+; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+;
+; Make sure funnel shift left intrinsics are expanded into a shl/and/sub/lshr/or sequence.
+
+; CHECK-LABEL: define{{.*}}@fshl_i16(
+; CHECK-SAME: i16 %[[A:.*]], i16 %[[B:.*]], i16 %[[SHIFT:.*]])
+define noundef i16 @fshl_i16(i16 %a, i16 %b, i16 %shift) {
+entry:
+; CHECK: %[[LEFT:.*]] = shl i16 %[[A]], %[[SHIFT]]
+; CHECK: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15
+; CHECK: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]]
+; CHECK: %[[RIGHT:.*]] = lshr i16 %[[B]], %[[INVERSE_SHIFT]]
+; CHECK: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]]
+; CHECK: ret i16 %[[RES]]
+ %fsh = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %shift)
+ ret i16 %fsh
+}
+
+declare i16 @llvm.fshl.i16(i16, i16, i16)
+
+; CHECK-LABEL: define{{.*}}@fshl_v1i32(
+; CHECK-SAME: <1 x i32> %[[A_VEC:.*]], <1 x i32> %[[B_VEC:.*]], <1 x i32> %[[SHIFT_VEC:.*]])
+define noundef <1 x i32> @fshl_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) {
+entry:
+; CHECK: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
+; CHECK: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
+; CHECK: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
+; CHECK: %[[LEFT:.*]] = shl i32 %[[A]], %[[SHIFT]]
+; CHECK: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
+; CHECK: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]]
+; CHECK: %[[RIGHT:.*]] = lshr i32 %[[B]], %[[INVERSE_SHIFT]]
+; CHECK: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]]
+; CHECK: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0
+; CHECK: ret <1 x i32> %[[RES_VEC]]
+ %fsh = call <1 x i32> @llvm.fshl.v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift)
+ ret <1 x i32> %fsh
+}
+
+declare <1 x i32> @llvm.fshl.v1i32(<1 x i32>, <1 x i32>, <1 x i32>)
+
+; CHECK-LABEL: define{{.*}}@fshl_v1i64(
+; CHECK-SAME: <3 x i64> %[[A_VEC:.*]], <3 x i64> %[[B_VEC:.*]], <3 x i64> %[[SHIFT_VEC:.*]])
+define noundef <3 x i64> @fshl_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) {
+entry:
+; CHECK: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
+; CHECK: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
+; CHECK: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
+; CHECK: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[SHIFT0]]
+; CHECK: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
+; CHECK: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
+; CHECK: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]]
+; CHECK: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
+;
+; CHECK: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
+; CHECK: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
+; CHECK: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
+; CHECK: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[SHIFT1]]
+; CHECK: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
+; CHECK: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
+; CHECK: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]]
+; CHECK: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
+;
+; CHECK: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
+; CHECK: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
+; CHECK: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
+; CHECK: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[SHIFT2]]
+; CHECK: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
+; CHECK: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
+; CHECK: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[INVERSE_SHIFT2]]
+; CHECK: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]]
+;
+; CHECK: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
+; CHECK: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1
+; CHECK: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2
+;
+; CHECK: ret <3 x i64> %[[RES_VEC]]
+ %fsh = call <3 x i64> @llvm.fshl.v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift)
+ ret <3 x i64> %fsh
+}
+
+declare <3 x i64> @llvm.fshl.v1i64(<3 x i64>, <3 x i64>, <3 x i64>)
>From bd36e8b59612d3ad8e57d297f5a9bb5ce3308332 Mon Sep 17 00:00:00 2001
From: Finn Plummer <mail at inbelic.dev>
Date: Wed, 3 Dec 2025 15:11:32 -0800
Subject: [PATCH 2/6] [DirectX] Add lowering for fshr
---
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 13 ++-
llvm/test/CodeGen/DirectX/fshr.ll | 82 +++++++++++++++++++
2 files changed, 92 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/fshr.ll
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index b02e03411b47f..4616f0c98bb9a 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -202,6 +202,7 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::abs:
case Intrinsic::atan2:
case Intrinsic::fshl:
+ case Intrinsic::fshr:
case Intrinsic::exp:
case Intrinsic::is_fpclass:
case Intrinsic::log:
@@ -658,6 +659,7 @@ static Value *expandAtan2Intrinsic(CallInst *Orig) {
return Result;
}
+template <bool LeftFunnel>
static Value *expandFunnelShiftIntrinsic(CallInst *Orig) {
Type *Ty = Orig->getType();
Value *A = Orig->getOperand(0);
@@ -671,11 +673,13 @@ static Value *expandFunnelShiftIntrinsic(CallInst *Orig) {
Constant *Size = ConstantInt::get(Ty, BitWidth);
// The shift is not required to be masked as DXIL op will do so automatically
- Value *Left = Builder.CreateShl(A, Shift);
+ Value *Left =
+ LeftFunnel ? Builder.CreateShl(A, Shift) : Builder.CreateLShr(B, Shift);
Value *MaskedShift = Builder.CreateAnd(Shift, Mask);
Value *InverseShift = Builder.CreateSub(Size, MaskedShift);
- Value *Right = Builder.CreateLShr(B, InverseShift);
+ Value *Right = LeftFunnel ? Builder.CreateLShr(B, InverseShift)
+ : Builder.CreateShl(A, InverseShift);
Value *Result = Builder.CreateOr(Left, Right);
return Result;
@@ -1021,7 +1025,10 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
Result = expandAtan2Intrinsic(Orig);
break;
case Intrinsic::fshl:
- Result = expandFunnelShiftIntrinsic(Orig);
+ Result = expandFunnelShiftIntrinsic<true>(Orig);
+ break;
+ case Intrinsic::fshr:
+ Result = expandFunnelShiftIntrinsic<false>(Orig);
break;
case Intrinsic::exp:
Result = expandExpIntrinsic(Orig);
diff --git a/llvm/test/CodeGen/DirectX/fshr.ll b/llvm/test/CodeGen/DirectX/fshr.ll
new file mode 100644
index 0000000000000..c99c38297a7f6
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/fshr.ll
@@ -0,0 +1,82 @@
+; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+;
+; Make sure funnel shift right intrinsics are expanded into an lshr/and/sub/shl/or sequence.
+
+; CHECK-LABEL: define{{.*}}@fshr_i16(
+; CHECK-SAME: i16 %[[A:.*]], i16 %[[B:.*]], i16 %[[SHIFT:.*]])
+define noundef i16 @fshr_i16(i16 %a, i16 %b, i16 %shift) {
+entry:
+; CHECK: %[[LEFT:.*]] = lshr i16 %[[B]], %[[SHIFT]]
+; CHECK: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15
+; CHECK: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]]
+; CHECK: %[[RIGHT:.*]] = shl i16 %[[A]], %[[INVERSE_SHIFT]]
+; CHECK: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]]
+; CHECK: ret i16 %[[RES]]
+ %fsh = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %shift)
+ ret i16 %fsh
+}
+
+declare i16 @llvm.fshr.i16(i16, i16, i16)
+
+; CHECK-LABEL: define{{.*}}@fshr_v1i32(
+; CHECK-SAME: <1 x i32> %[[A_VEC:.*]], <1 x i32> %[[B_VEC:.*]], <1 x i32> %[[SHIFT_VEC:.*]])
+define noundef <1 x i32> @fshr_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) {
+entry:
+; CHECK: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
+; CHECK: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
+; CHECK: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
+; CHECK: %[[LEFT:.*]] = lshr i32 %[[B]], %[[SHIFT]]
+; CHECK: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
+; CHECK: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]]
+; CHECK: %[[RIGHT:.*]] = shl i32 %[[A]], %[[INVERSE_SHIFT]]
+; CHECK: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]]
+; CHECK: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0
+; CHECK: ret <1 x i32> %[[RES_VEC]]
+ %fsh = call <1 x i32> @llvm.fshr.v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift)
+ ret <1 x i32> %fsh
+}
+
+declare <1 x i32> @llvm.fshr.v1i32(<1 x i32>, <1 x i32>, <1 x i32>)
+
+; CHECK-LABEL: define{{.*}}@fshr_v1i64(
+; CHECK-SAME: <3 x i64> %[[A_VEC:.*]], <3 x i64> %[[B_VEC:.*]], <3 x i64> %[[SHIFT_VEC:.*]])
+define noundef <3 x i64> @fshr_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) {
+entry:
+; CHECK: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
+; CHECK: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
+; CHECK: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
+; CHECK: %[[LEFT0:.*]] = lshr i64 %[[B0]], %[[SHIFT0]]
+; CHECK: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
+; CHECK: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
+; CHECK: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]]
+; CHECK: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
+;
+; CHECK: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
+; CHECK: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
+; CHECK: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
+; CHECK: %[[LEFT1:.*]] = lshr i64 %[[B1]], %[[SHIFT1]]
+; CHECK: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
+; CHECK: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
+; CHECK: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]]
+; CHECK: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
+;
+; CHECK: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
+; CHECK: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
+; CHECK: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
+; CHECK: %[[LEFT2:.*]] = lshr i64 %[[B2]], %[[SHIFT2]]
+; CHECK: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
+; CHECK: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
+; CHECK: %[[RIGHT2:.*]] = shl i64 %[[A2]], %[[INVERSE_SHIFT2]]
+; CHECK: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]]
+;
+; CHECK: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
+; CHECK: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1
+; CHECK: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2
+;
+; CHECK: ret <3 x i64> %[[RES_VEC]]
+ %fsh = call <3 x i64> @llvm.fshr.v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift)
+ ret <3 x i64> %fsh
+}
+
+declare <3 x i64> @llvm.fshr.v1i64(<3 x i64>, <3 x i64>, <3 x i64>)
>From 7f46c8e0e148b0538c18d161772bb91d22a1b760 Mon Sep 17 00:00:00 2001
From: Finn Plummer <mail at inbelic.dev>
Date: Fri, 5 Dec 2025 09:21:57 -0800
Subject: [PATCH 3/6] review: make stricter with check-next
---
llvm/test/CodeGen/DirectX/fshl.ll | 91 ++++++++++++++++---------------
llvm/test/CodeGen/DirectX/fshr.ll | 91 ++++++++++++++++---------------
2 files changed, 94 insertions(+), 88 deletions(-)
diff --git a/llvm/test/CodeGen/DirectX/fshl.ll b/llvm/test/CodeGen/DirectX/fshl.ll
index 0b542525faea6..a1f37ebe1d554 100644
--- a/llvm/test/CodeGen/DirectX/fshl.ll
+++ b/llvm/test/CodeGen/DirectX/fshl.ll
@@ -7,12 +7,13 @@
; CHECK-SAME: i16 %[[A:.*]], i16 %[[B:.*]], i16 %[[SHIFT:.*]])
define noundef i16 @fshl_i16(i16 %a, i16 %b, i16 %shift) {
entry:
-; CHECK: %[[LEFT:.*]] = shl i16 %[[A]], %[[SHIFT]]
-; CHECK: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15
-; CHECK: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]]
-; CHECK: %[[RIGHT:.*]] = lshr i16 %[[B]], %[[INVERSE_SHIFT]]
-; CHECK: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]]
-; CHECK: ret i16 %[[RES]]
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[LEFT:.*]] = shl i16 %[[A]], %[[SHIFT]]
+; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15
+; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]]
+; CHECK-NEXT: %[[RIGHT:.*]] = lshr i16 %[[B]], %[[INVERSE_SHIFT]]
+; CHECK-NEXT: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]]
+; CHECK-NEXT: ret i16 %[[RES]]
%fsh = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %shift)
ret i16 %fsh
}
@@ -23,16 +24,17 @@ declare i16 @llvm.fshl.i16(i16, i16, i16)
; CHECK-SAME: <1 x i32> %[[A_VEC:.*]], <1 x i32> %[[B_VEC:.*]], <1 x i32> %[[SHIFT_VEC:.*]])
define noundef <1 x i32> @fshl_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) {
entry:
-; CHECK: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
-; CHECK: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
-; CHECK: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
-; CHECK: %[[LEFT:.*]] = shl i32 %[[A]], %[[SHIFT]]
-; CHECK: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
-; CHECK: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]]
-; CHECK: %[[RIGHT:.*]] = lshr i32 %[[B]], %[[INVERSE_SHIFT]]
-; CHECK: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]]
-; CHECK: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0
-; CHECK: ret <1 x i32> %[[RES_VEC]]
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
+; CHECK-NEXT: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
+; CHECK-NEXT: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
+; CHECK-NEXT: %[[LEFT:.*]] = shl i32 %[[A]], %[[SHIFT]]
+; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
+; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]]
+; CHECK-NEXT: %[[RIGHT:.*]] = lshr i32 %[[B]], %[[INVERSE_SHIFT]]
+; CHECK-NEXT: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]]
+; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0
+; CHECK-NEXT: ret <1 x i32> %[[RES_VEC]]
%fsh = call <1 x i32> @llvm.fshl.v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift)
ret <1 x i32> %fsh
}
@@ -43,38 +45,39 @@ declare <1 x i32> @llvm.fshl.v1i32(<1 x i32>, <1 x i32>, <1 x i32>)
; CHECK-SAME: <3 x i64> %[[A_VEC:.*]], <3 x i64> %[[B_VEC:.*]], <3 x i64> %[[SHIFT_VEC:.*]])
define noundef <3 x i64> @fshl_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) {
entry:
-; CHECK: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
-; CHECK: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
-; CHECK: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
-; CHECK: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[SHIFT0]]
-; CHECK: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
-; CHECK: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
-; CHECK: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]]
-; CHECK: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
+; CHECK-NEXT: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
+; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
+; CHECK-NEXT: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[SHIFT0]]
+; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
+; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
+; CHECK-NEXT: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]]
+; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
;
-; CHECK: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
-; CHECK: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
-; CHECK: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
-; CHECK: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[SHIFT1]]
-; CHECK: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
-; CHECK: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
-; CHECK: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]]
-; CHECK: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
+; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
+; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
+; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
+; CHECK-NEXT: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[SHIFT1]]
+; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
+; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
+; CHECK-NEXT: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]]
+; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
;
-; CHECK: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
-; CHECK: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
-; CHECK: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
-; CHECK: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[SHIFT2]]
-; CHECK: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
-; CHECK: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
-; CHECK: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[INVERSE_SHIFT2]]
-; CHECK: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]]
+; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
+; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
+; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
+; CHECK-NEXT: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[SHIFT2]]
+; CHECK-NEXT: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
+; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
+; CHECK-NEXT: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[INVERSE_SHIFT2]]
+; CHECK-NEXT: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]]
;
-; CHECK: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
-; CHECK: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1
-; CHECK: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2
+; CHECK-NEXT: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
+; CHECK-NEXT: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1
+; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2
;
-; CHECK: ret <3 x i64> %[[RES_VEC]]
+; CHECK-NEXT: ret <3 x i64> %[[RES_VEC]]
%fsh = call <3 x i64> @llvm.fshl.v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift)
ret <3 x i64> %fsh
}
diff --git a/llvm/test/CodeGen/DirectX/fshr.ll b/llvm/test/CodeGen/DirectX/fshr.ll
index c99c38297a7f6..15ac60e96cc62 100644
--- a/llvm/test/CodeGen/DirectX/fshr.ll
+++ b/llvm/test/CodeGen/DirectX/fshr.ll
@@ -7,12 +7,13 @@
; CHECK-SAME: i16 %[[A:.*]], i16 %[[B:.*]], i16 %[[SHIFT:.*]])
define noundef i16 @fshr_i16(i16 %a, i16 %b, i16 %shift) {
entry:
-; CHECK: %[[LEFT:.*]] = lshr i16 %[[B]], %[[SHIFT]]
-; CHECK: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15
-; CHECK: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]]
-; CHECK: %[[RIGHT:.*]] = shl i16 %[[A]], %[[INVERSE_SHIFT]]
-; CHECK: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]]
-; CHECK: ret i16 %[[RES]]
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[LEFT:.*]] = lshr i16 %[[B]], %[[SHIFT]]
+; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15
+; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]]
+; CHECK-NEXT: %[[RIGHT:.*]] = shl i16 %[[A]], %[[INVERSE_SHIFT]]
+; CHECK-NEXT: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]]
+; CHECK-NEXT: ret i16 %[[RES]]
%fsh = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %shift)
ret i16 %fsh
}
@@ -23,16 +24,17 @@ declare i16 @llvm.fshr.i16(i16, i16, i16)
; CHECK-SAME: <1 x i32> %[[A_VEC:.*]], <1 x i32> %[[B_VEC:.*]], <1 x i32> %[[SHIFT_VEC:.*]])
define noundef <1 x i32> @fshr_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) {
entry:
-; CHECK: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
-; CHECK: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
-; CHECK: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
-; CHECK: %[[LEFT:.*]] = lshr i32 %[[B]], %[[SHIFT]]
-; CHECK: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
-; CHECK: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]]
-; CHECK: %[[RIGHT:.*]] = shl i32 %[[A]], %[[INVERSE_SHIFT]]
-; CHECK: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]]
-; CHECK: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0
-; CHECK: ret <1 x i32> %[[RES_VEC]]
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
+; CHECK-NEXT: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
+; CHECK-NEXT: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
+; CHECK-NEXT: %[[LEFT:.*]] = lshr i32 %[[B]], %[[SHIFT]]
+; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
+; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]]
+; CHECK-NEXT: %[[RIGHT:.*]] = shl i32 %[[A]], %[[INVERSE_SHIFT]]
+; CHECK-NEXT: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]]
+; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0
+; CHECK-NEXT: ret <1 x i32> %[[RES_VEC]]
%fsh = call <1 x i32> @llvm.fshr.v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift)
ret <1 x i32> %fsh
}
@@ -43,38 +45,39 @@ declare <1 x i32> @llvm.fshr.v1i32(<1 x i32>, <1 x i32>, <1 x i32>)
; CHECK-SAME: <3 x i64> %[[A_VEC:.*]], <3 x i64> %[[B_VEC:.*]], <3 x i64> %[[SHIFT_VEC:.*]])
define noundef <3 x i64> @fshr_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) {
entry:
-; CHECK: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
-; CHECK: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
-; CHECK: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
-; CHECK: %[[LEFT0:.*]] = lshr i64 %[[B0]], %[[SHIFT0]]
-; CHECK: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
-; CHECK: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
-; CHECK: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]]
-; CHECK: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
+; CHECK-NEXT: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
+; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
+; CHECK-NEXT: %[[LEFT0:.*]] = lshr i64 %[[B0]], %[[SHIFT0]]
+; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
+; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
+; CHECK-NEXT: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]]
+; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
;
-; CHECK: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
-; CHECK: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
-; CHECK: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
-; CHECK: %[[LEFT1:.*]] = lshr i64 %[[B1]], %[[SHIFT1]]
-; CHECK: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
-; CHECK: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
-; CHECK: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]]
-; CHECK: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
+; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
+; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
+; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
+; CHECK-NEXT: %[[LEFT1:.*]] = lshr i64 %[[B1]], %[[SHIFT1]]
+; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
+; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
+; CHECK-NEXT: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]]
+; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
;
-; CHECK: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
-; CHECK: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
-; CHECK: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
-; CHECK: %[[LEFT2:.*]] = lshr i64 %[[B2]], %[[SHIFT2]]
-; CHECK: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
-; CHECK: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
-; CHECK: %[[RIGHT2:.*]] = shl i64 %[[A2]], %[[INVERSE_SHIFT2]]
-; CHECK: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]]
+; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
+; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
+; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
+; CHECK-NEXT: %[[LEFT2:.*]] = lshr i64 %[[B2]], %[[SHIFT2]]
+; CHECK-NEXT: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
+; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
+; CHECK-NEXT: %[[RIGHT2:.*]] = shl i64 %[[A2]], %[[INVERSE_SHIFT2]]
+; CHECK-NEXT: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]]
;
-; CHECK: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
-; CHECK: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1
-; CHECK: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2
+; CHECK-NEXT: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
+; CHECK-NEXT: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1
+; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2
;
-; CHECK: ret <3 x i64> %[[RES_VEC]]
+; CHECK-NEXT: ret <3 x i64> %[[RES_VEC]]
%fsh = call <3 x i64> @llvm.fshr.v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift)
ret <3 x i64> %fsh
}
>From 2752c8ea09686f82d92be77dc94e14739b070e10 Mon Sep 17 00:00:00 2001
From: Finn Plummer <mail at inbelic.dev>
Date: Fri, 5 Dec 2025 09:27:29 -0800
Subject: [PATCH 4/6] review: use practical order of passes
---
llvm/test/CodeGen/DirectX/fshl.ll | 35 ++++++++++++++++++-------------
llvm/test/CodeGen/DirectX/fshr.ll | 33 ++++++++++++++++-------------
2 files changed, 39 insertions(+), 29 deletions(-)
diff --git a/llvm/test/CodeGen/DirectX/fshl.ll b/llvm/test/CodeGen/DirectX/fshl.ll
index a1f37ebe1d554..31cc8beeb7124 100644
--- a/llvm/test/CodeGen/DirectX/fshl.ll
+++ b/llvm/test/CodeGen/DirectX/fshl.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
-; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
;
; Make sure funnel shift left intrinsics are expanded into a shl/and/sub/lshr/or sequence.
@@ -25,8 +25,8 @@ declare i16 @llvm.fshl.i16(i16, i16, i16)
define noundef <1 x i32> @fshl_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) {
entry:
; CHECK-NEXT: entry:
-; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
; CHECK-NEXT: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
+; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
; CHECK-NEXT: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
; CHECK-NEXT: %[[LEFT:.*]] = shl i32 %[[A]], %[[SHIFT]]
; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
@@ -46,31 +46,36 @@ declare <1 x i32> @llvm.fshl.v1i32(<1 x i32>, <1 x i32>, <1 x i32>)
define noundef <3 x i64> @fshl_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) {
entry:
; CHECK-NEXT: entry:
-; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
; CHECK-NEXT: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
+; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
+; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
+;
+; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
; CHECK-NEXT: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[SHIFT0]]
-; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
-; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
-; CHECK-NEXT: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]]
-; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
-;
+;
; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
-; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
; CHECK-NEXT: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[SHIFT1]]
-; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
-; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
-; CHECK-NEXT: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]]
-; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
;
; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
-; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
; CHECK-NEXT: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[SHIFT2]]
+;
+; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
+; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
; CHECK-NEXT: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
+;
+; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
+; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
+;
+; CHECK-NEXT: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]]
+; CHECK-NEXT: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]]
; CHECK-NEXT: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[INVERSE_SHIFT2]]
+;
+; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
+; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
; CHECK-NEXT: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]]
;
; CHECK-NEXT: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
diff --git a/llvm/test/CodeGen/DirectX/fshr.ll b/llvm/test/CodeGen/DirectX/fshr.ll
index 15ac60e96cc62..1b9bf7ad1009a 100644
--- a/llvm/test/CodeGen/DirectX/fshr.ll
+++ b/llvm/test/CodeGen/DirectX/fshr.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
-; RUN: opt -S -scalarizer -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
;
; Make sure dxil operation function calls for funnel shifts right are generated.
@@ -47,30 +47,35 @@ define noundef <3 x i64> @fshr_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shif
entry:
; CHECK-NEXT: entry:
; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
+; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
+; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
+;
; CHECK-NEXT: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
; CHECK-NEXT: %[[LEFT0:.*]] = lshr i64 %[[B0]], %[[SHIFT0]]
-; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
-; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
-; CHECK-NEXT: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]]
-; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
-;
-; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
+;
; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
; CHECK-NEXT: %[[LEFT1:.*]] = lshr i64 %[[B1]], %[[SHIFT1]]
-; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
-; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
-; CHECK-NEXT: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]]
-; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
-;
-; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
+;
; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
; CHECK-NEXT: %[[LEFT2:.*]] = lshr i64 %[[B2]], %[[SHIFT2]]
+;
+; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
+; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
; CHECK-NEXT: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
+;
+; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
+; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
+;
+; CHECK-NEXT: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]]
+; CHECK-NEXT: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]]
; CHECK-NEXT: %[[RIGHT2:.*]] = shl i64 %[[A2]], %[[INVERSE_SHIFT2]]
+;
+; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
+; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
; CHECK-NEXT: %[[RES2:.*]] = or i64 %[[LEFT2]], %[[RIGHT2]]
;
; CHECK-NEXT: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
>From 3839396c37ae69382382ad2fa558d722d16cd7b3 Mon Sep 17 00:00:00 2001
From: Finn Plummer <mail at inbelic.dev>
Date: Thu, 11 Dec 2025 12:45:03 -0800
Subject: [PATCH 5/6] review: correct algorithm for edge-case and bits
This fixes two issues with the implementation:
1. Handles the case when (Shift % BitWidth) == 0 by splitting the shift by
(BitWidth - Shift) into 2 well-defined shifts, following the
lowerFunnelShiftAsShifts impl
2. Corrects fshr to extract the least-sig bits instead of the most-sig
bits, as the algorithm was previously doing
---
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 51 +++++++++++++++----
1 file changed, 41 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 4616f0c98bb9a..02b1aedd4f5c4 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -669,19 +669,50 @@ static Value *expandFunnelShiftIntrinsic(CallInst *Orig) {
IRBuilder<> Builder(Orig);
unsigned BitWidth = Ty->getScalarSizeInBits();
- Constant *Mask = ConstantInt::get(Ty, BitWidth - 1);
- Constant *Size = ConstantInt::get(Ty, BitWidth);
+ assert(llvm::isPowerOf2_32(BitWidth) &&
+ "Can't use Mask to compute modulo and inverse");
+
+ // Note: if (Shift % BitWidth) == 0 then (BitWidth - (Shift % BitWidth)) ==
+ // BitWidth, and shl/lshr by the bit width returns poison. As such,
+ // we implement the same formula as LegalizerHelper::lowerFunnelShiftAsShifts.
+ //
+ // The funnel shift is expanded like so:
+ // fshl
+ // -> msb_extract((concat(A, B) << (Shift % BitWidth)), BitWidth)
+ // -> A << (Shift % BitWidth) | B >> 1 >> (BitWidth - 1 - (Shift % BitWidth))
+ // fshr
+ // -> lsb_extract((concat(A, B) >> (Shift % BitWidth)), BitWidth)
+ // -> A << 1 << (BitWidth - 1 - (Shift % BitWidth)) | B >> (Shift % BitWidth)
+
+ // (BitWidth - 1) -> Mask
+ Constant *Mask = ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1);
+
+ // Shift % BitWidth
+ // -> Shift & (BitWidth - 1)
+ // -> Shift & Mask
+ Value *MaskedShift = Builder.CreateAnd(Shift, Mask);
- // The shift is not required to be masked as DXIL op will do so automatically
- Value *Left =
- LeftFunnel ? Builder.CreateShl(A, Shift) : Builder.CreateLShr(B, Shift);
+ // (BitWidth - 1) - (Shift % BitWidth)
+ // -> ~Shift & (BitWidth - 1)
+ // -> ~Shift & Mask
+ Value *NotShift = Builder.CreateNot(Shift);
+ Value *InverseShift = Builder.CreateAnd(NotShift, Mask);
- Value *MaskedShift = Builder.CreateAnd(Shift, Mask);
- Value *InverseShift = Builder.CreateSub(Size, MaskedShift);
- Value *Right = LeftFunnel ? Builder.CreateLShr(B, InverseShift)
- : Builder.CreateShl(A, InverseShift);
+ Constant *One = ConstantInt::get(Ty, 1);
+ Value *ShiftedA;
+ Value *ShiftedB;
+
+ if (LeftFunnel) {
+ ShiftedA = Builder.CreateShl(A, MaskedShift);
+ Value *ShiftB1 = Builder.CreateLShr(B, One);
+ ShiftedB = Builder.CreateLShr(ShiftB1, InverseShift);
+ } else {
+ Value *ShiftA1 = Builder.CreateShl(A, One);
+ ShiftedA = Builder.CreateShl(ShiftA1, InverseShift);
+ ShiftedB = Builder.CreateLShr(B, MaskedShift);
+ }
- Value *Result = Builder.CreateOr(Left, Right);
+ Value *Result = Builder.CreateOr(ShiftedA, ShiftedB);
return Result;
}
>From 284c45af042f18a2e1eda409628460d21048c4f9 Mon Sep 17 00:00:00 2001
From: Finn Plummer <mail at inbelic.dev>
Date: Thu, 11 Dec 2025 13:16:50 -0800
Subject: [PATCH 6/6] update test cases
---
llvm/test/CodeGen/DirectX/fshl.ll | 53 +++++++++++++++-----------
llvm/test/CodeGen/DirectX/fshr.ll | 62 ++++++++++++++++++-------------
2 files changed, 68 insertions(+), 47 deletions(-)
diff --git a/llvm/test/CodeGen/DirectX/fshl.ll b/llvm/test/CodeGen/DirectX/fshl.ll
index 31cc8beeb7124..e950b7392a35d 100644
--- a/llvm/test/CodeGen/DirectX/fshl.ll
+++ b/llvm/test/CodeGen/DirectX/fshl.ll
@@ -8,10 +8,12 @@
define noundef i16 @fshl_i16(i16 %a, i16 %b, i16 %shift) {
entry:
; CHECK-NEXT: entry:
-; CHECK-NEXT: %[[LEFT:.*]] = shl i16 %[[A]], %[[SHIFT]]
; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15
-; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]]
-; CHECK-NEXT: %[[RIGHT:.*]] = lshr i16 %[[B]], %[[INVERSE_SHIFT]]
+; CHECK-NEXT: %[[NOT_SHIFT:.*]] = xor i16 %[[SHIFT]], -1
+; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = and i16 %[[NOT_SHIFT]], 15
+; CHECK-NEXT: %[[LEFT:.*]] = shl i16 %[[A]], %[[MASKED_SHIFT]]
+; CHECK-NEXT: %[[SHIFT_B_1:.*]] = lshr i16 %[[B]], 1
+; CHECK-NEXT: %[[RIGHT:.*]] = lshr i16 %[[SHIFT_B_1]], %[[INVERSE_SHIFT]]
; CHECK-NEXT: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]]
; CHECK-NEXT: ret i16 %[[RES]]
%fsh = call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %shift)
@@ -28,10 +30,12 @@ entry:
; CHECK-NEXT: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
; CHECK-NEXT: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
-; CHECK-NEXT: %[[LEFT:.*]] = shl i32 %[[A]], %[[SHIFT]]
; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
-; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]]
-; CHECK-NEXT: %[[RIGHT:.*]] = lshr i32 %[[B]], %[[INVERSE_SHIFT]]
+; CHECK-NEXT: %[[NOT_SHIFT:.*]] = xor i32 %[[SHIFT]], -1
+; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = and i32 %[[NOT_SHIFT]], 31
+; CHECK-NEXT: %[[LEFT:.*]] = shl i32 %[[A]], %[[MASKED_SHIFT]]
+; CHECK-NEXT: %[[SHIFT_B_1:.*]] = lshr i32 %[[B]], 1
+; CHECK-NEXT: %[[RIGHT:.*]] = lshr i32 %[[SHIFT_B_1]], %[[INVERSE_SHIFT]]
; CHECK-NEXT: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]]
; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0
; CHECK-NEXT: ret <1 x i32> %[[RES_VEC]]
@@ -51,28 +55,35 @@ entry:
; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
;
; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
-; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
-; CHECK-NEXT: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[SHIFT0]]
-;
; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
-; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
-; CHECK-NEXT: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[SHIFT1]]
-;
; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
-; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
-; CHECK-NEXT: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[SHIFT2]]
-;
+;
+; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
+; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
+; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
; CHECK-NEXT: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
+;
+; CHECK-NEXT: %[[NOT_SHIFT0:.*]] = xor i64 %[[SHIFT0]], -1
+; CHECK-NEXT: %[[NOT_SHIFT1:.*]] = xor i64 %[[SHIFT1]], -1
+; CHECK-NEXT: %[[NOT_SHIFT2:.*]] = xor i64 %[[SHIFT2]], -1
+;
+; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = and i64 %[[NOT_SHIFT0]], 63
+; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = and i64 %[[NOT_SHIFT1]], 63
+; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = and i64 %[[NOT_SHIFT2]], 63
;
-; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
-; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
-; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
+; CHECK-NEXT: %[[LEFT0:.*]] = shl i64 %[[A0]], %[[MASKED_SHIFT0]]
+; CHECK-NEXT: %[[LEFT1:.*]] = shl i64 %[[A1]], %[[MASKED_SHIFT1]]
+; CHECK-NEXT: %[[LEFT2:.*]] = shl i64 %[[A2]], %[[MASKED_SHIFT2]]
;
-; CHECK-NEXT: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[INVERSE_SHIFT0]]
-; CHECK-NEXT: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[INVERSE_SHIFT1]]
-; CHECK-NEXT: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[INVERSE_SHIFT2]]
+; CHECK-NEXT: %[[SHIFT_B0_1:.*]] = lshr i64 %[[B0]], 1
+; CHECK-NEXT: %[[SHIFT_B1_1:.*]] = lshr i64 %[[B1]], 1
+; CHECK-NEXT: %[[SHIFT_B2_1:.*]] = lshr i64 %[[B2]], 1
+;
+; CHECK-NEXT: %[[RIGHT0:.*]] = lshr i64 %[[SHIFT_B0_1]], %[[INVERSE_SHIFT0]]
+; CHECK-NEXT: %[[RIGHT1:.*]] = lshr i64 %[[SHIFT_B1_1]], %[[INVERSE_SHIFT1]]
+; CHECK-NEXT: %[[RIGHT2:.*]] = lshr i64 %[[SHIFT_B2_1]], %[[INVERSE_SHIFT2]]
;
; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
diff --git a/llvm/test/CodeGen/DirectX/fshr.ll b/llvm/test/CodeGen/DirectX/fshr.ll
index 1b9bf7ad1009a..74ce1c48d0981 100644
--- a/llvm/test/CodeGen/DirectX/fshr.ll
+++ b/llvm/test/CodeGen/DirectX/fshr.ll
@@ -8,10 +8,12 @@
define noundef i16 @fshr_i16(i16 %a, i16 %b, i16 %shift) {
entry:
; CHECK-NEXT: entry:
-; CHECK-NEXT: %[[LEFT:.*]] = lshr i16 %[[B]], %[[SHIFT]]
; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i16 %[[SHIFT]], 15
-; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i16 16, %[[MASKED_SHIFT]]
-; CHECK-NEXT: %[[RIGHT:.*]] = shl i16 %[[A]], %[[INVERSE_SHIFT]]
+; CHECK-NEXT: %[[NOT_SHIFT:.*]] = xor i16 %[[SHIFT]], -1
+; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = and i16 %[[NOT_SHIFT]], 15
+; CHECK-NEXT: %[[SHIFT_A_1:.*]] = shl i16 %[[A]], 1
+; CHECK-NEXT: %[[LEFT:.*]] = shl i16 %[[SHIFT_A_1]], %[[INVERSE_SHIFT]]
+; CHECK-NEXT: %[[RIGHT:.*]] = lshr i16 %[[B]], %[[MASKED_SHIFT]]
; CHECK-NEXT: %[[RES:.*]] = or i16 %[[LEFT]], %[[RIGHT]]
; CHECK-NEXT: ret i16 %[[RES]]
%fsh = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %shift)
@@ -25,13 +27,15 @@ declare i16 @llvm.fshr.i16(i16, i16, i16)
define noundef <1 x i32> @fshr_v1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %shift) {
entry:
; CHECK-NEXT: entry:
-; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
; CHECK-NEXT: %[[B:.*]] = extractelement <1 x i32> %[[B_VEC]], i64 0
+; CHECK-NEXT: %[[A:.*]] = extractelement <1 x i32> %[[A_VEC]], i64 0
; CHECK-NEXT: %[[SHIFT:.*]] = extractelement <1 x i32> %[[SHIFT_VEC]], i64 0
-; CHECK-NEXT: %[[LEFT:.*]] = lshr i32 %[[B]], %[[SHIFT]]
; CHECK-NEXT: %[[MASKED_SHIFT:.*]] = and i32 %[[SHIFT]], 31
-; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = sub i32 32, %[[MASKED_SHIFT]]
-; CHECK-NEXT: %[[RIGHT:.*]] = shl i32 %[[A]], %[[INVERSE_SHIFT]]
+; CHECK-NEXT: %[[NOT_SHIFT:.*]] = xor i32 %[[SHIFT]], -1
+; CHECK-NEXT: %[[INVERSE_SHIFT:.*]] = and i32 %[[NOT_SHIFT]], 31
+; CHECK-NEXT: %[[SHIFT_A_1:.*]] = shl i32 %[[A]], 1
+; CHECK-NEXT: %[[LEFT:.*]] = shl i32 %[[SHIFT_A_1]], %[[INVERSE_SHIFT]]
+; CHECK-NEXT: %[[RIGHT:.*]] = lshr i32 %[[B]], %[[MASKED_SHIFT]]
; CHECK-NEXT: %[[RES:.*]] = or i32 %[[LEFT]], %[[RIGHT]]
; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <1 x i32> poison, i32 %[[RES]], i64 0
; CHECK-NEXT: ret <1 x i32> %[[RES_VEC]]
@@ -46,33 +50,40 @@ declare <1 x i32> @llvm.fshr.v1i32(<1 x i32>, <1 x i32>, <1 x i32>)
define noundef <3 x i64> @fshr_v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift) {
entry:
; CHECK-NEXT: entry:
-; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
-; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
-; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
-;
; CHECK-NEXT: %[[B0:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 0
-; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
-; CHECK-NEXT: %[[LEFT0:.*]] = lshr i64 %[[B0]], %[[SHIFT0]]
-;
; CHECK-NEXT: %[[B1:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 1
-; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
-; CHECK-NEXT: %[[LEFT1:.*]] = lshr i64 %[[B1]], %[[SHIFT1]]
-;
; CHECK-NEXT: %[[B2:.*]] = extractelement <3 x i64> %[[B_VEC]], i64 2
-; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
-; CHECK-NEXT: %[[LEFT2:.*]] = lshr i64 %[[B2]], %[[SHIFT2]]
;
+; CHECK-NEXT: %[[A0:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 0
+; CHECK-NEXT: %[[A1:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 1
+; CHECK-NEXT: %[[A2:.*]] = extractelement <3 x i64> %[[A_VEC]], i64 2
+;
+; CHECK-NEXT: %[[SHIFT0:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 0
; CHECK-NEXT: %[[MASKED_SHIFT0:.*]] = and i64 %[[SHIFT0]], 63
+; CHECK-NEXT: %[[SHIFT1:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 1
; CHECK-NEXT: %[[MASKED_SHIFT1:.*]] = and i64 %[[SHIFT1]], 63
+; CHECK-NEXT: %[[SHIFT2:.*]] = extractelement <3 x i64> %[[SHIFT_VEC]], i64 2
; CHECK-NEXT: %[[MASKED_SHIFT2:.*]] = and i64 %[[SHIFT2]], 63
+;
+; CHECK-NEXT: %[[NOT_SHIFT0:.*]] = xor i64 %[[SHIFT0]], -1
+; CHECK-NEXT: %[[NOT_SHIFT1:.*]] = xor i64 %[[SHIFT1]], -1
+; CHECK-NEXT: %[[NOT_SHIFT2:.*]] = xor i64 %[[SHIFT2]], -1
+;
+; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = and i64 %[[NOT_SHIFT0]], 63
+; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = and i64 %[[NOT_SHIFT1]], 63
+; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = and i64 %[[NOT_SHIFT2]], 63
;
-; CHECK-NEXT: %[[INVERSE_SHIFT0:.*]] = sub i64 64, %[[MASKED_SHIFT0]]
-; CHECK-NEXT: %[[INVERSE_SHIFT1:.*]] = sub i64 64, %[[MASKED_SHIFT1]]
-; CHECK-NEXT: %[[INVERSE_SHIFT2:.*]] = sub i64 64, %[[MASKED_SHIFT2]]
+; CHECK-NEXT: %[[SHIFT_A0_1:.*]] = shl i64 %[[A0]], 1
+; CHECK-NEXT: %[[SHIFT_A1_1:.*]] = shl i64 %[[A1]], 1
+; CHECK-NEXT: %[[SHIFT_A2_1:.*]] = shl i64 %[[A2]], 1
+;
+; CHECK-NEXT: %[[LEFT0:.*]] = shl i64 %[[SHIFT_A0_1]], %[[INVERSE_SHIFT0]]
+; CHECK-NEXT: %[[LEFT1:.*]] = shl i64 %[[SHIFT_A1_1]], %[[INVERSE_SHIFT1]]
+; CHECK-NEXT: %[[LEFT2:.*]] = shl i64 %[[SHIFT_A2_1]], %[[INVERSE_SHIFT2]]
;
-; CHECK-NEXT: %[[RIGHT0:.*]] = shl i64 %[[A0]], %[[INVERSE_SHIFT0]]
-; CHECK-NEXT: %[[RIGHT1:.*]] = shl i64 %[[A1]], %[[INVERSE_SHIFT1]]
-; CHECK-NEXT: %[[RIGHT2:.*]] = shl i64 %[[A2]], %[[INVERSE_SHIFT2]]
+; CHECK-NEXT: %[[RIGHT0:.*]] = lshr i64 %[[B0]], %[[MASKED_SHIFT0]]
+; CHECK-NEXT: %[[RIGHT1:.*]] = lshr i64 %[[B1]], %[[MASKED_SHIFT1]]
+; CHECK-NEXT: %[[RIGHT2:.*]] = lshr i64 %[[B2]], %[[MASKED_SHIFT2]]
;
; CHECK-NEXT: %[[RES0:.*]] = or i64 %[[LEFT0]], %[[RIGHT0]]
; CHECK-NEXT: %[[RES1:.*]] = or i64 %[[LEFT1]], %[[RIGHT1]]
@@ -81,7 +92,6 @@ entry:
; CHECK-NEXT: %[[INSERT0:.*]] = insertelement <3 x i64> poison, i64 %[[RES0]], i64 0
; CHECK-NEXT: %[[INSERT1:.*]] = insertelement <3 x i64> %[[INSERT0]], i64 %[[RES1]], i64 1
; CHECK-NEXT: %[[RES_VEC:.*]] = insertelement <3 x i64> %[[INSERT1]], i64 %[[RES2]], i64 2
-;
; CHECK-NEXT: ret <3 x i64> %[[RES_VEC]]
%fsh = call <3 x i64> @llvm.fshr.v1i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %shift)
ret <3 x i64> %fsh
More information about the llvm-commits
mailing list