[llvm-branch-commits] [llvm] release/22.x: [Hexagon] Fix 64-bit funnel shift miscompilation with register shift amounts (#183669) (PR #185781)

Cullen Rhodes via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Mar 30 07:46:01 PDT 2026


https://github.com/c-rhodes updated https://github.com/llvm/llvm-project/pull/185781

>From 240f42d95f51486f14dccbeb900824aacfd15d69 Mon Sep 17 00:00:00 2001
From: Brian Cain <brian.cain at oss.qualcomm.com>
Date: Tue, 10 Mar 2026 09:16:46 -0500
Subject: [PATCH] [Hexagon] Fix 64-bit funnel shift miscompilation with
 register shift amounts (#183669)

64-bit regpair shift amounts are treated as signed 7-bits, so a complement
shift amount of 64 (when the primary amount is 0) is sign-extended to -64,
reversing the shift direction and producing incorrect results. This affected
any 64-bit rotate or funnel shift where the runtime shift amount could be 0
(making the complement 64) or >= 64.

Fix by masking the shift amount to [0, 63] and computing the complement as
(m - 64), which is always in [-64, -1]. Using lsl/lsr (logical shift)
instructions with this negative amount causes the hardware to reverse the
shift direction while zero-filling vacated positions:

fshl(a, b, amt) = (a << m) | lsl(b, m - 64) // lsl reverses to lsr
fshr(a, b, amt) = (b >> m) | lsr(a, m - 64) // lsr reverses to lsl

where m = amt & 63. The logical shift instructions (lsl/lsr) are used
instead of arithmetic (asl) because asl with a negative amount performs an
arithmetic right shift that sign-extends, which would corrupt the result
for negative source values.

When m = 0, the complement amount is -64 (magnitude 64), which shifts all
64 bits out and produces zero, so the complement term vanishes as required.

(cherry picked from commit 4fffee037520f3f514d5e2d6a40e26648fdb92e2)
---
 llvm/lib/Target/Hexagon/HexagonPatterns.td |  8 +++++--
 llvm/test/CodeGen/Hexagon/funnel-shift.ll  | 28 ++++++++++++----------
 llvm/test/CodeGen/Hexagon/rotate.ll        | 20 +++++++++-------
 3 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index ca2704978df79..f6d51acce562e 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -1274,7 +1274,9 @@ def FShl32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
 def FShl64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
   (S2_lsr_i_p_or (S2_asl_i_p $Rs, $S),  $Rt, (Subi<64> $S))>;
 def FShl64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
-  (S2_lsr_r_p_or (S2_asl_r_p $Rs, $Ru), $Rt, (A2_subri 64, $Ru))>;
+  (S2_lsl_r_p_or (S2_asl_r_p $Rs, (A2_andir $Ru, 63)),
+                  $Rt,
+                  (A2_addi (A2_andir $Ru, 63), -64))>;
 
 // Combined SDNodeXForm: (Divu8 (Subi<64> $S))
 def Divu64_8: SDNodeXForm<imm, [{
@@ -1314,7 +1316,9 @@ def FShr32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
 def FShr64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
   (S2_asl_i_p_or (S2_lsr_i_p $Rt, $S),  $Rs, (Subi<64> $S))>;
 def FShr64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
-  (S2_asl_r_p_or (S2_lsr_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
+  (S2_lsr_r_p_or (S2_lsr_r_p $Rt, (A2_andir $Ru, 63)),
+                  $Rs,
+                  (A2_addi (A2_andir $Ru, 63), -64))>;
 
 // Special cases:
 let AddedComplexity = 100 in {
diff --git a/llvm/test/CodeGen/Hexagon/funnel-shift.ll b/llvm/test/CodeGen/Hexagon/funnel-shift.ll
index 5c318dfb6066e..5c859323feaee 100644
--- a/llvm/test/CodeGen/Hexagon/funnel-shift.ll
+++ b/llvm/test/CodeGen/Hexagon/funnel-shift.ll
@@ -28,9 +28,10 @@ b0:
 }
 
 ; CHECK-LABEL: f3:
-; CHECK: r[[R30:[0-9]+]]:[[R31:[0-9]+]] = asl(r1:0,r4)
-; CHECK: r[[R32:[0-9]+]] = sub(#64,r4)
-; CHECK: r[[R30]]:[[R31]] |= lsr(r3:2,r[[R32]])
+; CHECK: r[[R33:[0-9]+]] = and(r4,#63)
+; CHECK: r[[R30:[0-9]+]]:[[R31:[0-9]+]] = asl(r1:0,r[[R33]])
+; CHECK: r[[R33]] = add(r[[R33]],#-64)
+; CHECK: r[[R30]]:[[R31]] |= lsl(r3:2,r[[R33]])
 define i64 @f3(i64 %a0, i64 %a1, i64 %a2) #1 {
 b0:
   %v0 = tail call i64 @llvm.fshl.i64(i64 %a0, i64 %a1, i64 %a2)
@@ -65,9 +66,10 @@ b0:
 }
 
 ; CHECK-LABEL: f7:
-; CHECK: r[[R70:[0-9]+]]:[[R71:[0-9]+]] = lsr(r3:2,r4)
-; CHECK: r[[R72:[0-9]+]] = sub(#64,r4)
-; CHECK: r[[R70]]:[[R71]] |= asl(r1:0,r6)
+; CHECK: r[[R73:[0-9]+]] = and(r4,#63)
+; CHECK: r[[R76:[0-9]+]] = add(r[[R73]],#-64)
+; CHECK: r[[R70:[0-9]+]]:[[R71:[0-9]+]] = lsr(r3:2,r[[R73]])
+; CHECK: r[[R70]]:[[R71]] |= lsr(r1:0,r[[R76]])
 define i64 @f7(i64 %a0, i64 %a1, i64 %a2) #1 {
 b0:
   %v0 = tail call i64 @llvm.fshr.i64(i64 %a0, i64 %a1, i64 %a2)
@@ -100,9 +102,10 @@ b0:
 }
 
 ; CHECK-LABEL: f11:
-; CHECK: r[[RB0:[0-9]+]]:[[RB1:[0-9]+]] = asl(r1:0,r2)
-; CHECK: r[[RB2:[0-9]+]] = sub(#64,r2)
-; CHECK: r[[RB0]]:[[RB1]] |= lsr(r1:0,r[[RB2]])
+; CHECK: r[[RB3:[0-9]+]] = and(r2,#63)
+; CHECK: r[[RB4:[0-9]+]] = add(r[[RB3]],#-64)
+; CHECK: r[[RB0:[0-9]+]]:[[RB1:[0-9]+]] = asl(r1:0,r[[RB3]])
+; CHECK: r[[RB0]]:[[RB1]] |= lsl(r1:0,r[[RB4]])
 define i64 @f11(i64 %a0, i64 %a1) #1 {
 b0:
   %v0 = tail call i64 @llvm.fshl.i64(i64 %a0, i64 %a0, i64 %a1)
@@ -135,9 +138,10 @@ b0:
 }
 
 ; CHECK-LABEL: f15:
-; CHECK: r[[RF0:[0-9]+]]:[[RF1:[0-9]+]] = lsr(r1:0,r2)
-; CHECK: r[[RF2:[0-9]+]] = sub(#64,r2)
-; CHECK: r[[RF0]]:[[RF1]] |= asl(r1:0,r[[RF2]])
+; CHECK: r[[RF3:[0-9]+]] = and(r2,#63)
+; CHECK: r[[RF4:[0-9]+]] = add(r[[RF3]],#-64)
+; CHECK: r[[RF0:[0-9]+]]:[[RF1:[0-9]+]] = lsr(r1:0,r[[RF3]])
+; CHECK: r[[RF0]]:[[RF1]] |= lsr(r1:0,r[[RF4]])
 define i64 @f15(i64 %a0, i64 %a1) #1 {
 b0:
   %v0 = tail call i64 @llvm.fshr.i64(i64 %a0, i64 %a0, i64 %a1)
diff --git a/llvm/test/CodeGen/Hexagon/rotate.ll b/llvm/test/CodeGen/Hexagon/rotate.ll
index 19af539ed966f..14bddab09303b 100644
--- a/llvm/test/CodeGen/Hexagon/rotate.ll
+++ b/llvm/test/CodeGen/Hexagon/rotate.ll
@@ -60,10 +60,12 @@ b0:
 }
 
 ; CHECK-LABEL: f5
-; This is a rotate left by %a1(r2).
-; CHECK: r[[R50:[0-9]+]]:[[R51:[0-9]+]] = asl(r1:0,r2)
-; CHECK: r[[R52:[0-9]+]] = sub(#64,r2)
-; CHECK: r[[R50]]:[[R51]] |= lsr(r1:0,r[[R52]])
+; This is a rotate left by %a1(r2). The complement shift uses lsl with a
+; negative amount (m - 64), which reverses to a logical right shift by (64 - m).
+; CHECK: r[[R53:[0-9]+]] = and(r2,#63)
+; CHECK: r[[R54:[0-9]+]] = add(r[[R53]],#-64)
+; CHECK: r[[R50:[0-9]+]]:[[R51:[0-9]+]] = asl(r1:0,r[[R53]])
+; CHECK: r[[R50]]:[[R51]] |= lsl(r1:0,r[[R54]])
 define i64 @f5(i64 %a0, i32 %a1) #0 {
 b0:
   %v0 = zext i32 %a1 to i64
@@ -86,10 +88,12 @@ b0:
 }
 
 ; CHECK-LABEL: f7
-; This is a rotate right by %a1(r2).
-; CHECK: r[[R70:[0-9]+]]:[[R71:[0-9]+]] = lsr(r1:0,r2)
-; CHECK: r[[R72:[0-9]+]] = sub(#64,r2)
-; CHECK: r[[R70]]:[[R71]] |= asl(r1:0,r[[R72]])
+; This is a rotate right by %a1(r2). The complement shift uses lsr with a
+; negative amount (m - 64), which reverses to a logical left shift by (64 - m).
+; CHECK: r[[R73:[0-9]+]] = and(r2,#63)
+; CHECK: r[[R74:[0-9]+]] = add(r[[R73]],#-64)
+; CHECK: r[[R70:[0-9]+]]:[[R71:[0-9]+]] = lsr(r1:0,r[[R73]])
+; CHECK: r[[R70]]:[[R71]] |= lsr(r1:0,r[[R74]])
 define i64 @f7(i64 %a0, i32 %a1) #0 {
 b0:
   %v0 = zext i32 %a1 to i64



More information about the llvm-branch-commits mailing list