[llvm] [GlobalISel] Port computeNumSignBits for G_MUL (PR #167311)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 10 05:04:52 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: None (AnushaK6)
<details>
<summary>Changes</summary>
This patch continues the effort in issue [#<!-- -->150515](https://github.com/llvm/llvm-project/issues/150515) to port computeNumSignBits functionality from the SelectionDAG (SDAG) to GlobalISel (GISel).
Specifically, it introduces support for G_MUL and improves precision when analyzing known bits and sign bits.
Changes:
- Implemented computeNumSignBits support for the G_MUL operation in GISel.
- Added logic to distinguish between constant and non-constant operands. When both operands are constants, we can determine the exact number of sign bits of the product.
- When an operand is a variable, conservatively compute a lower bound on the number of sign bits (from an upper bound on the valid bits of the product) that is valid for all possible values.
- Added special handling for constant cases such as multiplication by zero.
Added new tests to:
- knownbits-mul.mir with targeted test cases for G_MUL
- Also modified existing test checks that use mul
---
Patch is 151.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167311.diff
6 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp (+36)
- (added) llvm/test/CodeGen/AArch64/GlobalISel/knownbits-mul.mir (+79)
- (modified) llvm/test/CodeGen/AArch64/combine-sdiv.ll (+1-3)
- (modified) llvm/test/CodeGen/AArch64/neon-dotreduce.ll (+1564-1564)
- (modified) llvm/test/CodeGen/AArch64/rem-by-const.ll (+1-2)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll (+16-32)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index c1fb8b6d78ff8..3b5c68f266fff 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/FMF.h"
#include "llvm/MC/TargetRegistry.h"
@@ -2084,6 +2085,41 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
break;
}
+ case TargetOpcode::G_MUL: {
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
+
+ KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth + 1);
+ KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth + 1);
+
+ if (Known1.isZero() || Known2.isZero())
+ return TyBits;
+
+ auto C1 = getIConstantVRegValWithLookThrough(Src1, MRI);
+ auto C2 = getIConstantVRegValWithLookThrough(Src2, MRI);
+
+ if (C1 && C2) {
+ APInt Val1 = C1->Value;
+ APInt Val2 = C2->Value;
+ APInt Product = Val1 * Val2;
+ return Product.getNumSignBits();
+ }
+ unsigned Src1NumSignBits =
+ computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ if (Src1NumSignBits == 1) {
+ return 1;
+ }
+ unsigned Src2NumSignBits =
+ computeNumSignBits(Src2, DemandedElts, Depth + 1);
+ if (Src2NumSignBits == 1) {
+ return 1;
+ }
+
+ unsigned OutValidBits =
+ (TyBits - Src1NumSignBits + 1) + (TyBits - Src2NumSignBits + 1);
+ FirstAnswer = OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
+ break;
+ }
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
bool IsFP = Opcode == TargetOpcode::G_FCMP;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-mul.mir
new file mode 100644
index 0000000000000..ea5281948a211
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-mul.mir
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -passes="print<gisel-value-tracking>" -filetype=null %s 2>&1 | FileCheck %s
+
+---
+name: ConstPositives
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ConstPositives
+ ; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6
+ ; CHECK-NEXT: %1:_ KnownBits:00000101 SignBits:5
+ ; CHECK-NEXT: %2:_ KnownBits:00001111 SignBits:4
+ %0:_(s8) = G_CONSTANT i8 3
+ %1:_(s8) = G_CONSTANT i8 5
+ %2:_(s8) = G_MUL %0, %1
+...
+---
+name: ConstZero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ConstZero
+ ; CHECK-NEXT: %0:_ KnownBits:00000000 SignBits:8
+ ; CHECK-NEXT: %1:_ KnownBits:00000001 SignBits:7
+ ; CHECK-NEXT: %2:_ KnownBits:00000000 SignBits:8
+ %0:_(s8) = G_CONSTANT i8 0
+ %1:_(s8) = G_CONSTANT i8 1
+ %2:_(s8) = G_MUL %0, %1
+...
+---
+name: ConstNegatives
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ConstNegatives
+ ; CHECK-NEXT: %0:_ KnownBits:11111110 SignBits:7
+ ; CHECK-NEXT: %1:_ KnownBits:11111100 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:00001000 SignBits:4
+ %0:_(s8) = G_CONSTANT i8 -2
+ %1:_(s8) = G_CONSTANT i8 -4
+ %2:_(s8) = G_MUL %0, %1
+...
+---
+name: MixedSigns
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @MixedSigns
+ ; CHECK-NEXT: %0:_ KnownBits:11111100 SignBits:6
+ ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:11110100 SignBits:4
+ %0:_(s8) = G_CONSTANT i8 -4
+ %1:_(s8) = G_CONSTANT i8 3
+ %2:_(s8) = G_MUL %0, %1
+...
+---
+name: UnknownVar
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @UnknownVar
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00000010 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:???????0 SignBits:1
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = G_CONSTANT i8 2
+ %2:_(s8) = G_MUL %0, %1
+...
+---
+name: VectorMul
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorMul
+ ; CHECK-NEXT: %0:_ KnownBits:0000000000000001 SignBits:15
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000000010 SignBits:14
+ ; CHECK-NEXT: %2:_ KnownBits:00000000000000?? SignBits:14
+ ; CHECK-NEXT: %3:_ KnownBits:00000000000000?? SignBits:14
+ ; CHECK-NEXT: %4:_ KnownBits:000000000000???? SignBits:12
+ %0:_(s16) = G_CONSTANT i16 1
+ %1:_(s16) = G_CONSTANT i16 2
+ %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %1, %1, %0
+ %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %0, %0, %1
+ %4:_(<4 x s16>) = G_MUL %2, %3
+...
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index cca190f08df2b..37ea64f700fd8 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -1510,7 +1510,6 @@ define i5 @combine_i5_sdiv_const7(i5 %x) {
; CHECK-GI-NEXT: sbfx w9, w0, #0, #5
; CHECK-GI-NEXT: sbfx w8, w8, #0, #5
; CHECK-GI-NEXT: mul w8, w9, w8
-; CHECK-GI-NEXT: sbfx w8, w8, #0, #10
; CHECK-GI-NEXT: add w8, w0, w8, asr #5
; CHECK-GI-NEXT: sbfx w8, w8, #0, #5
; CHECK-GI-NEXT: asr w8, w8, #2
@@ -1560,7 +1559,6 @@ define i8 @combine_i8_sdiv_const7(i8 %x) {
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: mov w9, #-109 // =0xffffff93
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: sxth w8, w8
; CHECK-GI-NEXT: add w8, w0, w8, asr #8
; CHECK-GI-NEXT: sbfx w8, w8, #2, #6
; CHECK-GI-NEXT: ubfx w9, w8, #7, #1
@@ -1585,7 +1583,7 @@ define i8 @combine_i8_sdiv_const100(i8 %x) {
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: mov w9, #41 // =0x29
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: asr w8, w8, #8
; CHECK-GI-NEXT: asr w8, w8, #4
; CHECK-GI-NEXT: ubfx w9, w8, #7, #1
; CHECK-GI-NEXT: add w0, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
index fb2a1fa697c26..581d8e116359b 100644
--- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
@@ -2832,247 +2832,246 @@ define i32 @test_sdot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b
; CHECK-GI-NEXT: ldp q2, q1, [x1]
; CHECK-GI-NEXT: movi d0, #0000000000000000
; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Folded Spill
-; CHECK-GI-NEXT: mov b5, v2.b[2]
; CHECK-GI-NEXT: mov b6, v2.b[3]
; CHECK-GI-NEXT: mov b7, v2.b[4]
; CHECK-GI-NEXT: mov b16, v2.b[5]
+; CHECK-GI-NEXT: mov b19, v2.b[8]
+; CHECK-GI-NEXT: mov b4, v2.b[1]
+; CHECK-GI-NEXT: mov b5, v2.b[2]
; CHECK-GI-NEXT: mov b17, v2.b[6]
; CHECK-GI-NEXT: mov b18, v2.b[7]
-; CHECK-GI-NEXT: mov b19, v2.b[8]
; CHECK-GI-NEXT: mov b20, v2.b[9]
-; CHECK-GI-NEXT: mov b21, v2.b[15]
-; CHECK-GI-NEXT: mov b3, v2.b[1]
-; CHECK-GI-NEXT: fmov w19, s2
-; CHECK-GI-NEXT: mov b22, v1.b[6]
-; CHECK-GI-NEXT: fmov w6, s5
-; CHECK-GI-NEXT: mov b5, v2.b[10]
-; CHECK-GI-NEXT: fmov w14, s6
-; CHECK-GI-NEXT: mov b6, v2.b[11]
+; CHECK-GI-NEXT: mov b21, v2.b[10]
+; CHECK-GI-NEXT: mov b22, v2.b[11]
+; CHECK-GI-NEXT: fmov w7, s2
+; CHECK-GI-NEXT: fmov w13, s6
+; CHECK-GI-NEXT: mov b6, v2.b[12]
; CHECK-GI-NEXT: fmov w2, s7
-; CHECK-GI-NEXT: stp s17, s18, [sp, #4] // 8-byte Folded Spill
-; CHECK-GI-NEXT: mov b7, v2.b[12]
+; CHECK-GI-NEXT: mov b7, v2.b[13]
; CHECK-GI-NEXT: fmov w11, s16
-; CHECK-GI-NEXT: sxtb w28, w19
-; CHECK-GI-NEXT: mov b16, v2.b[13]
-; CHECK-GI-NEXT: mov b18, v1.b[1]
-; CHECK-GI-NEXT: sxtb w6, w6
-; CHECK-GI-NEXT: mov b17, v2.b[14]
-; CHECK-GI-NEXT: ldp q4, q2, [x0]
-; CHECK-GI-NEXT: fmov w25, s19
+; CHECK-GI-NEXT: mov b16, v2.b[14]
+; CHECK-GI-NEXT: mov b23, v2.b[15]
+; CHECK-GI-NEXT: ldp q3, q2, [x0]
+; CHECK-GI-NEXT: fmov w26, s19
+; CHECK-GI-NEXT: fmov w19, s4
+; CHECK-GI-NEXT: stp s17, s18, [sp, #4] // 8-byte Folded Spill
+; CHECK-GI-NEXT: fmov w29, s5
; CHECK-GI-NEXT: fmov w24, s20
-; CHECK-GI-NEXT: fmov w22, s5
-; CHECK-GI-NEXT: mov b5, v1.b[2]
-; CHECK-GI-NEXT: fmov w0, s6
-; CHECK-GI-NEXT: sxtb w14, w14
-; CHECK-GI-NEXT: mov b20, v1.b[3]
-; CHECK-GI-NEXT: fmov w16, s7
-; CHECK-GI-NEXT: mov b7, v1.b[4]
-; CHECK-GI-NEXT: fmov w15, s16
-; CHECK-GI-NEXT: sxtb w25, w25
+; CHECK-GI-NEXT: sxtb w8, w7
+; CHECK-GI-NEXT: mov b4, v3.b[2]
+; CHECK-GI-NEXT: mov b5, v3.b[1]
+; CHECK-GI-NEXT: sxtb w13, w13
+; CHECK-GI-NEXT: mov b17, v1.b[1]
+; CHECK-GI-NEXT: fmov w22, s21
+; CHECK-GI-NEXT: sxtb w26, w26
+; CHECK-GI-NEXT: mov b18, v1.b[2]
+; CHECK-GI-NEXT: fmov w18, s22
; CHECK-GI-NEXT: sxtb w24, w24
-; CHECK-GI-NEXT: mov b16, v1.b[5]
-; CHECK-GI-NEXT: fmov w13, s21
+; CHECK-GI-NEXT: mov b19, v1.b[3]
+; CHECK-GI-NEXT: fmov w16, s6
+; CHECK-GI-NEXT: sxtb w19, w19
+; CHECK-GI-NEXT: mov b21, v1.b[4]
+; CHECK-GI-NEXT: fmov w15, s7
; CHECK-GI-NEXT: sxtb w22, w22
-; CHECK-GI-NEXT: mov b6, v4.b[2]
-; CHECK-GI-NEXT: fmov w26, s18
-; CHECK-GI-NEXT: sxtb w0, w0
-; CHECK-GI-NEXT: mov b21, v1.b[7]
-; CHECK-GI-NEXT: mov b18, v4.b[4]
-; CHECK-GI-NEXT: fmov w7, s3
-; CHECK-GI-NEXT: mov b3, v4.b[1]
-; CHECK-GI-NEXT: fmov w12, s17
-; CHECK-GI-NEXT: fmov w5, s5
-; CHECK-GI-NEXT: mov b19, v4.b[3]
-; CHECK-GI-NEXT: fmov w4, s20
-; CHECK-GI-NEXT: fmov w3, s7
-; CHECK-GI-NEXT: sxtb w29, w7
-; CHECK-GI-NEXT: mov b17, v4.b[5]
-; CHECK-GI-NEXT: fmov w1, s16
-; CHECK-GI-NEXT: sxtb w5, w5
-; CHECK-GI-NEXT: mov b16, v4.b[6]
-; CHECK-GI-NEXT: fmov w18, s22
-; CHECK-GI-NEXT: mov b7, v4.b[7]
-; CHECK-GI-NEXT: fmov w17, s21
-; CHECK-GI-NEXT: mov b5, v4.b[8]
-; CHECK-GI-NEXT: mov b20, v4.b[9]
-; CHECK-GI-NEXT: fmov w27, s6
-; CHECK-GI-NEXT: mov b6, v4.b[10]
-; CHECK-GI-NEXT: mov b21, v4.b[11]
-; CHECK-GI-NEXT: fmov w21, s18
-; CHECK-GI-NEXT: mov b18, v4.b[12]
-; CHECK-GI-NEXT: mov b22, v4.b[13]
-; CHECK-GI-NEXT: mov b23, v4.b[14]
-; CHECK-GI-NEXT: fmov w10, s4
+; CHECK-GI-NEXT: mov b7, v1.b[5]
+; CHECK-GI-NEXT: mov b6, v3.b[3]
+; CHECK-GI-NEXT: sxtb w11, w11
+; CHECK-GI-NEXT: fmov w12, s23
+; CHECK-GI-NEXT: mov b22, v1.b[6]
+; CHECK-GI-NEXT: mov b23, v1.b[7]
+; CHECK-GI-NEXT: mov b20, v3.b[4]
+; CHECK-GI-NEXT: fmov w28, s4
+; CHECK-GI-NEXT: fmov s4, w26
+; CHECK-GI-NEXT: fmov w14, s16
+; CHECK-GI-NEXT: fmov w27, s17
+; CHECK-GI-NEXT: fmov w5, s18
+; CHECK-GI-NEXT: sxtb w12, w12
+; CHECK-GI-NEXT: fmov w4, s19
+; CHECK-GI-NEXT: mov b19, v3.b[5]
+; CHECK-GI-NEXT: sxtb w28, w28
+; CHECK-GI-NEXT: fmov w3, s21
+; CHECK-GI-NEXT: mov b18, v3.b[6]
; CHECK-GI-NEXT: sxtb w27, w27
-; CHECK-GI-NEXT: mov b24, v4.b[15]
-; CHECK-GI-NEXT: fmov s4, w25
-; CHECK-GI-NEXT: fmov w30, s3
-; CHECK-GI-NEXT: fmov s3, w28
-; CHECK-GI-NEXT: fmov w9, s5
-; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: fmov w7, s7
-; CHECK-GI-NEXT: mov b7, v2.b[1]
+; CHECK-GI-NEXT: sxtb w5, w5
+; CHECK-GI-NEXT: fmov w1, s7
+; CHECK-GI-NEXT: mov b16, v3.b[7]
+; CHECK-GI-NEXT: fmov w0, s22
+; CHECK-GI-NEXT: mov b17, v3.b[8]
+; CHECK-GI-NEXT: fmov w17, s23
+; CHECK-GI-NEXT: mov b7, v3.b[9]
+; CHECK-GI-NEXT: fmov w30, s5
+; CHECK-GI-NEXT: mov b5, v3.b[10]
+; CHECK-GI-NEXT: mov b21, v3.b[11]
+; CHECK-GI-NEXT: fmov w25, s6
+; CHECK-GI-NEXT: mov b6, v3.b[12]
+; CHECK-GI-NEXT: fmov w23, s20
+; CHECK-GI-NEXT: mov b20, v3.b[13]
+; CHECK-GI-NEXT: mov b22, v3.b[14]
+; CHECK-GI-NEXT: fmov w6, s3
+; CHECK-GI-NEXT: mov b23, v3.b[15]
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov v4.h[1], w24
-; CHECK-GI-NEXT: fmov w24, s1
-; CHECK-GI-NEXT: fmov w8, s20
-; CHECK-GI-NEXT: sxtb w9, w9
-; CHECK-GI-NEXT: mov v3.h[1], w29
-; CHECK-GI-NEXT: fmov w29, s6
-; CHECK-GI-NEXT: fmov s6, w10
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w19, s16
-; CHECK-GI-NEXT: sxtb w24, w24
+; CHECK-GI-NEXT: fmov w21, s19
+; CHECK-GI-NEXT: mov b19, v2.b[1]
+; CHECK-GI-NEXT: fmov w9, s17
+; CHECK-GI-NEXT: fmov w24, s6
+; CHECK-GI-NEXT: fmov w7, s16
+; CHECK-GI-NEXT: mov b16, v2.b[2]
; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: mov b16, v2.b[3]
+; CHECK-GI-NEXT: mov v3.h[1], w19
+; CHECK-GI-NEXT: sxtb w19, w29
+; CHECK-GI-NEXT: sxtb w9, w9
+; CHECK-GI-NEXT: fmov w29, s5
+; CHECK-GI-NEXT: mov v4.h[2], w22
+; CHECK-GI-NEXT: sxtb w22, w6
+; CHECK-GI-NEXT: fmov s5, w8
+; CHECK-GI-NEXT: fmov w10, s7
+; CHECK-GI-NEXT: fmov s7, w9
+; CHECK-GI-NEXT: fmov w9, s16
+; CHECK-GI-NEXT: fmov w20, s18
; CHECK-GI-NEXT: sxtb w29, w29
-; CHECK-GI-NEXT: fmov w23, s19
-; CHECK-GI-NEXT: mov b19, v2.b[2]
+; CHECK-GI-NEXT: fmov s6, w22
+; CHECK-GI-NEXT: fmov w22, s2
; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: fmov s5, w24
-; CHECK-GI-NEXT: sxtb w24, w30
-; CHECK-GI-NEXT: mov v3.h[2], w6
-; CHECK-GI-NEXT: sxtb w6, w26
-; CHECK-GI-NEXT: fmov w28, s21
-; CHECK-GI-NEXT: sxtb w23, w23
-; CHECK-GI-NEXT: mov v6.h[1], w24
-; CHECK-GI-NEXT: fmov w24, s7
-; CHECK-GI-NEXT: fmov s7, w9
-; CHECK-GI-NEXT: fmov w9, s19
-; CHECK-GI-NEXT: mov v5.h[1], w6
-; CHECK-GI-NEXT: mov v4.h[2], w22
-; CHECK-GI-NEXT: fmov w20, s17
-; CHECK-GI-NEXT: mov b17, v2.b[4]
-; CHECK-GI-NEXT: sxtb w24, w24
-; CHECK-GI-NEXT: mov v3.h[3], w14
-; CHECK-GI-NEXT: sxtb w14, w2
+; CHECK-GI-NEXT: mov v5.h[1], w27
+; CHECK-GI-NEXT: sxtb w27, w30
; CHECK-GI-NEXT: sxtb w9, w9
-; CHECK-GI-NEXT: mov v7.h[1], w8
-; CHECK-GI-NEXT: fmov w8, s16
-; CHECK-GI-NEXT: fmov s16, w10
-; CHECK-GI-NEXT: mov v6.h[2], w27
+; CHECK-GI-NEXT: mov b18, v2.b[3]
+; CHECK-GI-NEXT: mov v3.h[2], w19
+; CHECK-GI-NEXT: sxtb w22, w22
+; CHECK-GI-NEXT: mov v6.h[1], w27
+; CHECK-GI-NEXT: fmov w27, s19
+; CHECK-GI-NEXT: mov v7.h[1], w10
+; CHECK-GI-NEXT: fmov w26, s21
+; CHECK-GI-NEXT: mov b17, v2.b[4]
+; CHECK-GI-NEXT: fmov s16, w22
; CHECK-GI-NEXT: mov v5.h[2], w5
-; CHECK-GI-NEXT: fmov w25, s18
-; CHECK-GI-NEXT: mov v4.h[3], w0
-; CHECK-GI-NEXT: sxtb w0, w4
-; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: mov b18, v2.b[5]
-; CHECK-GI-NEXT: fmov w10, s17
-; CHECK-GI-NEXT: mov v16.h[1], w24
+; CHECK-GI-NEXT: sxtb w5, w25
+; CHECK-GI-NEXT: sxtb w27, w27
+; CHECK-GI-NEXT: fmov w10, s18
+; CHECK-GI-NEXT: mov v3.h[3], w13
+; CHECK-GI-NEXT: sxtb w13, w4
+; CHECK-GI-NEXT: mov v6.h[2], w28
+; CHECK-GI-NEXT: fmov w8, s20
+; CHECK-GI-NEXT: mov v16.h[1], w27
; CHECK-GI-NEXT: mov v7.h[2], w29
-; CHECK-GI-NEXT: mov v3.h[4], w14
-; CHECK-GI-NEXT: sxtb w14, w25
-; CHECK-GI-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov b20, v2.b[5]
; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: mov v6.h[3], w23
-; CHECK-GI-NEXT: mov v5.h[3], w0
-; CHECK-GI-NEXT: fmov w26, s22
-; CHECK-GI-NEXT: mov b19, v2.b[6]
-; CHECK-GI-NEXT: fmov w27, s18
+; CHECK-GI-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: sxtb w8, w8
+; CHECK-GI-NEXT: fmov w22, s17
+; CHECK-GI-NEXT: mov v5.h[3], w13
+; CHECK-GI-NEXT: sxtb w13, w2
+; CHECK-GI-NEXT: mov v6.h[3], w5
+; CHECK-GI-NEXT: mov b21, v2.b[6]
; CHECK-GI-NEXT: mov v16.h[2], w9
-; CHECK-GI-NEXT: sxtb w9, w28
-; CHECK-GI-NEXT: fmov w22, s23
+; CHECK-GI-NEXT: sxtb w9, w18
+; CHECK-GI-NEXT: sxtb w18, w23
+; CHECK-GI-NEXT: mov v3.h[4], w13
+; CHECK-GI-NEXT: sxtb w13, w24
+; CHECK-GI-NEXT: fmov w27, s20
+; CHECK-GI-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v4.h[3], w9
+; CHECK-GI-NEXT: sxtb w9, w26
+; CHECK-GI-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v16.h[3], w10
+; CHECK-GI-NEXT: sxtb w10, w3
+; CHECK-GI-NEXT: mov v6.h[4], w18
+; CHECK-GI-NEXT: ldr w18, [sp, #4] // 4-byte Folded Reload
+; CHECK-GI-NEXT: mov v7.h[3], w9
+; CHECK-GI-NEXT: sxtb w9, w16
+; CHECK-GI-NEXT: sxtb w16, w22
+; CHECK-GI-NEXT: mov v5.h[4], w10
+; CHECK-GI-NEXT: sxtb w10, w15
+; CHECK-GI-NEXT: sxtb w18, w18
+; CHECK-GI-NEXT: mov v4.h[4], w9
+; CHECK-GI-NEXT: sxtb w9, w21
+; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v16.h[4], w16
+; CHECK-GI-NEXT: mov v7.h[4], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #8] // 4-byte Folded Reload
+; CHECK-GI-NEXT: mov v6.h[5], w9
+; CHECK-GI-NEXT: sxtb w9, w1
+; CHECK-GI-NEXT: mov v3.h[5], w11
+; CHECK-GI-NEXT: sxtb w11, w27
+; CHECK-GI-NEXT: fmov w19, s22
+; CHECK-GI-NEXT: fmov w28, s21
+; CHECK-GI-NEXT: sxtb w13, w13
; CHECK-GI-NEXT: mov b17, v2.b[7]
-; CHECK-GI-NEXT: fmov w6, s24
+; CHECK-GI-NEXT: mov v5.h[5], w9
+; CHECK-GI-NEXT: sxtb w9, w0
+; CHECK-GI-NEXT: mov v4.h[5], w10
+; CHECK-GI-NEXT: sxtb w10, w20
+; CHECK-GI-NEXT: mov v7.h[5], w8
+; CHECK-GI-NEXT: mov v16.h[5], w11
+; CHECK-GI-NEXT: sxtb w8, w14
+; CHECK-GI-NEXT: sxtb w11, w28
+; CHECK-GI-NEXT: mov v6.h[6], w10
+; CHECK-GI-NEXT: sxtb w10, w19
+; CHECK-GI-NEXT: fmov w6, s23
+; CHECK-GI-NEXT: mov v5.h[6], w9
+; CHECK-GI-NEXT: fmov w9, s17
+; CHECK-GI-NEXT: mov v3.h[6], w18
+; CHECK-GI-NEXT: mov v4.h[6], w8
+; CHECK-GI-NEXT: sxtb w8, w7
+; CHECK-GI-NEXT: mov v7.h[6], w10
+; CHECK-GI-NEXT: mov v16.h[6], w11
+; CHECK-GI-NEXT: sxtb w10, w6
; CHECK-GI-NEXT: mov v0.s[1], wzr
-; CHECK-GI-NEXT: mov v7.h[3], w9
-; CHECK-GI-NEXT: sxtb w9, w11
-; CHECK-GI-NEXT: sxtb w11, w21
-; CHECK-GI-NEXT: fmov w24, s19
-; CHECK-GI-NEXT: mov v16.h[3], w8
-; CHECK-GI-NEXT: sxtb w8, w16
-; CHECK-GI-NEXT: sxtb w16, w3
-; CHECK-GI-NEXT: mov v6.h[4], w11
-; CHECK-GI-NEXT: ldr w11, [sp, #4] // 4-byte Folded Reload
-; CHECK-GI-NEXT: mov v3.h[5], w9
-; CHECK-GI-NEXT: sxtb w9, w15
-; CHECK-GI-NEXT: sxtb w15, w27
-; CHECK-GI-NEXT: mov v7.h[4], w14
-; CHECK-GI-NEXT: sxtb w14, w1
-; CHECK-GI-NEXT: sxtb w11, w11
-; CHECK-GI-NEXT: mov v4.h[4], w8
-; CHECK-GI-NEXT: sxtb w8, w20
+; CHECK-GI-NEXT: mov v6.h[7], w8
+; CHECK-GI-NEXT: sxtb w8, w17
+; CHECK-GI-NEXT: sxtb w9, w9
+; CHECK-GI-NEXT: mov v3.h[7], w13
; CHECK-GI-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v5.h[4], w16
-; CHECK-GI-NEXT: mov v16.h[4], w10
-; CHECK-GI-NEXT: sxtb w10, w26
-; CHECK-GI-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v6.h[5], w8
-; CHECK-GI-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
-; CHECK-GI-NEXT: mov v7.h[5], w10
-; CHECK-GI-NEXT: sxtb w10, w12
-; CHECK-GI-NEXT: sxtb w12, w18
-; CHECK-GI-NEXT: mov v4.h[5], w9
-; CHECK-GI-NEXT: sxtb w9, w19
-; CHECK-GI-NEXT: mov v5.h[5], w14
-; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: mov v16.h[5], w15
-; CHECK-GI-NEXT: mov v3.h[6], w11
-; CHECK-GI-NEXT: sxtb w11, w22
-; CHECK-GI-NEXT: mov v6.h[6], w9
-; CHECK-GI-NEXT: sxtb w9, w13
-; CHECK-GI-NEXT: sxtb w13, w24
+; CHECK-GI-NEXT: mov v4.h[7], w12
+; CHECK-GI-NEXT: mov v5.h[7], w8
+; CHECK-GI-NEXT: mov v7.h[7], w10
+; CHECK-GI-NEXT: mov v16.h[7], w9
+; CHECK-GI-NEXT: smov w8, v1.b[8]
+; CHECK-GI-NEXT: smov w9, v2.b[8]
; CHECK-GI-NEXT: mov v0.s[2], wzr
-; CHECK-GI-NEXT: mov v7.h[6], w11
-; CHECK-GI-NEXT: fmov w11, s17
-; CHECK-GI-NEXT: mov v4.h[6], w10
-; CHECK-GI-NEXT: sxtb w10, w7
-; CHECK-GI-NEXT: mov v5.h[6], w12
-; CHECK-GI-NEXT: mov v16.h[6], w13
-; CHECK-GI-NEXT: mov v3.h[7], w8
-; CHECK-GI-NEXT: sxtb w8, w6
-; CHECK-GI-NEXT: smov w12, v1.b[8]
-; CHECK-GI-NEXT: mov v6.h[7], w10
-; CHECK-GI-NEXT: sxtb w10, w17
-; CHECK-GI-NEXT: sxtb w11, w11
-; CHECK-GI-NEXT: mov v4.h[7], w9
-; CHECK-GI-NEXT: mov v7.h[7], w8
-; CHECK-GI-NEXT: smov w8, v2.b[8]
-; CHECK-GI-NEXT: mov v5.h[7], w10
-; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v16.h[7], w11
-; CHECK-GI-NEXT: mov v...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/167311
More information about the llvm-commits
mailing list