[llvm] [X86] Fold shift into GF2P8AFFINEQB instruction (PR #180019)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 14 04:43:04 PST 2026
================
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni | FileCheck %s
+
+; Test that a constant left shift (shl) of a gf2p8affineqb result is folded
+; into the instruction's matrix and immediate operands, leaving no shift.
+
+define <16 x i8> @test_shl1_identity(<16 x i8> %src) {
+; CHECK-LABEL: test_shl1_identity:
+; CHECK: # %bb.0:
+; CHECK-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %1 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src,
+ <16 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1, ; identity matrix bytes 0x80..0x01 (first 64-bit half)
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 0) ; same matrix for the second half; immediate is 0
+ %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; uniform shl by 1; expected asm has no separate shift instruction
+ ret <16 x i8> %2
+}
+
+define <16 x i8> @test_shl2_identity(<16 x i8> %src) {
+; CHECK-LABEL: test_shl2_identity:
+; CHECK: # %bb.0:
+; CHECK-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %1 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src,
+ <16 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1, ; identity matrix bytes 0x80..0x01 (first 64-bit half)
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 0) ; same matrix for the second half; immediate is 0
+ %2 = shl <16 x i8> %1, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2,
+ i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> ; uniform shl by 2; expected asm has no separate shift instruction
+ ret <16 x i8> %2
+}
+
+; Test with non-zero immediate: shift is folded into both matrix and immediate
+define <16 x i8> @test_shl1_nonzero_imm(<16 x i8> %src) {
+; CHECK-LABEL: test_shl1_nonzero_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: gf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %1 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src,
+ <16 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1, ; identity matrix bytes 0x80..0x01 (first 64-bit half)
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 1) ; same matrix for the second half; immediate is 1
+ %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; uniform shl by 1; expected immediate becomes $2 (1 << 1)
+ ret <16 x i8> %2
+}
+
+declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
----------------
RKSimon wrote:
Please add 256-bit and 512-bit test coverage.
https://github.com/llvm/llvm-project/pull/180019
More information about the llvm-commits
mailing list