[llvm] [X86][CodeGen] Not compress EVEX into VEX when R16-R31 is used (PR #73604)

Mon Nov 27 17:57:45 PST 2023

https://github.com/KanRobert created https://github.com/llvm/llvm-project/pull/73604

The VEX prefix cannot encode R16-R31, so an instruction that uses any of these registers must not be compressed from EVEX to VEX.


>From 44dc80a4db90727ec1883ed0a723aff821982a9a Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Tue, 28 Nov 2023 09:55:13 +0800
Subject: [PATCH] [X86][CodeGen] Not compress EVEX into VEX when R16-R31 is
 used

The VEX prefix cannot encode R16-R31, so an instruction that uses any of these registers must not be compressed from EVEX to VEX.
---
 llvm/lib/Target/X86/X86EvexToVex.cpp     |  4 +++
 llvm/test/CodeGen/X86/apx/evex-to-vex.ll | 34 ++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/apx/evex-to-vex.ll

diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp
index 88366558562c51e..fda6c15fed34db7 100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -125,6 +125,10 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
     if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
       return true;
 
+    // Check for APX extended GPRs (register indices 16-31), which VEX cannot encode.
+    if (X86II::isApxExtendedReg(Reg))
+      return true;
+
     return false;
   };
 
diff --git a/llvm/test/CodeGen/X86/apx/evex-to-vex.ll b/llvm/test/CodeGen/X86/apx/evex-to-vex.ll
new file mode 100644
index 000000000000000..51a547c9a3a5131
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/evex-to-vex.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Check that EVEX is not compressed into VEX when an extended GPR (R16-R31, +egpr) is used.
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+egpr -show-mc-encoding | FileCheck %s
+
+define void @test_x86_vcvtps2ph_256_m(ptr nocapture %d, <8 x float> %a) nounwind {
+; CHECK-LABEL: test_x86_vcvtps2ph_256_m:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbp # encoding: [0x55]
+; CHECK-NEXT:    pushq %r15 # encoding: [0x41,0x57]
+; CHECK-NEXT:    pushq %r14 # encoding: [0x41,0x56]
+; CHECK-NEXT:    pushq %r13 # encoding: [0x41,0x55]
+; CHECK-NEXT:    pushq %r12 # encoding: [0x41,0x54]
+; CHECK-NEXT:    pushq %rbx # encoding: [0x53]
+; CHECK-NEXT:    movq %rdi, %r16 # encoding: [0xd5,0x18,0x89,0xf8]
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    vcvtps2ph $3, %ymm0, (%r16) # encoding: [0x62,0xfb,0x7d,0x28,0x1d,0x00,0x03]
+; CHECK-NEXT:    popq %rbx # encoding: [0x5b]
+; CHECK-NEXT:    popq %r12 # encoding: [0x41,0x5c]
+; CHECK-NEXT:    popq %r13 # encoding: [0x41,0x5d]
+; CHECK-NEXT:    popq %r14 # encoding: [0x41,0x5e]
+; CHECK-NEXT:    popq %r15 # encoding: [0x41,0x5f]
+; CHECK-NEXT:    popq %rbp # encoding: [0x5d]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load i32, ptr %d, align 4
+  call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{esp},~{r8d},~{r9d},~{r10d},~{r11d},~{r12d},~{r13d},~{r14d},~{r15d},~{dirflag},~{fpsr},~{flags}"()
+  %1 = tail call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a, i32 3)
+  store <8 x i16> %1, ptr %d, align 16
+  ret void
+}
+
+declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly