[PATCH] D156240: [X86] AMD Genoa (znver4) LoopMicroOpBufferSize update
Ganesh Gopalasubramanian via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 25 07:35:04 PDT 2023
GGanesh created this revision.
GGanesh added reviewers: RKSimon, craig.topper, andreadb, mnagaraj.
GGanesh added a project: LLVM.
Herald added subscribers: pengfei, zzheng, hiraditya.
Herald added a project: All.
GGanesh requested review of this revision.
Herald added a subscriber: llvm-commits.
Change LoopMicroOpBufferSize to 512 similar to znver3 value. This is done so as to support unrolling threshold.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D156240
Files:
llvm/lib/Target/X86/X86ScheduleZnver4.td
llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
Index: llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
===================================================================
--- llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
+++ llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
@@ -1,5 +1,6 @@
; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=TTI -pass-remarks-analysis=TTI < %s -S 2>&1 | FileCheck --check-prefixes=ALL,TTI %s
+; RUN: opt -passes=debugify,loop-unroll -mcpu=znver4 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
Index: llvm/lib/Target/X86/X86ScheduleZnver4.td
===================================================================
--- llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -35,9 +35,10 @@
// with large values here the compilation of certain loops
// ends up taking way too long.
// Ideally for znver4, we should have 6.75K. However we don't add that
- // considerting the impact compile time and prefer using default values
+ // considerting the impact compile time and prefer using default values
// instead.
- // let LoopMicroOpBufferSize = 6750;
+ // Retaining minimal value to influence unrolling as we did for znver3.
+ let LoopMicroOpBufferSize = 512;
// AMD SOG 19h, 2.6.2 L1 Data Cache
// The L1 data cache has a 4- or 5- cycle integer load-to-use latency.
// AMD SOG 19h, 2.12 L1 Data Cache
@@ -1538,7 +1539,7 @@
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr], (instregex
- "VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik", "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz",
+ "VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik", "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz",
"VFIXUPIMM(S|P)(S|D)(Z128|Z256?)rri", "VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)",
"VRANGE(S|P)(S|D)(Z|Z128|Z256?)rri(b?)k","VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)kz"
)>;
@@ -1790,20 +1791,20 @@
let ResourceCycles = [2];
let NumMicroOps = 1;
}
-def : InstRW<[Zn4VecALUZSlow], (instrs
- VPABSBZ128rr, VPABSBZ128rrk, VPABSBZ128rrkz, VPABSDZ128rr,
- VPABSDZ128rrk, VPABSDZ128rrkz, VPABSQZ128rr, VPABSQZ128rrk,
- VPABSQZ128rrkz, VPABSWZ128rr, VPABSWZ128rrk, VPABSWZ128rrkz,
- VPADDSBZ128rr, VPADDSBZ128rrk, VPADDSBZ128rrkz, VPADDSWZ128rr,
- VPADDSWZ128rrk, VPADDSWZ128rrkz,VPADDUSBZ128rr, VPADDUSBZ128rrk,
- VPADDUSBZ128rrkz, VPADDUSWZ128rr, VPADDUSWZ128rrk, VPADDUSWZ128rrkz,
- VPAVGBZ128rr, VPAVGBZ128rrk, VPAVGBZ128rrkz, VPAVGWZ128rr,
- VPAVGWZ128rrk, VPAVGWZ128rrkz, VPOPCNTBZ128rr, VPOPCNTBZ128rrk,
- VPOPCNTBZ128rrkz, VPOPCNTDZ128rr, VPOPCNTDZ128rrk, VPOPCNTDZ128rrkz,
- VPOPCNTQZ128rr, VPOPCNTQZ128rrk,VPOPCNTQZ128rrkz, VPOPCNTWZ128rr,
- VPOPCNTWZ128rrk, VPOPCNTWZ128rrkz,VPSUBSBZ128rr, VPSUBSBZ128rrk,
- VPSUBSBZ128rrkz, VPSUBSWZ128rr, VPSUBSWZ128rrk, VPSUBSWZ128rrkz,
- VPSUBUSBZ128rr, VPSUBUSBZ128rrk, VPSUBUSBZ128rrkz,VPSUBUSWZ128rr,
+def : InstRW<[Zn4VecALUZSlow], (instrs
+ VPABSBZ128rr, VPABSBZ128rrk, VPABSBZ128rrkz, VPABSDZ128rr,
+ VPABSDZ128rrk, VPABSDZ128rrkz, VPABSQZ128rr, VPABSQZ128rrk,
+ VPABSQZ128rrkz, VPABSWZ128rr, VPABSWZ128rrk, VPABSWZ128rrkz,
+ VPADDSBZ128rr, VPADDSBZ128rrk, VPADDSBZ128rrkz, VPADDSWZ128rr,
+ VPADDSWZ128rrk, VPADDSWZ128rrkz,VPADDUSBZ128rr, VPADDUSBZ128rrk,
+ VPADDUSBZ128rrkz, VPADDUSWZ128rr, VPADDUSWZ128rrk, VPADDUSWZ128rrkz,
+ VPAVGBZ128rr, VPAVGBZ128rrk, VPAVGBZ128rrkz, VPAVGWZ128rr,
+ VPAVGWZ128rrk, VPAVGWZ128rrkz, VPOPCNTBZ128rr, VPOPCNTBZ128rrk,
+ VPOPCNTBZ128rrkz, VPOPCNTDZ128rr, VPOPCNTDZ128rrk, VPOPCNTDZ128rrkz,
+ VPOPCNTQZ128rr, VPOPCNTQZ128rrk,VPOPCNTQZ128rrkz, VPOPCNTWZ128rr,
+ VPOPCNTWZ128rrk, VPOPCNTWZ128rrkz,VPSUBSBZ128rr, VPSUBSBZ128rrk,
+ VPSUBSBZ128rrkz, VPSUBSWZ128rr, VPSUBSWZ128rrk, VPSUBSWZ128rrkz,
+ VPSUBUSBZ128rr, VPSUBUSBZ128rrk, VPSUBUSBZ128rrkz,VPSUBUSWZ128rr,
VPSUBUSWZ128rrk, VPSUBUSWZ128rrkz
)>;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D156240.543966.patch
Type: text/x-patch
Size: 4303 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230725/3f85f97d/attachment.bin>
More information about the llvm-commits
mailing list