[llvm] r184664 - Add MI-Sched support for x86 macro fusion.

Andrew Trick atrick at apple.com
Sun Jun 23 19:09:41 PDT 2013


On Jun 23, 2013, at 6:17 AM, Hal Finkel <hfinkel at anl.gov> wrote:

> ----- Original Message -----
>> Author: atrick
>> Date: Sun Jun 23 04:00:28 2013
>> New Revision: 184664
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=184664&view=rev
>> Log:
>> Add MI-Sched support for x86 macro fusion.
>> 
>> This is an awful implementation of the target hook. But we don't have
>> abstractions yet for common machine ops, and I don't see any quick
>> way
>> to make it table-driven.
> 
> This may not be 'quick' (or sufficiently clean), but if you were to define pseudo-instructions to represent FuseTest, FuseCmp, FuseInc, then you could use TableGen's InstrMapping backend to generate a query function for these.

Yes, exactly. That’s how I would like it to work. It would take me a bit of time to figure out how, and I’m also not sure it will be cleaner. I can’t say when I’ll get to this cleanup task, so if anyone else wants to tackle it, be my guest.

Thanks for the suggestion,
-Andy

>> Added:
>>    llvm/trunk/test/CodeGen/X86/misched-fusion.ll
>> Modified:
>>    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
>>    llvm/trunk/lib/Target/X86/X86InstrInfo.h
>> 
>> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=184664&r1=184663&r2=184664&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Jun 23 04:00:28
>> 2013
>> @@ -4647,6 +4647,167 @@ bool X86InstrInfo::shouldScheduleLoadsNe
>>   return true;
>> }
>> 
>> +bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First,
>> +                                          MachineInstr *Second)
>> const {
>> +  // Check if this processor supports macro-fusion. Since this is a
>> minor
>> +  // heuristic, we haven't specifically reserved a feature. hasAVX
>> is a decent
>> +  // proxy for SandyBridge+.
>> +  if (!TM.getSubtarget<X86Subtarget>().hasAVX())
>> +    return false;
>> +
>> +  enum {
>> +    FuseTest,
>> +    FuseCmp,
>> +    FuseInc
>> +  } FuseKind;
>> +
>> +  switch(Second->getOpcode()) {
>> +  default:
>> +    return false;
>> +  case X86::JE_4:
>> +  case X86::JNE_4:
>> +  case X86::JL_4:
>> +  case X86::JLE_4:
>> +  case X86::JG_4:
>> +  case X86::JGE_4:
>> +    FuseKind = FuseInc;
>> +    break;
>> +  case X86::JB_4:
>> +  case X86::JBE_4:
>> +  case X86::JA_4:
>> +  case X86::JAE_4:
>> +    FuseKind = FuseCmp;
>> +    break;
>> +  case X86::JS_4:
>> +  case X86::JNS_4:
>> +  case X86::JP_4:
>> +  case X86::JNP_4:
>> +  case X86::JO_4:
>> +  case X86::JNO_4:
>> +    FuseKind = FuseTest;
>> +    break;
>> +  }
>> +  switch (First->getOpcode()) {
>> +  default:
>> +    return false;
>> +  case X86::TEST8rr:
>> +  case X86::TEST16rr:
>> +  case X86::TEST32rr:
>> +  case X86::TEST64rr:
>> +  case X86::TEST8ri:
>> +  case X86::TEST16ri:
>> +  case X86::TEST32ri:
>> +  case X86::TEST32i32:
>> +  case X86::TEST64i32:
>> +  case X86::TEST64ri32:
>> +  case X86::TEST8rm:
>> +  case X86::TEST16rm:
>> +  case X86::TEST32rm:
>> +  case X86::TEST64rm:
>> +  case X86::AND16i16:
>> +  case X86::AND16ri:
>> +  case X86::AND16ri8:
>> +  case X86::AND16rm:
>> +  case X86::AND16rr:
>> +  case X86::AND32i32:
>> +  case X86::AND32ri:
>> +  case X86::AND32ri8:
>> +  case X86::AND32rm:
>> +  case X86::AND32rr:
>> +  case X86::AND64i32:
>> +  case X86::AND64ri32:
>> +  case X86::AND64ri8:
>> +  case X86::AND64rm:
>> +  case X86::AND64rr:
>> +  case X86::AND8i8:
>> +  case X86::AND8ri:
>> +  case X86::AND8rm:
>> +  case X86::AND8rr:
>> +    return true;
>> +  case X86::CMP16i16:
>> +  case X86::CMP16ri:
>> +  case X86::CMP16ri8:
>> +  case X86::CMP16rm:
>> +  case X86::CMP16rr:
>> +  case X86::CMP32i32:
>> +  case X86::CMP32ri:
>> +  case X86::CMP32ri8:
>> +  case X86::CMP32rm:
>> +  case X86::CMP32rr:
>> +  case X86::CMP64i32:
>> +  case X86::CMP64ri32:
>> +  case X86::CMP64ri8:
>> +  case X86::CMP64rm:
>> +  case X86::CMP64rr:
>> +  case X86::CMP8i8:
>> +  case X86::CMP8ri:
>> +  case X86::CMP8rm:
>> +  case X86::CMP8rr:
>> +  case X86::ADD16i16:
>> +  case X86::ADD16ri:
>> +  case X86::ADD16ri8:
>> +  case X86::ADD16ri8_DB:
>> +  case X86::ADD16ri_DB:
>> +  case X86::ADD16rm:
>> +  case X86::ADD16rr:
>> +  case X86::ADD16rr_DB:
>> +  case X86::ADD32i32:
>> +  case X86::ADD32ri:
>> +  case X86::ADD32ri8:
>> +  case X86::ADD32ri8_DB:
>> +  case X86::ADD32ri_DB:
>> +  case X86::ADD32rm:
>> +  case X86::ADD32rr:
>> +  case X86::ADD32rr_DB:
>> +  case X86::ADD64i32:
>> +  case X86::ADD64ri32:
>> +  case X86::ADD64ri32_DB:
>> +  case X86::ADD64ri8:
>> +  case X86::ADD64ri8_DB:
>> +  case X86::ADD64rm:
>> +  case X86::ADD64rr:
>> +  case X86::ADD64rr_DB:
>> +  case X86::ADD8i8:
>> +  case X86::ADD8mi:
>> +  case X86::ADD8mr:
>> +  case X86::ADD8ri:
>> +  case X86::ADD8rm:
>> +  case X86::ADD8rr:
>> +  case X86::SUB16i16:
>> +  case X86::SUB16ri:
>> +  case X86::SUB16ri8:
>> +  case X86::SUB16rm:
>> +  case X86::SUB16rr:
>> +  case X86::SUB32i32:
>> +  case X86::SUB32ri:
>> +  case X86::SUB32ri8:
>> +  case X86::SUB32rm:
>> +  case X86::SUB32rr:
>> +  case X86::SUB64i32:
>> +  case X86::SUB64ri32:
>> +  case X86::SUB64ri8:
>> +  case X86::SUB64rm:
>> +  case X86::SUB64rr:
>> +  case X86::SUB8i8:
>> +  case X86::SUB8ri:
>> +  case X86::SUB8rm:
>> +  case X86::SUB8rr:
>> +    return FuseKind == FuseCmp || FuseKind == FuseInc;
>> +  case X86::INC16r:
>> +  case X86::INC32r:
>> +  case X86::INC64_16r:
>> +  case X86::INC64_32r:
>> +  case X86::INC64r:
>> +  case X86::INC8r:
>> +  case X86::DEC16r:
>> +  case X86::DEC32r:
>> +  case X86::DEC64_16r:
>> +  case X86::DEC64_32r:
>> +  case X86::DEC64r:
>> +  case X86::DEC8r:
>> +    return FuseKind == FuseInc;
>> +  }
>> +}
>> 
>> bool X86InstrInfo::
>> ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
>> {
>> 
>> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=184664&r1=184663&r2=184664&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Sun Jun 23 04:00:28 2013
>> @@ -339,6 +339,9 @@ public:
>>                                        int64_t Offset1, int64_t
>>                                        Offset2,
>>                                        unsigned NumLoads) const;
>> 
>> +  virtual bool shouldScheduleAdjacent(MachineInstr* First,
>> +                                      MachineInstr *Second) const
>> LLVM_OVERRIDE;
>> +
>>   virtual void getNoopForMachoTarget(MCInst &NopInst) const;
>> 
>>   virtual
>> 
>> Added: llvm/trunk/test/CodeGen/X86/misched-fusion.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/misched-fusion.ll?rev=184664&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/misched-fusion.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/misched-fusion.ll Sun Jun 23 04:00:28
>> 2013
>> @@ -0,0 +1,108 @@
>> +; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -disable-lsr
>> -pre-RA-sched=source -enable-misched -verify-machineinstrs |
>> FileCheck %s
>> +
>> +; Verify that TEST+JE are scheduled together.
>> +; CHECK: test_je
>> +; CHECK: %loop
>> +; CHECK: test
>> +; CHECK-NEXT: je
>> +define void @test_je() {
>> +entry:
>> +  br label %loop
>> +
>> +loop:
>> +  %var = phi i32* [ null, %entry ], [ %next.load, %loop1 ], [ %var,
>> %loop2 ]
>> +  %next.ptr = phi i32** [ null, %entry ], [ %next.ptr, %loop1 ], [
>> %gep, %loop2 ]
>> +  br label %loop1
>> +
>> +loop1:
>> +  %cond = icmp eq i32* %var, null
>> +  %next.load = load i32** %next.ptr
>> +  br i1 %cond, label %loop, label %loop2
>> +
>> +loop2:                                           ; preds = %loop1
>> +  %gep = getelementptr inbounds i32** %next.ptr, i32 1
>> +  store i32* %next.load, i32** undef
>> +  br label %loop
>> +}
>> +
>> +; Verify that DEC+JE are scheduled together.
>> +; CHECK: dec_je
>> +; CHECK: %loop1
>> +; CHECK: dec
>> +; CHECK-NEXT: je
>> +define void @dec_je() {
>> +entry:
>> +  br label %loop
>> +
>> +loop:
>> +  %var = phi i32 [ 0, %entry ], [ %next.var, %loop1 ], [ %var2,
>> %loop2 ]
>> +  %next.ptr = phi i32** [ null, %entry ], [ %next.ptr, %loop1 ], [
>> %gep, %loop2 ]
>> +  br label %loop1
>> +
>> +loop1:
>> +  %var2 = sub i32 %var, 1
>> +  %cond = icmp eq i32 %var2, 0
>> +  %next.load = load i32** %next.ptr
>> +  %next.var = load i32* %next.load
>> +  br i1 %cond, label %loop, label %loop2
>> +
>> +loop2:
>> +  %gep = getelementptr inbounds i32** %next.ptr, i32 1
>> +  store i32* %next.load, i32** undef
>> +  br label %loop
>> +}
>> +
>> +; DEC+JS should *not* be scheduled together.
>> +; CHECK: dec_js
>> +; CHECK: %loop1
>> +; CHECK: dec
>> +; CHECK: mov
>> +; CHECK: js
>> +define void @dec_js() {
>> +entry:
>> +  br label %loop2a
>> +
>> +loop2a:                                           ; preds = %loop1,
>> %body, %entry
>> +  %var = phi i32 [ 0, %entry ], [ %next.var, %loop1 ], [ %var2,
>> %loop2b ]
>> +  %next.ptr = phi i32** [ null, %entry ], [ %next.ptr, %loop1 ], [
>> %gep, %loop2b ]
>> +  br label %loop1
>> +
>> +loop1:                                            ; preds = %loop2a,
>> %loop2b
>> +  %var2 = sub i32 %var, 1
>> +  %cond = icmp slt i32 %var2, 0
>> +  %next.load = load i32** %next.ptr
>> +  %next.var = load i32* %next.load
>> +  br i1 %cond, label %loop2a, label %loop2b
>> +
>> +loop2b:                                           ; preds = %loop1
>> +  %gep = getelementptr inbounds i32** %next.ptr, i32 1
>> +  store i32* %next.load, i32** undef
>> +  br label %loop2a
>> +}
>> +
>> +; Verify that CMP+JB are scheduled together.
>> +; CHECK: cmp_jb
>> +; CHECK: %loop1
>> +; CHECK: cmp
>> +; CHECK-NEXT: jb
>> +define void @cmp_jb(i32 %n) {
>> +entry:
>> +  br label %loop2a
>> +
>> +loop2a:                                           ; preds = %loop1,
>> %body, %entry
>> +  %var = phi i32 [ 0, %entry ], [ %next.var, %loop1 ], [ %var2,
>> %loop2b ]
>> +  %next.ptr = phi i32** [ null, %entry ], [ %next.ptr, %loop1 ], [
>> %gep, %loop2b ]
>> +  br label %loop1
>> +
>> +loop1:                                            ; preds = %loop2a,
>> %loop2b
>> +  %var2 = sub i32 %var, 1
>> +  %cond = icmp ult i32 %var2, %n
>> +  %next.load = load i32** %next.ptr
>> +  %next.var = load i32* %next.load
>> +  br i1 %cond, label %loop2a, label %loop2b
>> +
>> +loop2b:                                           ; preds = %loop1
>> +  %gep = getelementptr inbounds i32** %next.ptr, i32 1
>> +  store i32* %next.load, i32** undef
>> +  br label %loop2a
>> +}
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>> 
> 
> -- 
> Hal Finkel
> Assistant Computational Scientist
> Leadership Computing Facility
> Argonne National Laboratory

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130623/aeea2518/attachment.html>


More information about the llvm-commits mailing list