[llvm] r320397 - [X86] Add ADD/SUB schedule tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 11 10:13:40 PST 2017


Author: rksimon
Date: Mon Dec 11 10:13:40 2017
New Revision: 320397

URL: http://llvm.org/viewvc/llvm-project?rev=320397&view=rev
Log:
[X86] Add ADD/SUB schedule tests

Modified:
    llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll

Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll?rev=320397&r1=320396&r2=320397&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll Mon Dec 11 10:13:40 2017
@@ -758,832 +758,1473 @@ define void @test_adcx(i32 %a0, i32* %a1
   ret void
 }
 
-; TODO - test_add
-
-define void @test_adox(i32 %a0, i32* %a1, i64 %a2, i64* %a3) optsize {
-; GENERIC-LABEL: test_adox:
+define void @test_add_8(i8 %a0, i8* %a1) optsize {
+; GENERIC-LABEL: test_add_8:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    adoxl %edi, %edi # sched: [1:0.33]
-; GENERIC-NEXT:    adoxq %rdx, %rdx # sched: [1:0.33]
-; GENERIC-NEXT:    adoxl (%rsi), %edi # sched: [5:0.50]
-; GENERIC-NEXT:    adoxq (%rcx), %rdx # sched: [5:0.50]
+; GENERIC-NEXT:    addb $7, %al # sched: [1:0.33]
+; GENERIC-NEXT:    addb $7, %dil # sched: [1:0.33]
+; GENERIC-NEXT:    addb $7, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    addb %dil, %dil # sched: [1:0.33]
+; GENERIC-NEXT:    addb %dil, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    addb (%rsi), %dil # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_adox:
+; ATOM-LABEL: test_add_8:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
-; ATOM-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
-; ATOM-NEXT:    adoxl (%rsi), %edi # sched: [1:1.00]
-; ATOM-NEXT:    adoxq (%rcx), %rdx # sched: [1:1.00]
+; ATOM-NEXT:    addb $7, %al # sched: [1:0.50]
+; ATOM-NEXT:    addb $7, %dil # sched: [1:0.50]
+; ATOM-NEXT:    addb $7, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    addb %dil, %dil # sched: [1:0.50]
+; ATOM-NEXT:    addb %dil, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    addb (%rsi), %dil # sched: [1:1.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_adox:
+; SLM-LABEL: test_add_8:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
-; SLM-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
-; SLM-NEXT:    adoxl (%rsi), %edi # sched: [4:1.00]
-; SLM-NEXT:    adoxq (%rcx), %rdx # sched: [4:1.00]
+; SLM-NEXT:    addb $7, %al # sched: [1:0.50]
+; SLM-NEXT:    addb $7, %dil # sched: [1:0.50]
+; SLM-NEXT:    addb $7, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    addb %dil, %dil # sched: [1:0.50]
+; SLM-NEXT:    addb %dil, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    addb (%rsi), %dil # sched: [4:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_adox:
+; SANDY-LABEL: test_add_8:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    adoxl %edi, %edi # sched: [1:0.33]
-; SANDY-NEXT:    adoxq %rdx, %rdx # sched: [1:0.33]
-; SANDY-NEXT:    adoxl (%rsi), %edi # sched: [5:0.50]
-; SANDY-NEXT:    adoxq (%rcx), %rdx # sched: [5:0.50]
+; SANDY-NEXT:    addb $7, %al # sched: [1:0.33]
+; SANDY-NEXT:    addb $7, %dil # sched: [1:0.33]
+; SANDY-NEXT:    addb $7, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    addb %dil, %dil # sched: [1:0.33]
+; SANDY-NEXT:    addb %dil, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    addb (%rsi), %dil # sched: [6:0.50]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_adox:
+; HASWELL-LABEL: test_add_8:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    adoxl %edi, %edi # sched: [1:0.25]
-; HASWELL-NEXT:    adoxq %rdx, %rdx # sched: [1:0.25]
-; HASWELL-NEXT:    adoxl (%rsi), %edi # sched: [6:0.50]
-; HASWELL-NEXT:    adoxq (%rcx), %rdx # sched: [6:0.50]
+; HASWELL-NEXT:    addb $7, %al # sched: [1:0.25]
+; HASWELL-NEXT:    addb $7, %dil # sched: [1:0.25]
+; HASWELL-NEXT:    addb $7, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    addb %dil, %dil # sched: [1:0.25]
+; HASWELL-NEXT:    addb %dil, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    addb (%rsi), %dil # sched: [6:0.50]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_adox:
+; BROADWELL-LABEL: test_add_8:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
-; BROADWELL-NEXT:    adoxl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT:    adoxq (%rcx), %rdx # sched: [6:0.50]
+; BROADWELL-NEXT:    addb $7, %al # sched: [1:0.25]
+; BROADWELL-NEXT:    addb $7, %dil # sched: [1:0.25]
+; BROADWELL-NEXT:    addb $7, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    addb %dil, %dil # sched: [1:0.25]
+; BROADWELL-NEXT:    addb %dil, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    addb (%rsi), %dil # sched: [6:0.50]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_adox:
+; SKYLAKE-LABEL: test_add_8:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
-; SKYLAKE-NEXT:    adoxl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT:    adoxq (%rcx), %rdx # sched: [6:0.50]
+; SKYLAKE-NEXT:    addb $7, %al # sched: [1:0.25]
+; SKYLAKE-NEXT:    addb $7, %dil # sched: [1:0.25]
+; SKYLAKE-NEXT:    addb $7, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    addb %dil, %dil # sched: [1:0.25]
+; SKYLAKE-NEXT:    addb %dil, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    addb (%rsi), %dil # sched: [6:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_adox:
+; SKX-LABEL: test_add_8:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
-; SKX-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
-; SKX-NEXT:    adoxl (%rsi), %edi # sched: [6:0.50]
-; SKX-NEXT:    adoxq (%rcx), %rdx # sched: [6:0.50]
+; SKX-NEXT:    addb $7, %al # sched: [1:0.25]
+; SKX-NEXT:    addb $7, %dil # sched: [1:0.25]
+; SKX-NEXT:    addb $7, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    addb %dil, %dil # sched: [1:0.25]
+; SKX-NEXT:    addb %dil, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    addb (%rsi), %dil # sched: [6:0.50]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_adox:
+; BTVER2-LABEL: test_add_8:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
-; BTVER2-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
-; BTVER2-NEXT:    adoxl (%rsi), %edi # sched: [4:1.00]
-; BTVER2-NEXT:    adoxq (%rcx), %rdx # sched: [4:1.00]
+; BTVER2-NEXT:    addb $7, %al # sched: [1:0.50]
+; BTVER2-NEXT:    addb $7, %dil # sched: [1:0.50]
+; BTVER2-NEXT:    addb $7, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    addb %dil, %dil # sched: [1:0.50]
+; BTVER2-NEXT:    addb %dil, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    addb (%rsi), %dil # sched: [4:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_adox:
+; ZNVER1-LABEL: test_add_8:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    adoxl %edi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT:    adoxq %rdx, %rdx # sched: [1:0.25]
-; ZNVER1-NEXT:    adoxl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT:    adoxq (%rcx), %rdx # sched: [5:0.50]
+; ZNVER1-NEXT:    addb $7, %al # sched: [1:0.25]
+; ZNVER1-NEXT:    addb $7, %dil # sched: [1:0.25]
+; ZNVER1-NEXT:    addb $7, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    addb %dil, %dil # sched: [1:0.25]
+; ZNVER1-NEXT:    addb %dil, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    addb (%rsi), %dil # sched: [5:0.50]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  tail call void asm "adox $0, $0 \0A\09 adox $2, $2 \0A\09 adox $1, $0 \0A\09 adox $3, $2", "r,*m,r,*m"(i32 %a0, i32* %a1, i64 %a2, i64* %a3) nounwind
+  tail call void asm "addb $2, %AL \0A\09 addb $2, $0 \0A\09 addb $2, $1 \0A\09 addb $0, $0 \0A\09 addb $0, $1 \0A\09 addb $1, $0", "r,*m,i"(i8 %a0, i8* %a1, i8 7) nounwind
   ret void
 }
-
-; TODO - test_and
-
-define i16 @test_bsf16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_bsf16:
+define void @test_add_16(i16 %a0, i16* %a1) optsize {
+; GENERIC-LABEL: test_add_16:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    bsfw %di, %ax # sched: [3:1.00]
-; GENERIC-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
+; GENERIC-NEXT:    addw $511, %ax # imm = 0x1FF
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    addw $511, %di # imm = 0x1FF
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    addw $511, (%rsi) # imm = 0x1FF
+; GENERIC-NEXT:    # sched: [7:1.00]
+; GENERIC-NEXT:    addw $7, %di # sched: [1:0.33]
+; GENERIC-NEXT:    addw $7, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    addw %di, %di # sched: [1:0.33]
+; GENERIC-NEXT:    addw %di, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    addw (%rsi), %di # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    # kill: def %ax killed %ax killed %eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_bsf16:
+; ATOM-LABEL: test_add_16:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    bsfw %di, %ax # sched: [16:8.00]
-; ATOM-NEXT:    bsfw (%rsi), %cx # sched: [16:8.00]
+; ATOM-NEXT:    addw $511, %ax # imm = 0x1FF
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    addw $511, %di # imm = 0x1FF
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    addw $511, (%rsi) # imm = 0x1FF
+; ATOM-NEXT:    # sched: [1:1.00]
+; ATOM-NEXT:    addw $7, %di # sched: [1:0.50]
+; ATOM-NEXT:    addw $7, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    addw %di, %di # sched: [1:0.50]
+; ATOM-NEXT:    addw %di, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    addw (%rsi), %di # sched: [1:1.00]
 ; ATOM-NEXT:    #NO_APP
-; ATOM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT:    # kill: def %ax killed %ax killed %eax
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_bsf16:
+; SLM-LABEL: test_add_16:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    bsfw %di, %ax # sched: [1:1.00]
-; SLM-NEXT:    bsfw (%rsi), %cx # sched: [4:1.00]
+; SLM-NEXT:    addw $511, %ax # imm = 0x1FF
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    addw $511, %di # imm = 0x1FF
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    addw $511, (%rsi) # imm = 0x1FF
+; SLM-NEXT:    # sched: [4:2.00]
+; SLM-NEXT:    addw $7, %di # sched: [1:0.50]
+; SLM-NEXT:    addw $7, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    addw %di, %di # sched: [1:0.50]
+; SLM-NEXT:    addw %di, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    addw (%rsi), %di # sched: [4:1.00]
 ; SLM-NEXT:    #NO_APP
-; SLM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_bsf16:
+; SANDY-LABEL: test_add_16:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    bsfw %di, %ax # sched: [3:1.00]
-; SANDY-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
+; SANDY-NEXT:    addw $511, %ax # imm = 0x1FF
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    addw $511, %di # imm = 0x1FF
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    addw $511, (%rsi) # imm = 0x1FF
+; SANDY-NEXT:    # sched: [7:1.00]
+; SANDY-NEXT:    addw $7, %di # sched: [1:0.33]
+; SANDY-NEXT:    addw $7, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    addw %di, %di # sched: [1:0.33]
+; SANDY-NEXT:    addw %di, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    addw (%rsi), %di # sched: [6:0.50]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_bsf16:
+; HASWELL-LABEL: test_add_16:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    bsfw %di, %ax # sched: [3:1.00]
-; HASWELL-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
+; HASWELL-NEXT:    addw $511, %ax # imm = 0x1FF
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    addw $511, %di # imm = 0x1FF
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    addw $511, (%rsi) # imm = 0x1FF
+; HASWELL-NEXT:    # sched: [7:1.00]
+; HASWELL-NEXT:    addw $7, %di # sched: [1:0.25]
+; HASWELL-NEXT:    addw $7, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    addw %di, %di # sched: [1:0.25]
+; HASWELL-NEXT:    addw %di, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    addw (%rsi), %di # sched: [6:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    # kill: def %ax killed %ax killed %eax
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_bsf16:
+; BROADWELL-LABEL: test_add_16:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    bsfw %di, %ax # sched: [3:1.00]
-; BROADWELL-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
+; BROADWELL-NEXT:    addw $511, %ax # imm = 0x1FF
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    addw $511, %di # imm = 0x1FF
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    addw $511, (%rsi) # imm = 0x1FF
+; BROADWELL-NEXT:    # sched: [6:1.00]
+; BROADWELL-NEXT:    addw $7, %di # sched: [1:0.25]
+; BROADWELL-NEXT:    addw $7, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    addw %di, %di # sched: [1:0.25]
+; BROADWELL-NEXT:    addw %di, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    addw (%rsi), %di # sched: [6:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    # kill: def %ax killed %ax killed %eax
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_bsf16:
+; SKYLAKE-LABEL: test_add_16:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    bsfw %di, %ax # sched: [3:1.00]
-; SKYLAKE-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
+; SKYLAKE-NEXT:    addw $511, %ax # imm = 0x1FF
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    addw $511, %di # imm = 0x1FF
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    addw $511, (%rsi) # imm = 0x1FF
+; SKYLAKE-NEXT:    # sched: [6:1.00]
+; SKYLAKE-NEXT:    addw $7, %di # sched: [1:0.25]
+; SKYLAKE-NEXT:    addw $7, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    addw %di, %di # sched: [1:0.25]
+; SKYLAKE-NEXT:    addw %di, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    addw (%rsi), %di # sched: [6:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_bsf16:
+; SKX-LABEL: test_add_16:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    bsfw %di, %ax # sched: [3:1.00]
-; SKX-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
+; SKX-NEXT:    addw $511, %ax # imm = 0x1FF
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    addw $511, %di # imm = 0x1FF
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    addw $511, (%rsi) # imm = 0x1FF
+; SKX-NEXT:    # sched: [6:1.00]
+; SKX-NEXT:    addw $7, %di # sched: [1:0.25]
+; SKX-NEXT:    addw $7, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    addw %di, %di # sched: [1:0.25]
+; SKX-NEXT:    addw %di, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    addw (%rsi), %di # sched: [6:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_bsf16:
+; BTVER2-LABEL: test_add_16:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    bsfw %di, %ax # sched: [1:0.50]
-; BTVER2-NEXT:    bsfw (%rsi), %cx # sched: [4:1.00]
+; BTVER2-NEXT:    addw $511, %ax # imm = 0x1FF
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    addw $511, %di # imm = 0x1FF
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    addw $511, (%rsi) # imm = 0x1FF
+; BTVER2-NEXT:    # sched: [4:1.00]
+; BTVER2-NEXT:    addw $7, %di # sched: [1:0.50]
+; BTVER2-NEXT:    addw $7, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    addw %di, %di # sched: [1:0.50]
+; BTVER2-NEXT:    addw %di, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    addw (%rsi), %di # sched: [4:1.00]
 ; BTVER2-NEXT:    #NO_APP
-; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    # kill: def %ax killed %ax killed %eax
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_bsf16:
+; ZNVER1-LABEL: test_add_16:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    bsfw %di, %ax # sched: [3:0.25]
-; ZNVER1-NEXT:    bsfw (%rsi), %cx # sched: [7:0.50]
+; ZNVER1-NEXT:    addw $511, %ax # imm = 0x1FF
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    addw $511, %di # imm = 0x1FF
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    addw $511, (%rsi) # imm = 0x1FF
+; ZNVER1-NEXT:    # sched: [5:0.50]
+; ZNVER1-NEXT:    addw $7, %di # sched: [1:0.25]
+; ZNVER1-NEXT:    addw $7, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    addw %di, %di # sched: [1:0.25]
+; ZNVER1-NEXT:    addw %di, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    addw (%rsi), %di # sched: [5:0.50]
 ; ZNVER1-NEXT:    #NO_APP
-; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    # kill: def %ax killed %ax killed %eax
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call { i16, i16 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i16 %a0, i16* %a1)
-  %2 = extractvalue { i16, i16 } %1, 0
-  %3 = extractvalue { i16, i16 } %1, 1
-  %4 = or i16 %2, %3
-  ret i16 %4
+  tail call void asm "addw $2, %AX \0A\09 addw $2, $0 \0A\09 addw $2, $1 \0A\09 addw $3, $0 \0A\09 addw $3, $1 \0A\09 addw $0, $0 \0A\09 addw $0, $1 \0A\09 addw $1, $0", "r,*m,i,i"(i16 %a0, i16* %a1, i16 511, i8 7) nounwind
+  ret void
 }
-define i32 @test_bsf32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_bsf32:
+define void @test_add_32(i32 %a0, i32* %a1) optsize {
+; GENERIC-LABEL: test_add_32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
-; GENERIC-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
+; GENERIC-NEXT:    addl $665536, %eax # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    addl $665536, %edi # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    addl $665536, (%rsi) # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [7:1.00]
+; GENERIC-NEXT:    addl $7, %edi # sched: [1:0.33]
+; GENERIC-NEXT:    addl $7, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    addl %edi, %edi # sched: [1:0.33]
+; GENERIC-NEXT:    addl %edi, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    addl (%rsi), %edi # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_bsf32:
+; ATOM-LABEL: test_add_32:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    bsfl %edi, %eax # sched: [16:8.00]
-; ATOM-NEXT:    bsfl (%rsi), %ecx # sched: [16:8.00]
+; ATOM-NEXT:    addl $665536, %eax # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    addl $665536, %edi # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    addl $665536, (%rsi) # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:1.00]
+; ATOM-NEXT:    addl $7, %edi # sched: [1:0.50]
+; ATOM-NEXT:    addl $7, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    addl %edi, %edi # sched: [1:0.50]
+; ATOM-NEXT:    addl %edi, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    addl (%rsi), %edi # sched: [1:1.00]
 ; ATOM-NEXT:    #NO_APP
-; ATOM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_bsf32:
+; SLM-LABEL: test_add_32:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    bsfl %edi, %eax # sched: [1:1.00]
-; SLM-NEXT:    bsfl (%rsi), %ecx # sched: [4:1.00]
+; SLM-NEXT:    addl $665536, %eax # imm = 0xA27C0
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    addl $665536, %edi # imm = 0xA27C0
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    addl $665536, (%rsi) # imm = 0xA27C0
+; SLM-NEXT:    # sched: [4:2.00]
+; SLM-NEXT:    addl $7, %edi # sched: [1:0.50]
+; SLM-NEXT:    addl $7, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    addl %edi, %edi # sched: [1:0.50]
+; SLM-NEXT:    addl %edi, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    addl (%rsi), %edi # sched: [4:1.00]
 ; SLM-NEXT:    #NO_APP
-; SLM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_bsf32:
+; SANDY-LABEL: test_add_32:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
-; SANDY-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
+; SANDY-NEXT:    addl $665536, %eax # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    addl $665536, %edi # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    addl $665536, (%rsi) # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [7:1.00]
+; SANDY-NEXT:    addl $7, %edi # sched: [1:0.33]
+; SANDY-NEXT:    addl $7, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    addl %edi, %edi # sched: [1:0.33]
+; SANDY-NEXT:    addl %edi, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    addl (%rsi), %edi # sched: [6:0.50]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_bsf32:
+; HASWELL-LABEL: test_add_32:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
+; HASWELL-NEXT:    addl $665536, %eax # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    addl $665536, %edi # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    addl $665536, (%rsi) # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [7:1.00]
+; HASWELL-NEXT:    addl $7, %edi # sched: [1:0.25]
+; HASWELL-NEXT:    addl $7, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    addl %edi, %edi # sched: [1:0.25]
+; HASWELL-NEXT:    addl %edi, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    addl (%rsi), %edi # sched: [6:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_bsf32:
+; BROADWELL-LABEL: test_add_32:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
+; BROADWELL-NEXT:    addl $665536, %eax # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    addl $665536, %edi # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    addl $665536, (%rsi) # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [6:1.00]
+; BROADWELL-NEXT:    addl $7, %edi # sched: [1:0.25]
+; BROADWELL-NEXT:    addl $7, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    addl %edi, %edi # sched: [1:0.25]
+; BROADWELL-NEXT:    addl %edi, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    addl (%rsi), %edi # sched: [6:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_bsf32:
+; SKYLAKE-LABEL: test_add_32:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
+; SKYLAKE-NEXT:    addl $665536, %eax # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    addl $665536, %edi # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    addl $665536, (%rsi) # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [6:1.00]
+; SKYLAKE-NEXT:    addl $7, %edi # sched: [1:0.25]
+; SKYLAKE-NEXT:    addl $7, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    addl %edi, %edi # sched: [1:0.25]
+; SKYLAKE-NEXT:    addl %edi, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    addl (%rsi), %edi # sched: [6:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_bsf32:
+; SKX-LABEL: test_add_32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
-; SKX-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
+; SKX-NEXT:    addl $665536, %eax # imm = 0xA27C0
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    addl $665536, %edi # imm = 0xA27C0
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    addl $665536, (%rsi) # imm = 0xA27C0
+; SKX-NEXT:    # sched: [6:1.00]
+; SKX-NEXT:    addl $7, %edi # sched: [1:0.25]
+; SKX-NEXT:    addl $7, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    addl %edi, %edi # sched: [1:0.25]
+; SKX-NEXT:    addl %edi, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    addl (%rsi), %edi # sched: [6:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_bsf32:
+; BTVER2-LABEL: test_add_32:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    bsfl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    bsfl (%rsi), %ecx # sched: [4:1.00]
+; BTVER2-NEXT:    addl $665536, %eax # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    addl $665536, %edi # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    addl $665536, (%rsi) # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [4:1.00]
+; BTVER2-NEXT:    addl $7, %edi # sched: [1:0.50]
+; BTVER2-NEXT:    addl $7, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    addl %edi, %edi # sched: [1:0.50]
+; BTVER2-NEXT:    addl %edi, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    addl (%rsi), %edi # sched: [4:1.00]
 ; BTVER2-NEXT:    #NO_APP
-; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_bsf32:
+; ZNVER1-LABEL: test_add_32:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    bsfl %edi, %eax # sched: [3:0.25]
-; ZNVER1-NEXT:    bsfl (%rsi), %ecx # sched: [7:0.50]
+; ZNVER1-NEXT:    addl $665536, %eax # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    addl $665536, %edi # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    addl $665536, (%rsi) # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [5:0.50]
+; ZNVER1-NEXT:    addl $7, %edi # sched: [1:0.25]
+; ZNVER1-NEXT:    addl $7, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    addl %edi, %edi # sched: [1:0.25]
+; ZNVER1-NEXT:    addl %edi, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    addl (%rsi), %edi # sched: [5:0.50]
 ; ZNVER1-NEXT:    #NO_APP
-; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call { i32, i32 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i32 %a0, i32* %a1)
-  %2 = extractvalue { i32, i32 } %1, 0
-  %3 = extractvalue { i32, i32 } %1, 1
-  %4 = or i32 %2, %3
-  ret i32 %4
+  tail call void asm "addl $2, %EAX \0A\09 addl $2, $0 \0A\09 addl $2, $1 \0A\09 addl $3, $0 \0A\09 addl $3, $1 \0A\09 addl $0, $0 \0A\09 addl $0, $1 \0A\09 addl $1, $0", "r,*m,i,i"(i32 %a0, i32* %a1, i32 665536, i8 7) nounwind
+  ret void
 }
-define i64 @test_bsf64(i64 %a0, i64* %a1) optsize {
-; GENERIC-LABEL: test_bsf64:
+define void @test_add_64(i64 %a0, i64* %a1) optsize {
+; GENERIC-LABEL: test_add_64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
-; GENERIC-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
+; GENERIC-NEXT:    addq $665536, %rax # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    addq $665536, %rdi # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    addq $665536, (%rsi) # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [7:1.00]
+; GENERIC-NEXT:    addq $7, %rdi # sched: [1:0.33]
+; GENERIC-NEXT:    addq $7, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    addq %rdi, %rdi # sched: [1:0.33]
+; GENERIC-NEXT:    addq %rdi, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    addq (%rsi), %rdi # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
-; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_bsf64:
+; ATOM-LABEL: test_add_64:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    bsfq %rdi, %rax # sched: [16:8.00]
-; ATOM-NEXT:    bsfq (%rsi), %rcx # sched: [16:8.00]
+; ATOM-NEXT:    addq $665536, %rax # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    addq $665536, %rdi # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    addq $665536, (%rsi) # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:1.00]
+; ATOM-NEXT:    addq $7, %rdi # sched: [1:0.50]
+; ATOM-NEXT:    addq $7, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    addq %rdi, %rdi # sched: [1:0.50]
+; ATOM-NEXT:    addq %rdi, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    addq (%rsi), %rdi # sched: [1:1.00]
 ; ATOM-NEXT:    #NO_APP
-; ATOM-NEXT:    orq %rcx, %rax # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_bsf64:
+; SLM-LABEL: test_add_64:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    bsfq %rdi, %rax # sched: [1:1.00]
-; SLM-NEXT:    bsfq (%rsi), %rcx # sched: [4:1.00]
+; SLM-NEXT:    addq $665536, %rax # imm = 0xA27C0
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    addq $665536, %rdi # imm = 0xA27C0
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    addq $665536, (%rsi) # imm = 0xA27C0
+; SLM-NEXT:    # sched: [4:2.00]
+; SLM-NEXT:    addq $7, %rdi # sched: [1:0.50]
+; SLM-NEXT:    addq $7, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    addq %rdi, %rdi # sched: [1:0.50]
+; SLM-NEXT:    addq %rdi, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    addq (%rsi), %rdi # sched: [4:1.00]
 ; SLM-NEXT:    #NO_APP
-; SLM-NEXT:    orq %rcx, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_bsf64:
+; SANDY-LABEL: test_add_64:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
-; SANDY-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
+; SANDY-NEXT:    addq $665536, %rax # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    addq $665536, %rdi # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    addq $665536, (%rsi) # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [7:1.00]
+; SANDY-NEXT:    addq $7, %rdi # sched: [1:0.33]
+; SANDY-NEXT:    addq $7, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    addq %rdi, %rdi # sched: [1:0.33]
+; SANDY-NEXT:    addq %rdi, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    addq (%rsi), %rdi # sched: [6:0.50]
 ; SANDY-NEXT:    #NO_APP
-; SANDY-NEXT:    orq %rcx, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_bsf64:
+; HASWELL-LABEL: test_add_64:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
+; HASWELL-NEXT:    addq $665536, %rax # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    addq $665536, %rdi # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    addq $665536, (%rsi) # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [7:1.00]
+; HASWELL-NEXT:    addq $7, %rdi # sched: [1:0.25]
+; HASWELL-NEXT:    addq $7, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
+; HASWELL-NEXT:    addq %rdi, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    addq (%rsi), %rdi # sched: [6:0.50]
 ; HASWELL-NEXT:    #NO_APP
-; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_bsf64:
+; BROADWELL-LABEL: test_add_64:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
+; BROADWELL-NEXT:    addq $665536, %rax # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    addq $665536, %rdi # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    addq $665536, (%rsi) # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [6:1.00]
+; BROADWELL-NEXT:    addq $7, %rdi # sched: [1:0.25]
+; BROADWELL-NEXT:    addq $7, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
+; BROADWELL-NEXT:    addq %rdi, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    addq (%rsi), %rdi # sched: [6:0.50]
 ; BROADWELL-NEXT:    #NO_APP
-; BROADWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_bsf64:
+; SKYLAKE-LABEL: test_add_64:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
+; SKYLAKE-NEXT:    addq $665536, %rax # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    addq $665536, %rdi # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    addq $665536, (%rsi) # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [6:1.00]
+; SKYLAKE-NEXT:    addq $7, %rdi # sched: [1:0.25]
+; SKYLAKE-NEXT:    addq $7, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
+; SKYLAKE-NEXT:    addq %rdi, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    addq (%rsi), %rdi # sched: [6:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
-; SKYLAKE-NEXT:    orq %rcx, %rax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_bsf64:
+; SKX-LABEL: test_add_64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
-; SKX-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
+; SKX-NEXT:    addq $665536, %rax # imm = 0xA27C0
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    addq $665536, %rdi # imm = 0xA27C0
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    addq $665536, (%rsi) # imm = 0xA27C0
+; SKX-NEXT:    # sched: [6:1.00]
+; SKX-NEXT:    addq $7, %rdi # sched: [1:0.25]
+; SKX-NEXT:    addq $7, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
+; SKX-NEXT:    addq %rdi, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    addq (%rsi), %rdi # sched: [6:0.50]
 ; SKX-NEXT:    #NO_APP
-; SKX-NEXT:    orq %rcx, %rax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_bsf64:
+; BTVER2-LABEL: test_add_64:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    bsfq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    bsfq (%rsi), %rcx # sched: [4:1.00]
+; BTVER2-NEXT:    addq $665536, %rax # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    addq $665536, %rdi # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    addq $665536, (%rsi) # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [4:1.00]
+; BTVER2-NEXT:    addq $7, %rdi # sched: [1:0.50]
+; BTVER2-NEXT:    addq $7, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    addq %rdi, %rdi # sched: [1:0.50]
+; BTVER2-NEXT:    addq %rdi, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    addq (%rsi), %rdi # sched: [4:1.00]
 ; BTVER2-NEXT:    #NO_APP
-; BTVER2-NEXT:    orq %rcx, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_bsf64:
+; ZNVER1-LABEL: test_add_64:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    bsfq %rdi, %rax # sched: [3:0.25]
-; ZNVER1-NEXT:    bsfq (%rsi), %rcx # sched: [7:0.50]
+; ZNVER1-NEXT:    addq $665536, %rax # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    addq $665536, %rdi # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    addq $665536, (%rsi) # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [5:0.50]
+; ZNVER1-NEXT:    addq $7, %rdi # sched: [1:0.25]
+; ZNVER1-NEXT:    addq $7, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
+; ZNVER1-NEXT:    addq %rdi, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    addq (%rsi), %rdi # sched: [5:0.50]
 ; ZNVER1-NEXT:    #NO_APP
-; ZNVER1-NEXT:    orq %rcx, %rax # sched: [1:0.25]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call { i64, i64 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i64 %a0, i64* %a1)
-  %2 = extractvalue { i64, i64 } %1, 0
-  %3 = extractvalue { i64, i64 } %1, 1
-  %4 = or i64 %2, %3
-  ret i64 %4
+  tail call void asm "addq $2, %RAX \0A\09 addq $2, $0 \0A\09 addq $2, $1 \0A\09 addq $3, $0 \0A\09 addq $3, $1 \0A\09 addq $0, $0 \0A\09 addq $0, $1 \0A\09 addq $1, $0", "r,*m,i,i"(i64 %a0, i64* %a1, i32 665536, i8 7) nounwind
+  ret void
 }
 
-define i16 @test_bsr16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_bsr16:
+define void @test_adox(i32 %a0, i32* %a1, i64 %a2, i64* %a3) optsize {
+; GENERIC-LABEL: test_adox:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    bsrw %di, %ax # sched: [3:1.00]
-; GENERIC-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; GENERIC-NEXT:    adoxl %edi, %edi # sched: [1:0.33]
+; GENERIC-NEXT:    adoxq %rdx, %rdx # sched: [1:0.33]
+; GENERIC-NEXT:    adoxl (%rsi), %edi # sched: [5:0.50]
+; GENERIC-NEXT:    adoxq (%rcx), %rdx # sched: [5:0.50]
+; GENERIC-NEXT:    #NO_APP
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; ATOM-LABEL: test_adox:
+; ATOM:       # %bb.0:
+; ATOM-NEXT:    #APP
+; ATOM-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
+; ATOM-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
+; ATOM-NEXT:    adoxl (%rsi), %edi # sched: [1:1.00]
+; ATOM-NEXT:    adoxq (%rcx), %rdx # sched: [1:1.00]
+; ATOM-NEXT:    #NO_APP
+; ATOM-NEXT:    retq # sched: [79:39.50]
+;
+; SLM-LABEL: test_adox:
+; SLM:       # %bb.0:
+; SLM-NEXT:    #APP
+; SLM-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
+; SLM-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
+; SLM-NEXT:    adoxl (%rsi), %edi # sched: [4:1.00]
+; SLM-NEXT:    adoxq (%rcx), %rdx # sched: [4:1.00]
+; SLM-NEXT:    #NO_APP
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_adox:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    #APP
+; SANDY-NEXT:    adoxl %edi, %edi # sched: [1:0.33]
+; SANDY-NEXT:    adoxq %rdx, %rdx # sched: [1:0.33]
+; SANDY-NEXT:    adoxl (%rsi), %edi # sched: [5:0.50]
+; SANDY-NEXT:    adoxq (%rcx), %rdx # sched: [5:0.50]
+; SANDY-NEXT:    #NO_APP
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_adox:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    #APP
+; HASWELL-NEXT:    adoxl %edi, %edi # sched: [1:0.25]
+; HASWELL-NEXT:    adoxq %rdx, %rdx # sched: [1:0.25]
+; HASWELL-NEXT:    adoxl (%rsi), %edi # sched: [6:0.50]
+; HASWELL-NEXT:    adoxq (%rcx), %rdx # sched: [6:0.50]
+; HASWELL-NEXT:    #NO_APP
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_adox:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    #APP
+; BROADWELL-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
+; BROADWELL-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
+; BROADWELL-NEXT:    adoxl (%rsi), %edi # sched: [6:0.50]
+; BROADWELL-NEXT:    adoxq (%rcx), %rdx # sched: [6:0.50]
+; BROADWELL-NEXT:    #NO_APP
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_adox:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    #APP
+; SKYLAKE-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
+; SKYLAKE-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
+; SKYLAKE-NEXT:    adoxl (%rsi), %edi # sched: [6:0.50]
+; SKYLAKE-NEXT:    adoxq (%rcx), %rdx # sched: [6:0.50]
+; SKYLAKE-NEXT:    #NO_APP
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; SKX-LABEL: test_adox:
+; SKX:       # %bb.0:
+; SKX-NEXT:    #APP
+; SKX-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
+; SKX-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
+; SKX-NEXT:    adoxl (%rsi), %edi # sched: [6:0.50]
+; SKX-NEXT:    adoxq (%rcx), %rdx # sched: [6:0.50]
+; SKX-NEXT:    #NO_APP
+; SKX-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_adox:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    #APP
+; BTVER2-NEXT:    adoxl %edi, %edi # sched: [1:0.50]
+; BTVER2-NEXT:    adoxq %rdx, %rdx # sched: [1:0.50]
+; BTVER2-NEXT:    adoxl (%rsi), %edi # sched: [4:1.00]
+; BTVER2-NEXT:    adoxq (%rcx), %rdx # sched: [4:1.00]
+; BTVER2-NEXT:    #NO_APP
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_adox:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    #APP
+; ZNVER1-NEXT:    adoxl %edi, %edi # sched: [1:0.25]
+; ZNVER1-NEXT:    adoxq %rdx, %rdx # sched: [1:0.25]
+; ZNVER1-NEXT:    adoxl (%rsi), %edi # sched: [5:0.50]
+; ZNVER1-NEXT:    adoxq (%rcx), %rdx # sched: [5:0.50]
+; ZNVER1-NEXT:    #NO_APP
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  tail call void asm "adox $0, $0 \0A\09 adox $2, $2 \0A\09 adox $1, $0 \0A\09 adox $3, $2", "r,*m,r,*m"(i32 %a0, i32* %a1, i64 %a2, i64* %a3) nounwind
+  ret void
+}
+
+; TODO - test_and
+
+define i16 @test_bsf16(i16 %a0, i16* %a1) optsize {
+; GENERIC-LABEL: test_bsf16:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    #APP
+; GENERIC-NEXT:    bsfw %di, %ax # sched: [3:1.00]
+; GENERIC-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def %ax killed %ax killed %eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_bsr16:
+; ATOM-LABEL: test_bsf16:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    bsrw %di, %ax # sched: [16:8.00]
-; ATOM-NEXT:    bsrw (%rsi), %cx # sched: [16:8.00]
+; ATOM-NEXT:    bsfw %di, %ax # sched: [16:8.00]
+; ATOM-NEXT:    bsfw (%rsi), %cx # sched: [16:8.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; ATOM-NEXT:    # kill: def %ax killed %ax killed %eax
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_bsr16:
+; SLM-LABEL: test_bsf16:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    bsrw %di, %ax # sched: [1:1.00]
-; SLM-NEXT:    bsrw (%rsi), %cx # sched: [4:1.00]
+; SLM-NEXT:    bsfw %di, %ax # sched: [1:1.00]
+; SLM-NEXT:    bsfw (%rsi), %cx # sched: [4:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; SLM-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_bsr16:
+; SANDY-LABEL: test_bsf16:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    bsrw %di, %ax # sched: [3:1.00]
-; SANDY-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; SANDY-NEXT:    bsfw %di, %ax # sched: [3:1.00]
+; SANDY-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_bsr16:
+; HASWELL-LABEL: test_bsf16:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    bsrw %di, %ax # sched: [3:1.00]
-; HASWELL-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; HASWELL-NEXT:    bsfw %di, %ax # sched: [3:1.00]
+; HASWELL-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    # kill: def %ax killed %ax killed %eax
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_bsr16:
+; BROADWELL-LABEL: test_bsf16:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    bsrw %di, %ax # sched: [3:1.00]
-; BROADWELL-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; BROADWELL-NEXT:    bsfw %di, %ax # sched: [3:1.00]
+; BROADWELL-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    # kill: def %ax killed %ax killed %eax
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_bsr16:
+; SKYLAKE-LABEL: test_bsf16:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    bsrw %di, %ax # sched: [3:1.00]
-; SKYLAKE-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; SKYLAKE-NEXT:    bsfw %di, %ax # sched: [3:1.00]
+; SKYLAKE-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_bsr16:
+; SKX-LABEL: test_bsf16:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    bsrw %di, %ax # sched: [3:1.00]
-; SKX-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; SKX-NEXT:    bsfw %di, %ax # sched: [3:1.00]
+; SKX-NEXT:    bsfw (%rsi), %cx # sched: [8:1.00]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; SKX-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_bsr16:
+; BTVER2-LABEL: test_bsf16:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    bsrw %di, %ax # sched: [1:0.50]
-; BTVER2-NEXT:    bsrw (%rsi), %cx # sched: [4:1.00]
+; BTVER2-NEXT:    bsfw %di, %ax # sched: [1:0.50]
+; BTVER2-NEXT:    bsfw (%rsi), %cx # sched: [4:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    # kill: def %ax killed %ax killed %eax
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_bsr16:
+; ZNVER1-LABEL: test_bsf16:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    bsrw %di, %ax # sched: [3:0.25]
-; ZNVER1-NEXT:    bsrw (%rsi), %cx # sched: [7:0.50]
+; ZNVER1-NEXT:    bsfw %di, %ax # sched: [3:0.25]
+; ZNVER1-NEXT:    bsfw (%rsi), %cx # sched: [7:0.50]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; ZNVER1-NEXT:    # kill: def %ax killed %ax killed %eax
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call { i16, i16 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i16 %a0, i16* %a1)
+  %1 = call { i16, i16 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i16 %a0, i16* %a1)
   %2 = extractvalue { i16, i16 } %1, 0
   %3 = extractvalue { i16, i16 } %1, 1
   %4 = or i16 %2, %3
   ret i16 %4
 }
-define i32 @test_bsr32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_bsr32:
+define i32 @test_bsf32(i32 %a0, i32* %a1) optsize {
+; GENERIC-LABEL: test_bsf32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
-; GENERIC-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; GENERIC-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_bsr32:
+; ATOM-LABEL: test_bsf32:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    bsrl %edi, %eax # sched: [16:8.00]
-; ATOM-NEXT:    bsrl (%rsi), %ecx # sched: [16:8.00]
+; ATOM-NEXT:    bsfl %edi, %eax # sched: [16:8.00]
+; ATOM-NEXT:    bsfl (%rsi), %ecx # sched: [16:8.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_bsr32:
+; SLM-LABEL: test_bsf32:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    bsrl %edi, %eax # sched: [1:1.00]
-; SLM-NEXT:    bsrl (%rsi), %ecx # sched: [4:1.00]
+; SLM-NEXT:    bsfl %edi, %eax # sched: [1:1.00]
+; SLM-NEXT:    bsfl (%rsi), %ecx # sched: [4:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_bsr32:
+; SANDY-LABEL: test_bsf32:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
-; SANDY-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; SANDY-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
+; SANDY-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_bsr32:
+; HASWELL-LABEL: test_bsf32:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; HASWELL-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
+; HASWELL-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_bsr32:
+; BROADWELL-LABEL: test_bsf32:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; BROADWELL-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
+; BROADWELL-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_bsr32:
+; SKYLAKE-LABEL: test_bsf32:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; SKYLAKE-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
+; SKYLAKE-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_bsr32:
+; SKX-LABEL: test_bsf32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
-; SKX-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; SKX-NEXT:    bsfl %edi, %eax # sched: [3:1.00]
+; SKX-NEXT:    bsfl (%rsi), %ecx # sched: [8:1.00]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_bsr32:
+; BTVER2-LABEL: test_bsf32:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    bsrl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    bsrl (%rsi), %ecx # sched: [4:1.00]
+; BTVER2-NEXT:    bsfl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    bsfl (%rsi), %ecx # sched: [4:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_bsr32:
+; ZNVER1-LABEL: test_bsf32:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    bsrl %edi, %eax # sched: [3:0.25]
-; ZNVER1-NEXT:    bsrl (%rsi), %ecx # sched: [7:0.50]
+; ZNVER1-NEXT:    bsfl %edi, %eax # sched: [3:0.25]
+; ZNVER1-NEXT:    bsfl (%rsi), %ecx # sched: [7:0.50]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call { i32, i32 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i32 %a0, i32* %a1)
+  %1 = call { i32, i32 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i32 %a0, i32* %a1)
   %2 = extractvalue { i32, i32 } %1, 0
   %3 = extractvalue { i32, i32 } %1, 1
   %4 = or i32 %2, %3
   ret i32 %4
 }
-define i64 @test_bsr64(i64 %a0, i64* %a1) optsize {
-; GENERIC-LABEL: test_bsr64:
+define i64 @test_bsf64(i64 %a0, i64* %a1) optsize {
+; GENERIC-LABEL: test_bsf64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
-; GENERIC-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; GENERIC-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
+; GENERIC-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_bsr64:
+; ATOM-LABEL: test_bsf64:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    bsrq %rdi, %rax # sched: [16:8.00]
-; ATOM-NEXT:    bsrq (%rsi), %rcx # sched: [16:8.00]
+; ATOM-NEXT:    bsfq %rdi, %rax # sched: [16:8.00]
+; ATOM-NEXT:    bsfq (%rsi), %rcx # sched: [16:8.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    orq %rcx, %rax # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_bsr64:
+; SLM-LABEL: test_bsf64:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    bsrq %rdi, %rax # sched: [1:1.00]
-; SLM-NEXT:    bsrq (%rsi), %rcx # sched: [4:1.00]
+; SLM-NEXT:    bsfq %rdi, %rax # sched: [1:1.00]
+; SLM-NEXT:    bsfq (%rsi), %rcx # sched: [4:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    orq %rcx, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_bsr64:
+; SANDY-LABEL: test_bsf64:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
-; SANDY-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; SANDY-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
+; SANDY-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    orq %rcx, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_bsr64:
+; HASWELL-LABEL: test_bsf64:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; HASWELL-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
+; HASWELL-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_bsr64:
+; BROADWELL-LABEL: test_bsf64:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; BROADWELL-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
+; BROADWELL-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_bsr64:
+; SKYLAKE-LABEL: test_bsf64:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; SKYLAKE-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
+; SKYLAKE-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    orq %rcx, %rax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_bsr64:
+; SKX-LABEL: test_bsf64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
-; SKX-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; SKX-NEXT:    bsfq %rdi, %rax # sched: [3:1.00]
+; SKX-NEXT:    bsfq (%rsi), %rcx # sched: [8:1.00]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    orq %rcx, %rax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_bsr64:
+; BTVER2-LABEL: test_bsf64:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    bsrq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    bsrq (%rsi), %rcx # sched: [4:1.00]
+; BTVER2-NEXT:    bsfq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    bsfq (%rsi), %rcx # sched: [4:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    orq %rcx, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_bsr64:
+; ZNVER1-LABEL: test_bsf64:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    bsrq %rdi, %rax # sched: [3:0.25]
-; ZNVER1-NEXT:    bsrq (%rsi), %rcx # sched: [7:0.50]
+; ZNVER1-NEXT:    bsfq %rdi, %rax # sched: [3:0.25]
+; ZNVER1-NEXT:    bsfq (%rsi), %rcx # sched: [7:0.50]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    orq %rcx, %rax # sched: [1:0.25]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = call { i64, i64 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i64 %a0, i64* %a1)
+  %1 = call { i64, i64 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i64 %a0, i64* %a1)
   %2 = extractvalue { i64, i64 } %1, 0
   %3 = extractvalue { i64, i64 } %1, 1
   %4 = or i64 %2, %3
   ret i64 %4
 }
 
-define i32 @test_bswap32(i32 %a0) optsize {
-; GENERIC-LABEL: test_bswap32:
+define i16 @test_bsr16(i16 %a0, i16* %a1) optsize {
+; GENERIC-LABEL: test_bsr16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    bswapl %edi # sched: [2:1.00]
-; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    #APP
+; GENERIC-NEXT:    bsrw %di, %ax # sched: [3:1.00]
+; GENERIC-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; GENERIC-NEXT:    #NO_APP
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def %ax killed %ax killed %eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_bswap32:
+; ATOM-LABEL: test_bsr16:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    bswapl %edi # sched: [1:1.00]
-; ATOM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; ATOM-NEXT:    #APP
+; ATOM-NEXT:    bsrw %di, %ax # sched: [16:8.00]
+; ATOM-NEXT:    bsrw (%rsi), %cx # sched: [16:8.00]
+; ATOM-NEXT:    #NO_APP
+; ATOM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; ATOM-NEXT:    # kill: def %ax killed %ax killed %eax
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_bswap32:
+; SLM-LABEL: test_bsr16:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    bswapl %edi # sched: [1:0.50]
-; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NEXT:    #APP
+; SLM-NEXT:    bsrw %di, %ax # sched: [1:1.00]
+; SLM-NEXT:    bsrw (%rsi), %cx # sched: [4:1.00]
+; SLM-NEXT:    #NO_APP
+; SLM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; SLM-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_bswap32:
+; SANDY-LABEL: test_bsr16:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    bswapl %edi # sched: [2:1.00]
-; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-NEXT:    #APP
+; SANDY-NEXT:    bsrw %di, %ax # sched: [3:1.00]
+; SANDY-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; SANDY-NEXT:    #NO_APP
+; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; SANDY-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_bswap32:
+; HASWELL-LABEL: test_bsr16:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    bswapl %edi # sched: [2:0.50]
-; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    #APP
+; HASWELL-NEXT:    bsrw %di, %ax # sched: [3:1.00]
+; HASWELL-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; HASWELL-NEXT:    #NO_APP
+; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    # kill: def %ax killed %ax killed %eax
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_bswap32:
+; BROADWELL-LABEL: test_bsr16:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    bswapl %edi # sched: [2:0.50]
-; BROADWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    #APP
+; BROADWELL-NEXT:    bsrw %di, %ax # sched: [3:1.00]
+; BROADWELL-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; BROADWELL-NEXT:    #NO_APP
+; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    # kill: def %ax killed %ax killed %eax
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_bswap32:
+; SKYLAKE-LABEL: test_bsr16:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    bswapl %edi # sched: [2:0.50]
-; SKYLAKE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    #APP
+; SKYLAKE-NEXT:    bsrw %di, %ax # sched: [3:1.00]
+; SKYLAKE-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; SKYLAKE-NEXT:    #NO_APP
+; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_bswap32:
+; SKX-LABEL: test_bsr16:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    bswapl %edi # sched: [2:0.50]
-; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT:    #APP
+; SKX-NEXT:    bsrw %di, %ax # sched: [3:1.00]
+; SKX-NEXT:    bsrw (%rsi), %cx # sched: [8:1.00]
+; SKX-NEXT:    #NO_APP
+; SKX-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; SKX-NEXT:    # kill: def %ax killed %ax killed %eax
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_bswap32:
+; BTVER2-LABEL: test_bsr16:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    bswapl %edi # sched: [1:0.50]
-; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    #APP
+; BTVER2-NEXT:    bsrw %di, %ax # sched: [1:0.50]
+; BTVER2-NEXT:    bsrw (%rsi), %cx # sched: [4:1.00]
+; BTVER2-NEXT:    #NO_APP
+; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def %ax killed %ax killed %eax
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_bswap32:
+; ZNVER1-LABEL: test_bsr16:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    bswapl %edi # sched: [1:1.00]
-; ZNVER1-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    #APP
+; ZNVER1-NEXT:    bsrw %di, %ax # sched: [3:0.25]
+; ZNVER1-NEXT:    bsrw (%rsi), %cx # sched: [7:0.50]
+; ZNVER1-NEXT:    #NO_APP
+; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    # kill: def %ax killed %ax killed %eax
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = tail call i32 asm "bswap $0", "=r,0"(i32 %a0) nounwind
-  ret i32 %1
+  %1 = call { i16, i16 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i16 %a0, i16* %a1)
+  %2 = extractvalue { i16, i16 } %1, 0
+  %3 = extractvalue { i16, i16 } %1, 1
+  %4 = or i16 %2, %3
+  ret i16 %4
 }
-define i64 @test_bswap64(i64 %a0) optsize {
-; GENERIC-LABEL: test_bswap64:
+define i32 @test_bsr32(i32 %a0, i32* %a1) optsize {
+; GENERIC-LABEL: test_bsr32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    bswapq %rdi # sched: [2:1.00]
-; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    #APP
+; GENERIC-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; GENERIC-NEXT:    #NO_APP
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_bswap64:
+; ATOM-LABEL: test_bsr32:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    bswapq %rdi # sched: [1:1.00]
-; ATOM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; ATOM-NEXT:    #APP
+; ATOM-NEXT:    bsrl %edi, %eax # sched: [16:8.00]
+; ATOM-NEXT:    bsrl (%rsi), %ecx # sched: [16:8.00]
+; ATOM-NEXT:    #NO_APP
+; ATOM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_bswap64:
+; SLM-LABEL: test_bsr32:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    bswapq %rdi # sched: [1:0.50]
-; SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; SLM-NEXT:    #APP
+; SLM-NEXT:    bsrl %edi, %eax # sched: [1:1.00]
+; SLM-NEXT:    bsrl (%rsi), %ecx # sched: [4:1.00]
+; SLM-NEXT:    #NO_APP
+; SLM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_bswap64:
+; SANDY-LABEL: test_bsr32:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    bswapq %rdi # sched: [2:1.00]
-; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; SANDY-NEXT:    #APP
+; SANDY-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
+; SANDY-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; SANDY-NEXT:    #NO_APP
+; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_bswap64:
+; HASWELL-LABEL: test_bsr32:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    bswapq %rdi # sched: [2:0.50]
-; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    #APP
+; HASWELL-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
+; HASWELL-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; HASWELL-NEXT:    #NO_APP
+; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_bswap64:
+; BROADWELL-LABEL: test_bsr32:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    bswapq %rdi # sched: [2:0.50]
-; BROADWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    #APP
+; BROADWELL-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
+; BROADWELL-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; BROADWELL-NEXT:    #NO_APP
+; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_bswap64:
+; SKYLAKE-LABEL: test_bsr32:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    bswapq %rdi # sched: [2:0.50]
-; SKYLAKE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    #APP
+; SKYLAKE-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
+; SKYLAKE-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; SKYLAKE-NEXT:    #NO_APP
+; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_bswap64:
+; SKX-LABEL: test_bsr32:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    bswapq %rdi # sched: [2:0.50]
-; SKX-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKX-NEXT:    #APP
+; SKX-NEXT:    bsrl %edi, %eax # sched: [3:1.00]
+; SKX-NEXT:    bsrl (%rsi), %ecx # sched: [8:1.00]
+; SKX-NEXT:    #NO_APP
+; SKX-NEXT:    orl %ecx, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_bswap64:
+; BTVER2-LABEL: test_bsr32:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    bswapq %rdi # sched: [1:0.50]
-; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bswap64:
-; ZNVER1:       # %bb.0:
+; BTVER2-NEXT:    #APP
+; BTVER2-NEXT:    bsrl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    bsrl (%rsi), %ecx # sched: [4:1.00]
+; BTVER2-NEXT:    #NO_APP
+; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_bsr32:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    #APP
+; ZNVER1-NEXT:    bsrl %edi, %eax # sched: [3:0.25]
+; ZNVER1-NEXT:    bsrl (%rsi), %ecx # sched: [7:0.50]
+; ZNVER1-NEXT:    #NO_APP
+; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = call { i32, i32 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i32 %a0, i32* %a1)
+  %2 = extractvalue { i32, i32 } %1, 0
+  %3 = extractvalue { i32, i32 } %1, 1
+  %4 = or i32 %2, %3
+  ret i32 %4
+}
+define i64 @test_bsr64(i64 %a0, i64* %a1) optsize {
+; GENERIC-LABEL: test_bsr64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    #APP
+; GENERIC-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
+; GENERIC-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; GENERIC-NEXT:    #NO_APP
+; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; ATOM-LABEL: test_bsr64:
+; ATOM:       # %bb.0:
+; ATOM-NEXT:    #APP
+; ATOM-NEXT:    bsrq %rdi, %rax # sched: [16:8.00]
+; ATOM-NEXT:    bsrq (%rsi), %rcx # sched: [16:8.00]
+; ATOM-NEXT:    #NO_APP
+; ATOM-NEXT:    orq %rcx, %rax # sched: [1:0.50]
+; ATOM-NEXT:    retq # sched: [79:39.50]
+;
+; SLM-LABEL: test_bsr64:
+; SLM:       # %bb.0:
+; SLM-NEXT:    #APP
+; SLM-NEXT:    bsrq %rdi, %rax # sched: [1:1.00]
+; SLM-NEXT:    bsrq (%rsi), %rcx # sched: [4:1.00]
+; SLM-NEXT:    #NO_APP
+; SLM-NEXT:    orq %rcx, %rax # sched: [1:0.50]
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_bsr64:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    #APP
+; SANDY-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
+; SANDY-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; SANDY-NEXT:    #NO_APP
+; SANDY-NEXT:    orq %rcx, %rax # sched: [1:0.33]
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_bsr64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    #APP
+; HASWELL-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
+; HASWELL-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; HASWELL-NEXT:    #NO_APP
+; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_bsr64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    #APP
+; BROADWELL-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
+; BROADWELL-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; BROADWELL-NEXT:    #NO_APP
+; BROADWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_bsr64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    #APP
+; SKYLAKE-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
+; SKYLAKE-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; SKYLAKE-NEXT:    #NO_APP
+; SKYLAKE-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; SKX-LABEL: test_bsr64:
+; SKX:       # %bb.0:
+; SKX-NEXT:    #APP
+; SKX-NEXT:    bsrq %rdi, %rax # sched: [3:1.00]
+; SKX-NEXT:    bsrq (%rsi), %rcx # sched: [8:1.00]
+; SKX-NEXT:    #NO_APP
+; SKX-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; SKX-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_bsr64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    #APP
+; BTVER2-NEXT:    bsrq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    bsrq (%rsi), %rcx # sched: [4:1.00]
+; BTVER2-NEXT:    #NO_APP
+; BTVER2-NEXT:    orq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_bsr64:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    #APP
+; ZNVER1-NEXT:    bsrq %rdi, %rax # sched: [3:0.25]
+; ZNVER1-NEXT:    bsrq (%rsi), %rcx # sched: [7:0.50]
+; ZNVER1-NEXT:    #NO_APP
+; ZNVER1-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = call { i64, i64 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i64 %a0, i64* %a1)
+  %2 = extractvalue { i64, i64 } %1, 0
+  %3 = extractvalue { i64, i64 } %1, 1
+  %4 = or i64 %2, %3
+  ret i64 %4
+}
+
+define i32 @test_bswap32(i32 %a0) optsize {
+; GENERIC-LABEL: test_bswap32:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    bswapl %edi # sched: [2:1.00]
+; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; ATOM-LABEL: test_bswap32:
+; ATOM:       # %bb.0:
+; ATOM-NEXT:    bswapl %edi # sched: [1:1.00]
+; ATOM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; ATOM-NEXT:    retq # sched: [79:39.50]
+;
+; SLM-LABEL: test_bswap32:
+; SLM:       # %bb.0:
+; SLM-NEXT:    bswapl %edi # sched: [1:0.50]
+; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_bswap32:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    bswapl %edi # sched: [2:1.00]
+; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_bswap32:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    bswapl %edi # sched: [2:0.50]
+; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_bswap32:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    bswapl %edi # sched: [2:0.50]
+; BROADWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_bswap32:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    bswapl %edi # sched: [2:0.50]
+; SKYLAKE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; SKX-LABEL: test_bswap32:
+; SKX:       # %bb.0:
+; SKX-NEXT:    bswapl %edi # sched: [2:0.50]
+; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_bswap32:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    bswapl %edi # sched: [1:0.50]
+; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_bswap32:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    bswapl %edi # sched: [1:1.00]
+; ZNVER1-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = tail call i32 asm "bswap $0", "=r,0"(i32 %a0) nounwind
+  ret i32 %1
+}
+define i64 @test_bswap64(i64 %a0) optsize {
+; GENERIC-LABEL: test_bswap64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    bswapq %rdi # sched: [2:1.00]
+; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; ATOM-LABEL: test_bswap64:
+; ATOM:       # %bb.0:
+; ATOM-NEXT:    bswapq %rdi # sched: [1:1.00]
+; ATOM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; ATOM-NEXT:    retq # sched: [79:39.50]
+;
+; SLM-LABEL: test_bswap64:
+; SLM:       # %bb.0:
+; SLM-NEXT:    bswapq %rdi # sched: [1:0.50]
+; SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_bswap64:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    bswapq %rdi # sched: [2:1.00]
+; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_bswap64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    bswapq %rdi # sched: [2:0.50]
+; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_bswap64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    bswapq %rdi # sched: [2:0.50]
+; BROADWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_bswap64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    bswapq %rdi # sched: [2:0.50]
+; SKYLAKE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; SKX-LABEL: test_bswap64:
+; SKX:       # %bb.0:
+; SKX-NEXT:    bswapq %rdi # sched: [2:0.50]
+; SKX-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKX-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_bswap64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    bswapq %rdi # sched: [1:0.50]
+; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_bswap64:
+; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    bswapq %rdi # sched: [1:1.00]
 ; ZNVER1-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
@@ -9563,342 +10204,984 @@ define void @test_shld_shrd_32(i32 %a0,
 ; ZNVER1-NEXT:    shrdl $7, %esi, (%rdx) # sched: [5:0.50]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  call void asm sideeffect "shld $1, $0 \0A\09 shrd $1, $0 \0A\09 shld $1, $2 \0A\09 shrd $1, $2 \0A\09 shld $3, $1, $0 \0A\09 shrd $3, $1, $0 \0A\09 shld $3, $1, $2 \0A\09 shrd $3, $1, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7)
+  call void asm sideeffect "shld $1, $0 \0A\09 shrd $1, $0 \0A\09 shld $1, $2 \0A\09 shrd $1, $2 \0A\09 shld $3, $1, $0 \0A\09 shrd $3, $1, $0 \0A\09 shld $3, $1, $2 \0A\09 shrd $3, $1, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7)
+  ret void
+}
+define void @test_shld_shrd_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
+; GENERIC-LABEL: test_shld_shrd_64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    #APP
+; GENERIC-NEXT:    shldq %cl, %rsi, %rdi # sched: [4:1.50]
+; GENERIC-NEXT:    shrdq %cl, %rsi, %rdi # sched: [4:1.50]
+; GENERIC-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [10:1.50]
+; GENERIC-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [10:1.50]
+; GENERIC-NEXT:    shldq $7, %rsi, %rdi # sched: [2:0.67]
+; GENERIC-NEXT:    shrdq $7, %rsi, %rdi # sched: [2:0.67]
+; GENERIC-NEXT:    shldq $7, %rsi, (%rdx) # sched: [8:1.00]
+; GENERIC-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [8:1.00]
+; GENERIC-NEXT:    #NO_APP
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; ATOM-LABEL: test_shld_shrd_64:
+; ATOM:       # %bb.0:
+; ATOM-NEXT:    #APP
+; ATOM-NEXT:    shldq %cl, %rsi, %rdi # sched: [8:4.00]
+; ATOM-NEXT:    shrdq %cl, %rsi, %rdi # sched: [8:4.00]
+; ATOM-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [9:4.50]
+; ATOM-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [9:4.50]
+; ATOM-NEXT:    shldq $7, %rsi, %rdi # sched: [9:4.50]
+; ATOM-NEXT:    shrdq $7, %rsi, %rdi # sched: [9:4.50]
+; ATOM-NEXT:    shldq $7, %rsi, (%rdx) # sched: [9:4.50]
+; ATOM-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [9:4.50]
+; ATOM-NEXT:    #NO_APP
+; ATOM-NEXT:    retq # sched: [79:39.50]
+;
+; SLM-LABEL: test_shld_shrd_64:
+; SLM:       # %bb.0:
+; SLM-NEXT:    #APP
+; SLM-NEXT:    shldq %cl, %rsi, %rdi # sched: [1:1.00]
+; SLM-NEXT:    shrdq %cl, %rsi, %rdi # sched: [1:1.00]
+; SLM-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [4:2.00]
+; SLM-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [4:2.00]
+; SLM-NEXT:    shldq $7, %rsi, %rdi # sched: [1:1.00]
+; SLM-NEXT:    shrdq $7, %rsi, %rdi # sched: [1:1.00]
+; SLM-NEXT:    shldq $7, %rsi, (%rdx) # sched: [4:2.00]
+; SLM-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [4:2.00]
+; SLM-NEXT:    #NO_APP
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_shld_shrd_64:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    #APP
+; SANDY-NEXT:    shldq %cl, %rsi, %rdi # sched: [4:1.50]
+; SANDY-NEXT:    shrdq %cl, %rsi, %rdi # sched: [4:1.50]
+; SANDY-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [10:1.50]
+; SANDY-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [10:1.50]
+; SANDY-NEXT:    shldq $7, %rsi, %rdi # sched: [2:0.67]
+; SANDY-NEXT:    shrdq $7, %rsi, %rdi # sched: [2:0.67]
+; SANDY-NEXT:    shldq $7, %rsi, (%rdx) # sched: [8:1.00]
+; SANDY-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [8:1.00]
+; SANDY-NEXT:    #NO_APP
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_shld_shrd_64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    #APP
+; HASWELL-NEXT:    shldq %cl, %rsi, %rdi # sched: [6:1.00]
+; HASWELL-NEXT:    shrdq %cl, %rsi, %rdi # sched: [6:1.00]
+; HASWELL-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [12:1.00]
+; HASWELL-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [12:1.00]
+; HASWELL-NEXT:    shldq $7, %rsi, %rdi # sched: [3:1.00]
+; HASWELL-NEXT:    shrdq $7, %rsi, %rdi # sched: [3:1.00]
+; HASWELL-NEXT:    shldq $7, %rsi, (%rdx) # sched: [10:1.00]
+; HASWELL-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [10:1.00]
+; HASWELL-NEXT:    #NO_APP
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_shld_shrd_64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    #APP
+; BROADWELL-NEXT:    shldq %cl, %rsi, %rdi # sched: [6:1.00]
+; BROADWELL-NEXT:    shrdq %cl, %rsi, %rdi # sched: [6:1.00]
+; BROADWELL-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [11:1.00]
+; BROADWELL-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [11:1.00]
+; BROADWELL-NEXT:    shldq $7, %rsi, %rdi # sched: [3:1.00]
+; BROADWELL-NEXT:    shrdq $7, %rsi, %rdi # sched: [3:1.00]
+; BROADWELL-NEXT:    shldq $7, %rsi, (%rdx) # sched: [9:1.00]
+; BROADWELL-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [9:1.00]
+; BROADWELL-NEXT:    #NO_APP
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_shld_shrd_64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    #APP
+; SKYLAKE-NEXT:    shldq %cl, %rsi, %rdi # sched: [6:1.00]
+; SKYLAKE-NEXT:    shrdq %cl, %rsi, %rdi # sched: [6:1.00]
+; SKYLAKE-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [11:1.00]
+; SKYLAKE-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [11:1.00]
+; SKYLAKE-NEXT:    shldq $7, %rsi, %rdi # sched: [3:1.00]
+; SKYLAKE-NEXT:    shrdq $7, %rsi, %rdi # sched: [3:1.00]
+; SKYLAKE-NEXT:    shldq $7, %rsi, (%rdx) # sched: [9:1.00]
+; SKYLAKE-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [9:1.00]
+; SKYLAKE-NEXT:    #NO_APP
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; SKX-LABEL: test_shld_shrd_64:
+; SKX:       # %bb.0:
+; SKX-NEXT:    #APP
+; SKX-NEXT:    shldq %cl, %rsi, %rdi # sched: [6:1.00]
+; SKX-NEXT:    shrdq %cl, %rsi, %rdi # sched: [6:1.00]
+; SKX-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [11:1.00]
+; SKX-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [11:1.00]
+; SKX-NEXT:    shldq $7, %rsi, %rdi # sched: [3:1.00]
+; SKX-NEXT:    shrdq $7, %rsi, %rdi # sched: [3:1.00]
+; SKX-NEXT:    shldq $7, %rsi, (%rdx) # sched: [9:1.00]
+; SKX-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [9:1.00]
+; SKX-NEXT:    #NO_APP
+; SKX-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_shld_shrd_64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    #APP
+; BTVER2-NEXT:    shldq %cl, %rsi, %rdi # sched: [4:4.00]
+; BTVER2-NEXT:    shrdq %cl, %rsi, %rdi # sched: [4:4.00]
+; BTVER2-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [9:11.00]
+; BTVER2-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [9:11.00]
+; BTVER2-NEXT:    shldq $7, %rsi, %rdi # sched: [3:3.00]
+; BTVER2-NEXT:    shrdq $7, %rsi, %rdi # sched: [3:3.00]
+; BTVER2-NEXT:    shldq $7, %rsi, (%rdx) # sched: [9:11.00]
+; BTVER2-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [9:11.00]
+; BTVER2-NEXT:    #NO_APP
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_shld_shrd_64:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    #APP
+; ZNVER1-NEXT:    shldq %cl, %rsi, %rdi # sched: [100:?]
+; ZNVER1-NEXT:    shrdq %cl, %rsi, %rdi # sched: [100:?]
+; ZNVER1-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [100:?]
+; ZNVER1-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [100:?]
+; ZNVER1-NEXT:    shldq $7, %rsi, %rdi # sched: [1:0.25]
+; ZNVER1-NEXT:    shrdq $7, %rsi, %rdi # sched: [1:0.25]
+; ZNVER1-NEXT:    shldq $7, %rsi, (%rdx) # sched: [5:0.50]
+; ZNVER1-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [5:0.50]
+; ZNVER1-NEXT:    #NO_APP
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  call void asm sideeffect "shld $1, $0 \0A\09 shrd $1, $0 \0A\09 shld $1, $2 \0A\09 shrd $1, $2 \0A\09 shld $3, $1, $0 \0A\09 shrd $3, $1, $0 \0A\09 shld $3, $1, $2 \0A\09 shrd $3, $1, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7)
+  ret void
+}
+
+define void @test_stc_std() optsize {
+; GENERIC-LABEL: test_stc_std:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    #APP
+; GENERIC-NEXT:    stc # sched: [1:0.33]
+; GENERIC-NEXT:    std # sched: [1:0.33]
+; GENERIC-NEXT:    #NO_APP
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; ATOM-LABEL: test_stc_std:
+; ATOM:       # %bb.0:
+; ATOM-NEXT:    #APP
+; ATOM-NEXT:    stc # sched: [1:0.50]
+; ATOM-NEXT:    std # sched: [21:10.50]
+; ATOM-NEXT:    #NO_APP
+; ATOM-NEXT:    retq # sched: [79:39.50]
+;
+; SLM-LABEL: test_stc_std:
+; SLM:       # %bb.0:
+; SLM-NEXT:    #APP
+; SLM-NEXT:    stc # sched: [1:0.50]
+; SLM-NEXT:    std # sched: [1:0.50]
+; SLM-NEXT:    #NO_APP
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_stc_std:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    #APP
+; SANDY-NEXT:    stc # sched: [1:0.33]
+; SANDY-NEXT:    std # sched: [1:0.33]
+; SANDY-NEXT:    #NO_APP
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_stc_std:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    #APP
+; HASWELL-NEXT:    stc # sched: [1:0.25]
+; HASWELL-NEXT:    std # sched: [6:1.50]
+; HASWELL-NEXT:    #NO_APP
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_stc_std:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    #APP
+; BROADWELL-NEXT:    stc # sched: [1:0.25]
+; BROADWELL-NEXT:    std # sched: [6:1.50]
+; BROADWELL-NEXT:    #NO_APP
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_stc_std:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    #APP
+; SKYLAKE-NEXT:    stc # sched: [1:0.25]
+; SKYLAKE-NEXT:    std # sched: [6:1.50]
+; SKYLAKE-NEXT:    #NO_APP
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; SKX-LABEL: test_stc_std:
+; SKX:       # %bb.0:
+; SKX-NEXT:    #APP
+; SKX-NEXT:    stc # sched: [1:0.25]
+; SKX-NEXT:    std # sched: [6:1.50]
+; SKX-NEXT:    #NO_APP
+; SKX-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_stc_std:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    #APP
+; BTVER2-NEXT:    stc # sched: [1:0.50]
+; BTVER2-NEXT:    std # sched: [1:0.50]
+; BTVER2-NEXT:    #NO_APP
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_stc_std:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    #APP
+; ZNVER1-NEXT:    stc # sched: [1:0.25]
+; ZNVER1-NEXT:    std # sched: [1:0.25]
+; ZNVER1-NEXT:    #NO_APP
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  call void asm sideeffect "stc \0A\09 std", ""()
+  ret void
+}
+
+define void @test_stos() optsize {
+; GENERIC-LABEL: test_stos:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    #APP
+; GENERIC-NEXT:    stosb %al, %es:(%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    stosw %ax, %es:(%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    stosl %eax, %es:(%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    stosq %rax, %es:(%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    #NO_APP
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; ATOM-LABEL: test_stos:
+; ATOM:       # %bb.0:
+; ATOM-NEXT:    #APP
+; ATOM-NEXT:    stosb %al, %es:(%rdi) # sched: [1:0.50]
+; ATOM-NEXT:    stosw %ax, %es:(%rdi) # sched: [1:0.50]
+; ATOM-NEXT:    stosl %eax, %es:(%rdi) # sched: [1:0.50]
+; ATOM-NEXT:    stosq %rax, %es:(%rdi) # sched: [1:0.50]
+; ATOM-NEXT:    #NO_APP
+; ATOM-NEXT:    retq # sched: [79:39.50]
+;
+; SLM-LABEL: test_stos:
+; SLM:       # %bb.0:
+; SLM-NEXT:    #APP
+; SLM-NEXT:    stosb %al, %es:(%rdi) # sched: [100:1.00]
+; SLM-NEXT:    stosw %ax, %es:(%rdi) # sched: [100:1.00]
+; SLM-NEXT:    stosl %eax, %es:(%rdi) # sched: [100:1.00]
+; SLM-NEXT:    stosq %rax, %es:(%rdi) # sched: [100:1.00]
+; SLM-NEXT:    #NO_APP
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_stos:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    #APP
+; SANDY-NEXT:    stosb %al, %es:(%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    stosw %ax, %es:(%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    stosl %eax, %es:(%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    stosq %rax, %es:(%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    #NO_APP
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_stos:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    #APP
+; HASWELL-NEXT:    stosb %al, %es:(%rdi) # sched: [2:1.00]
+; HASWELL-NEXT:    stosw %ax, %es:(%rdi) # sched: [2:1.00]
+; HASWELL-NEXT:    stosl %eax, %es:(%rdi) # sched: [2:1.00]
+; HASWELL-NEXT:    stosq %rax, %es:(%rdi) # sched: [2:1.00]
+; HASWELL-NEXT:    #NO_APP
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_stos:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    #APP
+; BROADWELL-NEXT:    stosb %al, %es:(%rdi) # sched: [2:1.00]
+; BROADWELL-NEXT:    stosw %ax, %es:(%rdi) # sched: [2:1.00]
+; BROADWELL-NEXT:    stosl %eax, %es:(%rdi) # sched: [2:1.00]
+; BROADWELL-NEXT:    stosq %rax, %es:(%rdi) # sched: [2:1.00]
+; BROADWELL-NEXT:    #NO_APP
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_stos:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    #APP
+; SKYLAKE-NEXT:    stosb %al, %es:(%rdi) # sched: [2:1.00]
+; SKYLAKE-NEXT:    stosw %ax, %es:(%rdi) # sched: [2:1.00]
+; SKYLAKE-NEXT:    stosl %eax, %es:(%rdi) # sched: [2:1.00]
+; SKYLAKE-NEXT:    stosq %rax, %es:(%rdi) # sched: [2:1.00]
+; SKYLAKE-NEXT:    #NO_APP
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; SKX-LABEL: test_stos:
+; SKX:       # %bb.0:
+; SKX-NEXT:    #APP
+; SKX-NEXT:    stosb %al, %es:(%rdi) # sched: [2:1.00]
+; SKX-NEXT:    stosw %ax, %es:(%rdi) # sched: [2:1.00]
+; SKX-NEXT:    stosl %eax, %es:(%rdi) # sched: [2:1.00]
+; SKX-NEXT:    stosq %rax, %es:(%rdi) # sched: [2:1.00]
+; SKX-NEXT:    #NO_APP
+; SKX-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_stos:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    #APP
+; BTVER2-NEXT:    stosb %al, %es:(%rdi) # sched: [100:0.17]
+; BTVER2-NEXT:    stosw %ax, %es:(%rdi) # sched: [100:0.17]
+; BTVER2-NEXT:    stosl %eax, %es:(%rdi) # sched: [100:0.17]
+; BTVER2-NEXT:    stosq %rax, %es:(%rdi) # sched: [100:0.17]
+; BTVER2-NEXT:    #NO_APP
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_stos:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    #APP
+; ZNVER1-NEXT:    stosb %al, %es:(%rdi) # sched: [100:?]
+; ZNVER1-NEXT:    stosw %ax, %es:(%rdi) # sched: [100:?]
+; ZNVER1-NEXT:    stosl %eax, %es:(%rdi) # sched: [100:?]
+; ZNVER1-NEXT:    stosq %rax, %es:(%rdi) # sched: [100:?]
+; ZNVER1-NEXT:    #NO_APP
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  call void asm sideeffect "stosb \0A\09 stosw \0A\09 stosl \0A\09 stosq", ""()
+  ret void
+}
+
+define void @test_sub_8(i8 %a0, i8* %a1) optsize {
+; GENERIC-LABEL: test_sub_8:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    #APP
+; GENERIC-NEXT:    subb $7, %al # sched: [1:0.33]
+; GENERIC-NEXT:    subb $7, %dil # sched: [1:0.33]
+; GENERIC-NEXT:    subb $7, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    subb %dil, %dil # sched: [1:0.33]
+; GENERIC-NEXT:    subb %dil, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    subb (%rsi), %dil # sched: [6:0.50]
+; GENERIC-NEXT:    #NO_APP
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; ATOM-LABEL: test_sub_8:
+; ATOM:       # %bb.0:
+; ATOM-NEXT:    #APP
+; ATOM-NEXT:    subb $7, %al # sched: [1:0.50]
+; ATOM-NEXT:    subb $7, %dil # sched: [1:0.50]
+; ATOM-NEXT:    subb $7, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    subb %dil, %dil # sched: [1:0.50]
+; ATOM-NEXT:    subb %dil, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    subb (%rsi), %dil # sched: [1:1.00]
+; ATOM-NEXT:    #NO_APP
+; ATOM-NEXT:    retq # sched: [79:39.50]
+;
+; SLM-LABEL: test_sub_8:
+; SLM:       # %bb.0:
+; SLM-NEXT:    #APP
+; SLM-NEXT:    subb $7, %al # sched: [1:0.50]
+; SLM-NEXT:    subb $7, %dil # sched: [1:0.50]
+; SLM-NEXT:    subb $7, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    subb %dil, %dil # sched: [1:0.50]
+; SLM-NEXT:    subb %dil, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    subb (%rsi), %dil # sched: [4:1.00]
+; SLM-NEXT:    #NO_APP
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_sub_8:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    #APP
+; SANDY-NEXT:    subb $7, %al # sched: [1:0.33]
+; SANDY-NEXT:    subb $7, %dil # sched: [1:0.33]
+; SANDY-NEXT:    subb $7, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    subb %dil, %dil # sched: [1:0.33]
+; SANDY-NEXT:    subb %dil, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    subb (%rsi), %dil # sched: [6:0.50]
+; SANDY-NEXT:    #NO_APP
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_sub_8:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    #APP
+; HASWELL-NEXT:    subb $7, %al # sched: [1:0.25]
+; HASWELL-NEXT:    subb $7, %dil # sched: [1:0.25]
+; HASWELL-NEXT:    subb $7, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    subb %dil, %dil # sched: [1:0.25]
+; HASWELL-NEXT:    subb %dil, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    subb (%rsi), %dil # sched: [6:0.50]
+; HASWELL-NEXT:    #NO_APP
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_sub_8:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    #APP
+; BROADWELL-NEXT:    subb $7, %al # sched: [1:0.25]
+; BROADWELL-NEXT:    subb $7, %dil # sched: [1:0.25]
+; BROADWELL-NEXT:    subb $7, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    subb %dil, %dil # sched: [1:0.25]
+; BROADWELL-NEXT:    subb %dil, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    subb (%rsi), %dil # sched: [6:0.50]
+; BROADWELL-NEXT:    #NO_APP
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_sub_8:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    #APP
+; SKYLAKE-NEXT:    subb $7, %al # sched: [1:0.25]
+; SKYLAKE-NEXT:    subb $7, %dil # sched: [1:0.25]
+; SKYLAKE-NEXT:    subb $7, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    subb %dil, %dil # sched: [1:0.25]
+; SKYLAKE-NEXT:    subb %dil, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    subb (%rsi), %dil # sched: [6:0.50]
+; SKYLAKE-NEXT:    #NO_APP
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; SKX-LABEL: test_sub_8:
+; SKX:       # %bb.0:
+; SKX-NEXT:    #APP
+; SKX-NEXT:    subb $7, %al # sched: [1:0.25]
+; SKX-NEXT:    subb $7, %dil # sched: [1:0.25]
+; SKX-NEXT:    subb $7, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    subb %dil, %dil # sched: [1:0.25]
+; SKX-NEXT:    subb %dil, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    subb (%rsi), %dil # sched: [6:0.50]
+; SKX-NEXT:    #NO_APP
+; SKX-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_sub_8:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    #APP
+; BTVER2-NEXT:    subb $7, %al # sched: [1:0.50]
+; BTVER2-NEXT:    subb $7, %dil # sched: [1:0.50]
+; BTVER2-NEXT:    subb $7, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    subb %dil, %dil # sched: [1:0.50]
+; BTVER2-NEXT:    subb %dil, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    subb (%rsi), %dil # sched: [4:1.00]
+; BTVER2-NEXT:    #NO_APP
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_sub_8:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    #APP
+; ZNVER1-NEXT:    subb $7, %al # sched: [1:0.25]
+; ZNVER1-NEXT:    subb $7, %dil # sched: [1:0.25]
+; ZNVER1-NEXT:    subb $7, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    subb %dil, %dil # sched: [1:0.25]
+; ZNVER1-NEXT:    subb %dil, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    subb (%rsi), %dil # sched: [5:0.50]
+; ZNVER1-NEXT:    #NO_APP
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  tail call void asm "subb $2, %AL \0A\09 subb $2, $0 \0A\09 subb $2, $1 \0A\09 subb $0, $0 \0A\09 subb $0, $1 \0A\09 subb $1, $0", "r,*m,i"(i8 %a0, i8* %a1, i8 7) nounwind
   ret void
 }
-define void @test_shld_shrd_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_shld_shrd_64:
+define void @test_sub_16(i16 %a0, i16* %a1) optsize {
+; GENERIC-LABEL: test_sub_16:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    shldq %cl, %rsi, %rdi # sched: [4:1.50]
-; GENERIC-NEXT:    shrdq %cl, %rsi, %rdi # sched: [4:1.50]
-; GENERIC-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [10:1.50]
-; GENERIC-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [10:1.50]
-; GENERIC-NEXT:    shldq $7, %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT:    shrdq $7, %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT:    shldq $7, %rsi, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [8:1.00]
+; GENERIC-NEXT:    subw $511, %ax # imm = 0x1FF
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    subw $511, %di # imm = 0x1FF
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    subw $511, (%rsi) # imm = 0x1FF
+; GENERIC-NEXT:    # sched: [7:1.00]
+; GENERIC-NEXT:    subw $7, %di # sched: [1:0.33]
+; GENERIC-NEXT:    subw $7, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    subw %di, %di # sched: [1:0.33]
+; GENERIC-NEXT:    subw %di, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    subw (%rsi), %di # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_shld_shrd_64:
+; ATOM-LABEL: test_sub_16:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    shldq %cl, %rsi, %rdi # sched: [8:4.00]
-; ATOM-NEXT:    shrdq %cl, %rsi, %rdi # sched: [8:4.00]
-; ATOM-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [9:4.50]
-; ATOM-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [9:4.50]
-; ATOM-NEXT:    shldq $7, %rsi, %rdi # sched: [9:4.50]
-; ATOM-NEXT:    shrdq $7, %rsi, %rdi # sched: [9:4.50]
-; ATOM-NEXT:    shldq $7, %rsi, (%rdx) # sched: [9:4.50]
-; ATOM-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [9:4.50]
+; ATOM-NEXT:    subw $511, %ax # imm = 0x1FF
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    subw $511, %di # imm = 0x1FF
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    subw $511, (%rsi) # imm = 0x1FF
+; ATOM-NEXT:    # sched: [1:1.00]
+; ATOM-NEXT:    subw $7, %di # sched: [1:0.50]
+; ATOM-NEXT:    subw $7, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    subw %di, %di # sched: [1:0.50]
+; ATOM-NEXT:    subw %di, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    subw (%rsi), %di # sched: [1:1.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_shld_shrd_64:
+; SLM-LABEL: test_sub_16:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    shldq %cl, %rsi, %rdi # sched: [1:1.00]
-; SLM-NEXT:    shrdq %cl, %rsi, %rdi # sched: [1:1.00]
-; SLM-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT:    shldq $7, %rsi, %rdi # sched: [1:1.00]
-; SLM-NEXT:    shrdq $7, %rsi, %rdi # sched: [1:1.00]
-; SLM-NEXT:    shldq $7, %rsi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [4:2.00]
+; SLM-NEXT:    subw $511, %ax # imm = 0x1FF
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    subw $511, %di # imm = 0x1FF
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    subw $511, (%rsi) # imm = 0x1FF
+; SLM-NEXT:    # sched: [4:2.00]
+; SLM-NEXT:    subw $7, %di # sched: [1:0.50]
+; SLM-NEXT:    subw $7, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    subw %di, %di # sched: [1:0.50]
+; SLM-NEXT:    subw %di, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    subw (%rsi), %di # sched: [4:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_shld_shrd_64:
+; SANDY-LABEL: test_sub_16:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    shldq %cl, %rsi, %rdi # sched: [4:1.50]
-; SANDY-NEXT:    shrdq %cl, %rsi, %rdi # sched: [4:1.50]
-; SANDY-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [10:1.50]
-; SANDY-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [10:1.50]
-; SANDY-NEXT:    shldq $7, %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT:    shrdq $7, %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT:    shldq $7, %rsi, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [8:1.00]
+; SANDY-NEXT:    subw $511, %ax # imm = 0x1FF
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    subw $511, %di # imm = 0x1FF
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    subw $511, (%rsi) # imm = 0x1FF
+; SANDY-NEXT:    # sched: [7:1.00]
+; SANDY-NEXT:    subw $7, %di # sched: [1:0.33]
+; SANDY-NEXT:    subw $7, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    subw %di, %di # sched: [1:0.33]
+; SANDY-NEXT:    subw %di, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    subw (%rsi), %di # sched: [6:0.50]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_shld_shrd_64:
+; HASWELL-LABEL: test_sub_16:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    shldq %cl, %rsi, %rdi # sched: [6:1.00]
-; HASWELL-NEXT:    shrdq %cl, %rsi, %rdi # sched: [6:1.00]
-; HASWELL-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [12:1.00]
-; HASWELL-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [12:1.00]
-; HASWELL-NEXT:    shldq $7, %rsi, %rdi # sched: [3:1.00]
-; HASWELL-NEXT:    shrdq $7, %rsi, %rdi # sched: [3:1.00]
-; HASWELL-NEXT:    shldq $7, %rsi, (%rdx) # sched: [10:1.00]
-; HASWELL-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [10:1.00]
+; HASWELL-NEXT:    subw $511, %ax # imm = 0x1FF
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    subw $511, %di # imm = 0x1FF
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    subw $511, (%rsi) # imm = 0x1FF
+; HASWELL-NEXT:    # sched: [7:1.00]
+; HASWELL-NEXT:    subw $7, %di # sched: [1:0.25]
+; HASWELL-NEXT:    subw $7, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    subw %di, %di # sched: [1:0.25]
+; HASWELL-NEXT:    subw %di, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    subw (%rsi), %di # sched: [6:0.50]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_shld_shrd_64:
+; BROADWELL-LABEL: test_sub_16:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    shldq %cl, %rsi, %rdi # sched: [6:1.00]
-; BROADWELL-NEXT:    shrdq %cl, %rsi, %rdi # sched: [6:1.00]
-; BROADWELL-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; BROADWELL-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; BROADWELL-NEXT:    shldq $7, %rsi, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT:    shrdq $7, %rsi, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT:    shldq $7, %rsi, (%rdx) # sched: [9:1.00]
-; BROADWELL-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [9:1.00]
+; BROADWELL-NEXT:    subw $511, %ax # imm = 0x1FF
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    subw $511, %di # imm = 0x1FF
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    subw $511, (%rsi) # imm = 0x1FF
+; BROADWELL-NEXT:    # sched: [6:1.00]
+; BROADWELL-NEXT:    subw $7, %di # sched: [1:0.25]
+; BROADWELL-NEXT:    subw $7, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    subw %di, %di # sched: [1:0.25]
+; BROADWELL-NEXT:    subw %di, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    subw (%rsi), %di # sched: [6:0.50]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_shld_shrd_64:
+; SKYLAKE-LABEL: test_sub_16:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    shldq %cl, %rsi, %rdi # sched: [6:1.00]
-; SKYLAKE-NEXT:    shrdq %cl, %rsi, %rdi # sched: [6:1.00]
-; SKYLAKE-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; SKYLAKE-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; SKYLAKE-NEXT:    shldq $7, %rsi, %rdi # sched: [3:1.00]
-; SKYLAKE-NEXT:    shrdq $7, %rsi, %rdi # sched: [3:1.00]
-; SKYLAKE-NEXT:    shldq $7, %rsi, (%rdx) # sched: [9:1.00]
-; SKYLAKE-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [9:1.00]
+; SKYLAKE-NEXT:    subw $511, %ax # imm = 0x1FF
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    subw $511, %di # imm = 0x1FF
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    subw $511, (%rsi) # imm = 0x1FF
+; SKYLAKE-NEXT:    # sched: [6:1.00]
+; SKYLAKE-NEXT:    subw $7, %di # sched: [1:0.25]
+; SKYLAKE-NEXT:    subw $7, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    subw %di, %di # sched: [1:0.25]
+; SKYLAKE-NEXT:    subw %di, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    subw (%rsi), %di # sched: [6:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_shld_shrd_64:
+; SKX-LABEL: test_sub_16:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    shldq %cl, %rsi, %rdi # sched: [6:1.00]
-; SKX-NEXT:    shrdq %cl, %rsi, %rdi # sched: [6:1.00]
-; SKX-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; SKX-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; SKX-NEXT:    shldq $7, %rsi, %rdi # sched: [3:1.00]
-; SKX-NEXT:    shrdq $7, %rsi, %rdi # sched: [3:1.00]
-; SKX-NEXT:    shldq $7, %rsi, (%rdx) # sched: [9:1.00]
-; SKX-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [9:1.00]
+; SKX-NEXT:    subw $511, %ax # imm = 0x1FF
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    subw $511, %di # imm = 0x1FF
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    subw $511, (%rsi) # imm = 0x1FF
+; SKX-NEXT:    # sched: [6:1.00]
+; SKX-NEXT:    subw $7, %di # sched: [1:0.25]
+; SKX-NEXT:    subw $7, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    subw %di, %di # sched: [1:0.25]
+; SKX-NEXT:    subw %di, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    subw (%rsi), %di # sched: [6:0.50]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_shld_shrd_64:
+; BTVER2-LABEL: test_sub_16:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    shldq %cl, %rsi, %rdi # sched: [4:4.00]
-; BTVER2-NEXT:    shrdq %cl, %rsi, %rdi # sched: [4:4.00]
-; BTVER2-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT:    shldq $7, %rsi, %rdi # sched: [3:3.00]
-; BTVER2-NEXT:    shrdq $7, %rsi, %rdi # sched: [3:3.00]
-; BTVER2-NEXT:    shldq $7, %rsi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [9:11.00]
+; BTVER2-NEXT:    subw $511, %ax # imm = 0x1FF
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    subw $511, %di # imm = 0x1FF
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    subw $511, (%rsi) # imm = 0x1FF
+; BTVER2-NEXT:    # sched: [4:1.00]
+; BTVER2-NEXT:    subw $7, %di # sched: [1:0.50]
+; BTVER2-NEXT:    subw $7, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    subw %di, %di # sched: [1:0.50]
+; BTVER2-NEXT:    subw %di, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    subw (%rsi), %di # sched: [4:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_shld_shrd_64:
+; ZNVER1-LABEL: test_sub_16:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    shldq %cl, %rsi, %rdi # sched: [100:?]
-; ZNVER1-NEXT:    shrdq %cl, %rsi, %rdi # sched: [100:?]
-; ZNVER1-NEXT:    shldq %cl, %rsi, (%rdx) # sched: [100:?]
-; ZNVER1-NEXT:    shrdq %cl, %rsi, (%rdx) # sched: [100:?]
-; ZNVER1-NEXT:    shldq $7, %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT:    shrdq $7, %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT:    shldq $7, %rsi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT:    shrdq $7, %rsi, (%rdx) # sched: [5:0.50]
+; ZNVER1-NEXT:    subw $511, %ax # imm = 0x1FF
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    subw $511, %di # imm = 0x1FF
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    subw $511, (%rsi) # imm = 0x1FF
+; ZNVER1-NEXT:    # sched: [5:0.50]
+; ZNVER1-NEXT:    subw $7, %di # sched: [1:0.25]
+; ZNVER1-NEXT:    subw $7, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    subw %di, %di # sched: [1:0.25]
+; ZNVER1-NEXT:    subw %di, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    subw (%rsi), %di # sched: [5:0.50]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  call void asm sideeffect "shld $1, $0 \0A\09 shrd $1, $0 \0A\09 shld $1, $2 \0A\09 shrd $1, $2 \0A\09 shld $3, $1, $0 \0A\09 shrd $3, $1, $0 \0A\09 shld $3, $1, $2 \0A\09 shrd $3, $1, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7)
+  tail call void asm "subw $2, %AX \0A\09 subw $2, $0 \0A\09 subw $2, $1 \0A\09 subw $3, $0 \0A\09 subw $3, $1 \0A\09 subw $0, $0 \0A\09 subw $0, $1 \0A\09 subw $1, $0", "r,*m,i,i"(i16 %a0, i16* %a1, i16 511, i8 7) nounwind
   ret void
 }
-
-define void @test_stc_std() optsize {
-; GENERIC-LABEL: test_stc_std:
+define void @test_sub_32(i32 %a0, i32* %a1) optsize {
+; GENERIC-LABEL: test_sub_32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    stc # sched: [1:0.33]
-; GENERIC-NEXT:    std # sched: [1:0.33]
+; GENERIC-NEXT:    subl $665536, %eax # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    subl $665536, %edi # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    subl $665536, (%rsi) # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [7:1.00]
+; GENERIC-NEXT:    subl $7, %edi # sched: [1:0.33]
+; GENERIC-NEXT:    subl $7, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    subl %edi, %edi # sched: [1:0.33]
+; GENERIC-NEXT:    subl %edi, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    subl (%rsi), %edi # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_stc_std:
+; ATOM-LABEL: test_sub_32:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    stc # sched: [1:0.50]
-; ATOM-NEXT:    std # sched: [21:10.50]
+; ATOM-NEXT:    subl $665536, %eax # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    subl $665536, %edi # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    subl $665536, (%rsi) # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:1.00]
+; ATOM-NEXT:    subl $7, %edi # sched: [1:0.50]
+; ATOM-NEXT:    subl $7, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    subl %edi, %edi # sched: [1:0.50]
+; ATOM-NEXT:    subl %edi, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    subl (%rsi), %edi # sched: [1:1.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_stc_std:
+; SLM-LABEL: test_sub_32:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    stc # sched: [1:0.50]
-; SLM-NEXT:    std # sched: [1:0.50]
+; SLM-NEXT:    subl $665536, %eax # imm = 0xA27C0
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    subl $665536, %edi # imm = 0xA27C0
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    subl $665536, (%rsi) # imm = 0xA27C0
+; SLM-NEXT:    # sched: [4:2.00]
+; SLM-NEXT:    subl $7, %edi # sched: [1:0.50]
+; SLM-NEXT:    subl $7, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    subl %edi, %edi # sched: [1:0.50]
+; SLM-NEXT:    subl %edi, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    subl (%rsi), %edi # sched: [4:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_stc_std:
+; SANDY-LABEL: test_sub_32:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    stc # sched: [1:0.33]
-; SANDY-NEXT:    std # sched: [1:0.33]
+; SANDY-NEXT:    subl $665536, %eax # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    subl $665536, %edi # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    subl $665536, (%rsi) # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [7:1.00]
+; SANDY-NEXT:    subl $7, %edi # sched: [1:0.33]
+; SANDY-NEXT:    subl $7, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    subl %edi, %edi # sched: [1:0.33]
+; SANDY-NEXT:    subl %edi, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    subl (%rsi), %edi # sched: [6:0.50]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_stc_std:
+; HASWELL-LABEL: test_sub_32:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    stc # sched: [1:0.25]
-; HASWELL-NEXT:    std # sched: [6:1.50]
+; HASWELL-NEXT:    subl $665536, %eax # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    subl $665536, %edi # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    subl $665536, (%rsi) # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [7:1.00]
+; HASWELL-NEXT:    subl $7, %edi # sched: [1:0.25]
+; HASWELL-NEXT:    subl $7, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    subl %edi, %edi # sched: [1:0.25]
+; HASWELL-NEXT:    subl %edi, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    subl (%rsi), %edi # sched: [6:0.50]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_stc_std:
+; BROADWELL-LABEL: test_sub_32:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    stc # sched: [1:0.25]
-; BROADWELL-NEXT:    std # sched: [6:1.50]
+; BROADWELL-NEXT:    subl $665536, %eax # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    subl $665536, %edi # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    subl $665536, (%rsi) # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [6:1.00]
+; BROADWELL-NEXT:    subl $7, %edi # sched: [1:0.25]
+; BROADWELL-NEXT:    subl $7, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    subl %edi, %edi # sched: [1:0.25]
+; BROADWELL-NEXT:    subl %edi, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    subl (%rsi), %edi # sched: [6:0.50]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_stc_std:
+; SKYLAKE-LABEL: test_sub_32:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    stc # sched: [1:0.25]
-; SKYLAKE-NEXT:    std # sched: [6:1.50]
+; SKYLAKE-NEXT:    subl $665536, %eax # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    subl $665536, %edi # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    subl $665536, (%rsi) # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [6:1.00]
+; SKYLAKE-NEXT:    subl $7, %edi # sched: [1:0.25]
+; SKYLAKE-NEXT:    subl $7, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    subl %edi, %edi # sched: [1:0.25]
+; SKYLAKE-NEXT:    subl %edi, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    subl (%rsi), %edi # sched: [6:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_stc_std:
+; SKX-LABEL: test_sub_32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    stc # sched: [1:0.25]
-; SKX-NEXT:    std # sched: [6:1.50]
+; SKX-NEXT:    subl $665536, %eax # imm = 0xA27C0
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    subl $665536, %edi # imm = 0xA27C0
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    subl $665536, (%rsi) # imm = 0xA27C0
+; SKX-NEXT:    # sched: [6:1.00]
+; SKX-NEXT:    subl $7, %edi # sched: [1:0.25]
+; SKX-NEXT:    subl $7, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    subl %edi, %edi # sched: [1:0.25]
+; SKX-NEXT:    subl %edi, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    subl (%rsi), %edi # sched: [6:0.50]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_stc_std:
+; BTVER2-LABEL: test_sub_32:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    stc # sched: [1:0.50]
-; BTVER2-NEXT:    std # sched: [1:0.50]
+; BTVER2-NEXT:    subl $665536, %eax # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    subl $665536, %edi # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    subl $665536, (%rsi) # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [4:1.00]
+; BTVER2-NEXT:    subl $7, %edi # sched: [1:0.50]
+; BTVER2-NEXT:    subl $7, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    subl %edi, %edi # sched: [1:0.50]
+; BTVER2-NEXT:    subl %edi, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    subl (%rsi), %edi # sched: [4:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_stc_std:
+; ZNVER1-LABEL: test_sub_32:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    stc # sched: [1:0.25]
-; ZNVER1-NEXT:    std # sched: [1:0.25]
+; ZNVER1-NEXT:    subl $665536, %eax # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    subl $665536, %edi # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    subl $665536, (%rsi) # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [5:0.50]
+; ZNVER1-NEXT:    subl $7, %edi # sched: [1:0.25]
+; ZNVER1-NEXT:    subl $7, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    subl %edi, %edi # sched: [1:0.25]
+; ZNVER1-NEXT:    subl %edi, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    subl (%rsi), %edi # sched: [5:0.50]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  call void asm sideeffect "stc \0A\09 std", ""()
+  tail call void asm "subl $2, %EAX \0A\09 subl $2, $0 \0A\09 subl $2, $1 \0A\09 subl $3, $0 \0A\09 subl $3, $1 \0A\09 subl $0, $0 \0A\09 subl $0, $1 \0A\09 subl $1, $0", "r,*m,i,i"(i32 %a0, i32* %a1, i32 665536, i8 7) nounwind
   ret void
 }
-
-define void @test_stos() optsize {
-; GENERIC-LABEL: test_stos:
+define void @test_sub_64(i64 %a0, i64* %a1) optsize {
+; GENERIC-LABEL: test_sub_64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    stosb %al, %es:(%rdi) # sched: [5:1.00]
-; GENERIC-NEXT:    stosw %ax, %es:(%rdi) # sched: [5:1.00]
-; GENERIC-NEXT:    stosl %eax, %es:(%rdi) # sched: [5:1.00]
-; GENERIC-NEXT:    stosq %rax, %es:(%rdi) # sched: [5:1.00]
+; GENERIC-NEXT:    subq $665536, %rax # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    subq $665536, %rdi # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [1:0.33]
+; GENERIC-NEXT:    subq $665536, (%rsi) # imm = 0xA27C0
+; GENERIC-NEXT:    # sched: [7:1.00]
+; GENERIC-NEXT:    subq $7, %rdi # sched: [1:0.33]
+; GENERIC-NEXT:    subq $7, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    subq %rdi, %rdi # sched: [1:0.33]
+; GENERIC-NEXT:    subq %rdi, (%rsi) # sched: [7:1.00]
+; GENERIC-NEXT:    subq (%rsi), %rdi # sched: [6:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
-; ATOM-LABEL: test_stos:
+; ATOM-LABEL: test_sub_64:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    stosb %al, %es:(%rdi) # sched: [1:0.50]
-; ATOM-NEXT:    stosw %ax, %es:(%rdi) # sched: [1:0.50]
-; ATOM-NEXT:    stosl %eax, %es:(%rdi) # sched: [1:0.50]
-; ATOM-NEXT:    stosq %rax, %es:(%rdi) # sched: [1:0.50]
+; ATOM-NEXT:    subq $665536, %rax # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    subq $665536, %rdi # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:0.50]
+; ATOM-NEXT:    subq $665536, (%rsi) # imm = 0xA27C0
+; ATOM-NEXT:    # sched: [1:1.00]
+; ATOM-NEXT:    subq $7, %rdi # sched: [1:0.50]
+; ATOM-NEXT:    subq $7, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    subq %rdi, %rdi # sched: [1:0.50]
+; ATOM-NEXT:    subq %rdi, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    subq (%rsi), %rdi # sched: [1:1.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
-; SLM-LABEL: test_stos:
+; SLM-LABEL: test_sub_64:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    stosb %al, %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT:    stosw %ax, %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT:    stosl %eax, %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT:    stosq %rax, %es:(%rdi) # sched: [100:1.00]
+; SLM-NEXT:    subq $665536, %rax # imm = 0xA27C0
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    subq $665536, %rdi # imm = 0xA27C0
+; SLM-NEXT:    # sched: [1:0.50]
+; SLM-NEXT:    subq $665536, (%rsi) # imm = 0xA27C0
+; SLM-NEXT:    # sched: [4:2.00]
+; SLM-NEXT:    subq $7, %rdi # sched: [1:0.50]
+; SLM-NEXT:    subq $7, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    subq %rdi, %rdi # sched: [1:0.50]
+; SLM-NEXT:    subq %rdi, (%rsi) # sched: [4:2.00]
+; SLM-NEXT:    subq (%rsi), %rdi # sched: [4:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
-; SANDY-LABEL: test_stos:
+; SANDY-LABEL: test_sub_64:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    stosb %al, %es:(%rdi) # sched: [5:1.00]
-; SANDY-NEXT:    stosw %ax, %es:(%rdi) # sched: [5:1.00]
-; SANDY-NEXT:    stosl %eax, %es:(%rdi) # sched: [5:1.00]
-; SANDY-NEXT:    stosq %rax, %es:(%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    subq $665536, %rax # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    subq $665536, %rdi # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [1:0.33]
+; SANDY-NEXT:    subq $665536, (%rsi) # imm = 0xA27C0
+; SANDY-NEXT:    # sched: [7:1.00]
+; SANDY-NEXT:    subq $7, %rdi # sched: [1:0.33]
+; SANDY-NEXT:    subq $7, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    subq %rdi, %rdi # sched: [1:0.33]
+; SANDY-NEXT:    subq %rdi, (%rsi) # sched: [7:1.00]
+; SANDY-NEXT:    subq (%rsi), %rdi # sched: [6:0.50]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
-; HASWELL-LABEL: test_stos:
+; HASWELL-LABEL: test_sub_64:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    #APP
-; HASWELL-NEXT:    stosb %al, %es:(%rdi) # sched: [2:1.00]
-; HASWELL-NEXT:    stosw %ax, %es:(%rdi) # sched: [2:1.00]
-; HASWELL-NEXT:    stosl %eax, %es:(%rdi) # sched: [2:1.00]
-; HASWELL-NEXT:    stosq %rax, %es:(%rdi) # sched: [2:1.00]
+; HASWELL-NEXT:    subq $665536, %rax # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    subq $665536, %rdi # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [1:0.25]
+; HASWELL-NEXT:    subq $665536, (%rsi) # imm = 0xA27C0
+; HASWELL-NEXT:    # sched: [7:1.00]
+; HASWELL-NEXT:    subq $7, %rdi # sched: [1:0.25]
+; HASWELL-NEXT:    subq $7, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    subq %rdi, %rdi # sched: [1:0.25]
+; HASWELL-NEXT:    subq %rdi, (%rsi) # sched: [7:1.00]
+; HASWELL-NEXT:    subq (%rsi), %rdi # sched: [6:0.50]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; BROADWELL-LABEL: test_stos:
+; BROADWELL-LABEL: test_sub_64:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    stosb %al, %es:(%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT:    stosw %ax, %es:(%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT:    stosl %eax, %es:(%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT:    stosq %rax, %es:(%rdi) # sched: [2:1.00]
+; BROADWELL-NEXT:    subq $665536, %rax # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    subq $665536, %rdi # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [1:0.25]
+; BROADWELL-NEXT:    subq $665536, (%rsi) # imm = 0xA27C0
+; BROADWELL-NEXT:    # sched: [6:1.00]
+; BROADWELL-NEXT:    subq $7, %rdi # sched: [1:0.25]
+; BROADWELL-NEXT:    subq $7, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    subq %rdi, %rdi # sched: [1:0.25]
+; BROADWELL-NEXT:    subq %rdi, (%rsi) # sched: [6:1.00]
+; BROADWELL-NEXT:    subq (%rsi), %rdi # sched: [6:0.50]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
-; SKYLAKE-LABEL: test_stos:
+; SKYLAKE-LABEL: test_sub_64:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    stosb %al, %es:(%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT:    stosw %ax, %es:(%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT:    stosl %eax, %es:(%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT:    stosq %rax, %es:(%rdi) # sched: [2:1.00]
+; SKYLAKE-NEXT:    subq $665536, %rax # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    subq $665536, %rdi # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [1:0.25]
+; SKYLAKE-NEXT:    subq $665536, (%rsi) # imm = 0xA27C0
+; SKYLAKE-NEXT:    # sched: [6:1.00]
+; SKYLAKE-NEXT:    subq $7, %rdi # sched: [1:0.25]
+; SKYLAKE-NEXT:    subq $7, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    subq %rdi, %rdi # sched: [1:0.25]
+; SKYLAKE-NEXT:    subq %rdi, (%rsi) # sched: [6:1.00]
+; SKYLAKE-NEXT:    subq (%rsi), %rdi # sched: [6:0.50]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
-; SKX-LABEL: test_stos:
+; SKX-LABEL: test_sub_64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    stosb %al, %es:(%rdi) # sched: [2:1.00]
-; SKX-NEXT:    stosw %ax, %es:(%rdi) # sched: [2:1.00]
-; SKX-NEXT:    stosl %eax, %es:(%rdi) # sched: [2:1.00]
-; SKX-NEXT:    stosq %rax, %es:(%rdi) # sched: [2:1.00]
+; SKX-NEXT:    subq $665536, %rax # imm = 0xA27C0
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    subq $665536, %rdi # imm = 0xA27C0
+; SKX-NEXT:    # sched: [1:0.25]
+; SKX-NEXT:    subq $665536, (%rsi) # imm = 0xA27C0
+; SKX-NEXT:    # sched: [6:1.00]
+; SKX-NEXT:    subq $7, %rdi # sched: [1:0.25]
+; SKX-NEXT:    subq $7, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    subq %rdi, %rdi # sched: [1:0.25]
+; SKX-NEXT:    subq %rdi, (%rsi) # sched: [6:1.00]
+; SKX-NEXT:    subq (%rsi), %rdi # sched: [6:0.50]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
-; BTVER2-LABEL: test_stos:
+; BTVER2-LABEL: test_sub_64:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    stosb %al, %es:(%rdi) # sched: [100:0.17]
-; BTVER2-NEXT:    stosw %ax, %es:(%rdi) # sched: [100:0.17]
-; BTVER2-NEXT:    stosl %eax, %es:(%rdi) # sched: [100:0.17]
-; BTVER2-NEXT:    stosq %rax, %es:(%rdi) # sched: [100:0.17]
+; BTVER2-NEXT:    subq $665536, %rax # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    subq $665536, %rdi # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [1:0.50]
+; BTVER2-NEXT:    subq $665536, (%rsi) # imm = 0xA27C0
+; BTVER2-NEXT:    # sched: [4:1.00]
+; BTVER2-NEXT:    subq $7, %rdi # sched: [1:0.50]
+; BTVER2-NEXT:    subq $7, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    subq %rdi, %rdi # sched: [1:0.50]
+; BTVER2-NEXT:    subq %rdi, (%rsi) # sched: [4:1.00]
+; BTVER2-NEXT:    subq (%rsi), %rdi # sched: [4:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
-; ZNVER1-LABEL: test_stos:
+; ZNVER1-LABEL: test_sub_64:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    #APP
-; ZNVER1-NEXT:    stosb %al, %es:(%rdi) # sched: [100:?]
-; ZNVER1-NEXT:    stosw %ax, %es:(%rdi) # sched: [100:?]
-; ZNVER1-NEXT:    stosl %eax, %es:(%rdi) # sched: [100:?]
-; ZNVER1-NEXT:    stosq %rax, %es:(%rdi) # sched: [100:?]
+; ZNVER1-NEXT:    subq $665536, %rax # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    subq $665536, %rdi # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [1:0.25]
+; ZNVER1-NEXT:    subq $665536, (%rsi) # imm = 0xA27C0
+; ZNVER1-NEXT:    # sched: [5:0.50]
+; ZNVER1-NEXT:    subq $7, %rdi # sched: [1:0.25]
+; ZNVER1-NEXT:    subq $7, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    subq %rdi, %rdi # sched: [1:0.25]
+; ZNVER1-NEXT:    subq %rdi, (%rsi) # sched: [5:0.50]
+; ZNVER1-NEXT:    subq (%rsi), %rdi # sched: [5:0.50]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  call void asm sideeffect "stosb \0A\09 stosw \0A\09 stosl \0A\09 stosq", ""()
+  tail call void asm "subq $2, %RAX \0A\09 subq $2, $0 \0A\09 subq $2, $1 \0A\09 subq $3, $0 \0A\09 subq $3, $1 \0A\09 subq $0, $0 \0A\09 subq $0, $1 \0A\09 subq $1, $0", "r,*m,i,i"(i64 %a0, i64* %a1, i32 665536, i8 7) nounwind
   ret void
 }
 
-; TODO - test_sub
 ; TODO - test_test
 
 ; TODO: ud0, ud1




More information about the llvm-commits mailing list