# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -bottleneck-analysis < %s | FileCheck %s
#
# Purpose: feed llvm-mca a single loop body and verify the report it prints
# when bottleneck analysis is requested for an x86-64 Skylake target (see the
# -mcpu and -bottleneck-analysis options on the llvm-mca command line above).
# The directives at the bottom of this file pin the summary, the bottleneck
# breakdown, the critical sequence, and the per-instruction info and
# resource-pressure tables.
#
# The first line states that the assertions are autogenerated, so after
# changing the input below they should be regenerated with that script rather
# than edited by hand.

# Loop body under test. The branch at the end targets this label, and the
# expected critical sequence treats the %rax update (instruction 18) as a
# loop-carried input of the first load (instruction 0).
.LBB0_4:
  # Load a vector indexed by %rax, replicate its elements 3, 2, 1 and 0 into
  # %xmm7, %xmm6, %xmm5 and %xmm0, and form the first product in %xmm8 from
  # %xmm7 and a stack operand.
  vmovups (%rsi,%rax,2), %xmm0
  vpermilps $255, %xmm0, %xmm7
  vmulps -24(%rsp), %xmm7, %xmm8
  vpermilps $170, %xmm0, %xmm6
  vpermilps $85, %xmm0, %xmm5
  vbroadcastss %xmm0, %xmm0
  # Serial chain of fused multiply-adds: each one reads the register written by
  # the previous one, which is the run of register dependencies listed in the
  # expected critical sequence. The two vmovaps copies are interleaved with it.
  vfmadd231ps %xmm9, %xmm6, %xmm8
  vfmadd213ps %xmm8, %xmm10, %xmm5
  vfmadd213ps %xmm5, %xmm11, %xmm0
  vfmadd213ps %xmm0, %xmm12, %xmm4
  vfmadd213ps %xmm4, %xmm13, %xmm1
  vmovaps %xmm7, %xmm4
  vfmadd213ps %xmm1, %xmm14, %xmm2
  vmovaps %xmm6, %xmm1
  vfmadd213ps %xmm2, %xmm15, %xmm3
  # Store the result held in %xmm3, derive %xmm0 and %xmm2 from it, advance the
  # index by 16, decrement the counter in %ecx, and branch back while the
  # decremented counter is nonzero.
  vpermilps $170, %xmm3, %xmm0
  vmovups %xmm3, (%rdx,%rax)
  vpermilps $255, %xmm3, %xmm2
  addq $16, %rax
  decl %ecx
  vmovaps %xmm0, %xmm3
  jne .LBB0_4

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 2200
# CHECK-NEXT: Total Cycles: 1039
# CHECK-NEXT: Total uOps: 2400

# CHECK: Dispatch Width: 6
# CHECK-NEXT: uOps Per Cycle: 2.31
# CHECK-NEXT: IPC: 2.12
# CHECK-NEXT: Block RThroughput: 6.0

# CHECK: Cycles with backend pressure increase [ 92.69% ]
# CHECK-NEXT: Throughput Bottlenecks:
# CHECK-NEXT: Resource Pressure [ 46.78% ]
# CHECK-NEXT: - SKLPort0 [ 14.24% ]
# CHECK-NEXT: - SKLPort1 [ 14.24% ]
# CHECK-NEXT: - SKLPort5 [ 46.49% ]
# CHECK-NEXT: - SKLPort6 [ 8.66% ]
# CHECK-NEXT: Data Dependencies: [ 64.97% ]
# CHECK-NEXT: - Register Dependencies [ 64.97% ]
# CHECK-NEXT: - Memory Dependencies [ 0.00% ]

# CHECK: Critical sequence based on the simulation:

# CHECK: Instruction Dependency Information
# CHECK-NEXT: +----< 18. addq $16, %rax
# CHECK-NEXT: |
# CHECK-NEXT: | < loop carried >
# CHECK-NEXT: |
# CHECK-NEXT: +----> 0. vmovups (%rsi,%rax,2), %xmm0 ## REGISTER dependency: %rax
# CHECK-NEXT: | 1. vpermilps $255, %xmm0, %xmm7
# CHECK-NEXT: | 2. vmulps -24(%rsp), %xmm7, %xmm8
# CHECK-NEXT: +----> 3. vpermilps $170, %xmm0, %xmm6 ## REGISTER dependency: %xmm0
# CHECK-NEXT: | 4. vpermilps $85, %xmm0, %xmm5
# CHECK-NEXT: | 5. vbroadcastss %xmm0, %xmm0
# CHECK-NEXT: +----> 6. vfmadd231ps %xmm9, %xmm6, %xmm8 ## REGISTER dependency: %xmm6
# CHECK-NEXT: +----> 7. vfmadd213ps %xmm8, %xmm10, %xmm5 ## REGISTER dependency: %xmm8
# CHECK-NEXT: +----> 8. vfmadd213ps %xmm5, %xmm11, %xmm0 ## REGISTER dependency: %xmm5
# CHECK-NEXT: +----> 9. vfmadd213ps %xmm0, %xmm12, %xmm4 ## REGISTER dependency: %xmm0
# CHECK-NEXT: +----> 10. vfmadd213ps %xmm4, %xmm13, %xmm1 ## REGISTER dependency: %xmm4
# CHECK-NEXT: | 11. vmovaps %xmm7, %xmm4
# CHECK-NEXT: +----> 12. vfmadd213ps %xmm1, %xmm14, %xmm2 ## REGISTER dependency: %xmm1
# CHECK-NEXT: | 13. vmovaps %xmm6, %xmm1
# CHECK-NEXT: +----> 14. vfmadd213ps %xmm2, %xmm15, %xmm3 ## REGISTER dependency: %xmm2
# CHECK-NEXT: +----> 15. vpermilps $170, %xmm3, %xmm0 ## REGISTER dependency: %xmm3
# CHECK-NEXT: | 16. vmovups %xmm3, (%rdx,%rax)
# CHECK-NEXT: | 17. vpermilps $255, %xmm3, %xmm2
# CHECK-NEXT: | 18. addq $16, %rax
# CHECK-NEXT: | 19. decl %ecx
# CHECK-NEXT: +----> 20. vmovaps %xmm0, %xmm3 ## REGISTER dependency: %xmm0
# CHECK-NEXT: 21. jne .LBB0_4

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 6 0.50 * vmovups (%rsi,%rax,2), %xmm0
# CHECK-NEXT: 1 1 1.00 vpermilps $255, %xmm0, %xmm7
# CHECK-NEXT: 2 10 0.50 * vmulps -24(%rsp), %xmm7, %xmm8
# CHECK-NEXT: 1 1 1.00 vpermilps $170, %xmm0, %xmm6
# CHECK-NEXT: 1 1 1.00 vpermilps $85, %xmm0, %xmm5
# CHECK-NEXT: 1 1 1.00 vbroadcastss %xmm0, %xmm0
# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm9, %xmm6, %xmm8
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm8, %xmm10, %xmm5
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm5, %xmm11, %xmm0
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm0, %xmm12, %xmm4
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm4, %xmm13, %xmm1
# CHECK-NEXT: 1 1 0.33 vmovaps %xmm7, %xmm4
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm1, %xmm14, %xmm2
# CHECK-NEXT: 1 1 0.33 vmovaps %xmm6, %xmm1
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm2, %xmm15, %xmm3
# CHECK-NEXT: 1 1 1.00 vpermilps $170, %xmm3, %xmm0
# CHECK-NEXT: 2 1 1.00 * vmovups %xmm3, (%rdx,%rax)
# CHECK-NEXT: 1 1 1.00 vpermilps $255, %xmm3, %xmm2
# CHECK-NEXT: 1 1 0.25 addq $16, %rax
# CHECK-NEXT: 1 1 0.25 decl %ecx
# CHECK-NEXT: 1 1 0.33 vmovaps %xmm0, %xmm3
# CHECK-NEXT: 1 1 0.50 jne .LBB0_4

# CHECK: Resources:
# CHECK-NEXT: [0] - SKLDivider
# CHECK-NEXT: [1] - SKLFPDivider
# CHECK-NEXT: [2] - SKLPort0
# CHECK-NEXT: [3] - SKLPort1
# CHECK-NEXT: [4] - SKLPort2
# CHECK-NEXT: [5] - SKLPort3
# CHECK-NEXT: [6] - SKLPort4
# CHECK-NEXT: [7] - SKLPort5
# CHECK-NEXT: [8] - SKLPort6
# CHECK-NEXT: [9] - SKLPort7

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 5.52 5.53 1.01 1.03 1.00 6.02 2.93 0.96

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - - - 0.04 0.96 - - - - vmovups (%rsi,%rax,2), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $255, %xmm0, %xmm7
# CHECK-NEXT: - - 0.03 0.97 0.96 0.04 - - - - vmulps -24(%rsp), %xmm7, %xmm8
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $170, %xmm0, %xmm6
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $85, %xmm0, %xmm5
# CHECK-NEXT: - - - - - - - 1.00 - - vbroadcastss %xmm0, %xmm0
# CHECK-NEXT: - - 0.95 0.05 - - - - - - vfmadd231ps %xmm9, %xmm6, %xmm8
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm8, %xmm10, %xmm5
# CHECK-NEXT: - - 0.92 0.08 - - - - - - vfmadd213ps %xmm5, %xmm11, %xmm0
# CHECK-NEXT: - - 0.95 0.05 - - - - - - vfmadd213ps %xmm0, %xmm12, %xmm4
# CHECK-NEXT: - - 0.51 0.49 - - - - - - vfmadd213ps %xmm4, %xmm13, %xmm1
# CHECK-NEXT: - - 0.52 0.48 - - - - - - vmovaps %xmm7, %xmm4
# CHECK-NEXT: - - 0.49 0.51 - - - - - - vfmadd213ps %xmm1, %xmm14, %xmm2
# CHECK-NEXT: - - 0.04 0.95 - - - 0.01 - - vmovaps %xmm6, %xmm1
# CHECK-NEXT: - - 0.51 0.49 - - - - - - vfmadd213ps %xmm2, %xmm15, %xmm3
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $170, %xmm3, %xmm0
# CHECK-NEXT: - - - - 0.01 0.03 1.00 - - 0.96 vmovups %xmm3, (%rdx,%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $255, %xmm3, %xmm2
# CHECK-NEXT: - - - - - - - - 1.00 - addq $16, %rax
# CHECK-NEXT: - - 0.04 0.01 - - - 0.01 0.94 - decl %ecx
# CHECK-NEXT: - - 0.05 0.95 - - - - - - vmovaps %xmm0, %xmm3
# CHECK-NEXT: - - 0.01 - - - - - 0.99 - jne .LBB0_4