; Code optimizations
;
; NASM source. A series of small snippets, each placing a non-optimized
; instruction sequence next to its optimized counterpart.

bits 64

section .data

var_a:  dq      0                       ; 64-bit variable used by the "register uses" snippet

section .text

global _start

_start:
%include "header.asm.inc"

        ; Operands compared by the conditional-move and ternary snippets.
        mov     rcx, 2
        mov     rbx, 3

        ; Continue directly at the `unswitched` label. Everything placed
        ; between this jump and that label is assembled but not executed.
        jmp     unswitched

; ---------------------------------------------------------------------
; Conditional moves
;   rax = (rbx >= rcx) ? 1 : 0          (unsigned comparison)
; ---------------------------------------------------------------------

; Non-optimized: the value is selected with a branch.
        cmp     rbx, rcx
        jb      cmov_branch_below       ; rbx < rcx
        mov     rax, 1
        jmp     cmov_branch_done
cmov_branch_below:
        mov     rax, 0
cmov_branch_done:

; Optimized: both candidates are loaded up front and cmovae picks one,
; so no jump is needed.
        mov     rdx, 1                  ; result when rbx >= rcx
        mov     rax, 0                  ; result when rbx <  rcx
        cmp     rbx, rcx
        cmovae  rax, rdx

; ---------------------------------------------------------------------
; Ternary operator (exercise)
;   rax = (rbx >= rcx) ? 37 : 13;       (unsigned comparison)
;
; Branch-free: cmp sets CF exactly when rbx < rcx, and `sbb rax, rax`
; expands CF into an all-ones / all-zeros mask.
; ---------------------------------------------------------------------
        xor     eax, eax                ; rax = 0 (must precede cmp: xor rewrites the flags)
        cmp     rbx, rcx                ; CF = (rbx < rcx)
        sbb     rax, rax                ; rax = CF ? -1  : 0
        and     rax, 13 - 37            ; rax = CF ? -24 : 0
        add     rax, 37                 ; rax = CF ? 13  : 37

; ---------------------------------------------------------------------
; Ternary operator
;   rax = (rbx >= rcx) ? 1 : 0;         (unsigned comparison)
; ---------------------------------------------------------------------

; Non-optimized: one branch per outcome.
        cmp     rbx, rcx
        jb      ternary_below           ; rbx < rcx
        mov     rax, 1
        jmp     ternary_done
ternary_below:
        mov     rax, 0
ternary_done:

; Optimized: derive the result from the carry flag, without a branch.
        cmp     rbx, rcx                ; CF = (rbx < rcx)
        sbb     rax, rax                ; rax = CF ? -1 : 0
        inc     rax                     ; rax = CF ?  0 : 1

; ---------------------------------------------------------------------
; Constant folding
;   rax = 4 + 1
; ---------------------------------------------------------------------

; Non-optimized: the addition is carried out at run time.
        mov     rax, 4
        add     rax, 1

; Optimized: the sum is a constant, so it is folded into the immediate.
        mov     rax, 4 + 1

; ---------------------------------------------------------------------
; Common subexpression elimination
;   rcx = 4*rbx + 4*rbx + 4*rbx         (rbx = 8)
;
; `mul rbx` computes rax * rbx: the low 64 bits land in rax and rdx is
; overwritten with the high 64 bits.
; ---------------------------------------------------------------------

; Non-optimized: the product 4*rbx is recomputed for each of the three terms.
        xor     ecx, ecx                ; rcx = 0
        mov     rbx, 8
%rep 3
        mov     rax, 4
        mul     rbx
        add     rcx, rax
%endrep

; Optimized: the product is computed once and then added three times.
        xor     ecx, ecx                ; rcx = 0
        mov     rbx, 8
        mov     rax, 4
        mul     rbx
%rep 3
        add     rcx, rax
%endrep

; ---------------------------------------------------------------------
; Register uses
;   a = 0; a = 1; if (a == 2) a = 1;
; ---------------------------------------------------------------------

; Non-optimized: `a` is the memory variable var_a, and each value is
; staged in rax before being stored or compared.
        mov     rax, 0
        mov     qword [var_a], rax
        mov     rax, 1
        mov     qword [var_a], rax
        mov     rax, 2
        cmp     qword [var_a], rax
        jne     reg_uses_mem_skip       ; a != 2: leave var_a alone
        mov     rax, 1
        mov     qword [var_a], rax
reg_uses_mem_skip:

; Optimized: `a` is held in rax; var_a is not accessed.
        mov     rax, 0
        mov     rax, 1
        cmp     rax, 2
        jne     reg_uses_reg_skip       ; a != 2: leave rax alone
        mov     rax, 1
reg_uses_reg_skip:

; ---------------------------------------------------------------------
; Faster assembly equivalents
;   a = 0; a *= 8; a *= 3;
; ---------------------------------------------------------------------

; Non-optimized: both multiplications use `mul`, which needs the factor
; in a register (rbx here) and overwrites rdx with the high half of the
; product.
        mov     rax, 0
        mov     rbx, 8
        mul     rbx
        mov     rbx, 3
        mul     rbx

; Optimized: a shift handles the power of two, and lea handles the
; factor 3 as a + 2*a. (Doubling twice with `add rax, rax` would
; multiply by 4, not 3.) Neither rbx nor rdx is touched.
        xor     rax, rax                ; a = 0
        shl     rax, 3                  ; a *= 8
        lea     rax, [rax + rax*2]      ; a *= 3

; ---------------------------------------------------------------------
; Loop inversion
;   for (rax = 0; rax < INVERSION_LIMIT; rax++) { body }
; ---------------------------------------------------------------------
%define INVERSION_LIMIT 1000

; Non-optimized: the test sits at the top, so every iteration executes a
; conditional jump plus an unconditional jump back.
        xor     eax, eax                ; rax = 0
inversion_while_top:
        cmp     rax, INVERSION_LIMIT
        jae     inversion_while_exit
        ; loop body goes here
        inc     rax
        jmp     inversion_while_top
inversion_while_exit:

; Optimized: one guard test before the loop, then the test moves to the
; bottom, leaving a single conditional jump per iteration.
        xor     eax, eax                ; rax = 0
        cmp     rax, INVERSION_LIMIT
        jae     inversion_do_exit
inversion_do_top:
        ; loop body goes here
        inc     rax
        cmp     rax, INVERSION_LIMIT
        jb      inversion_do_top
inversion_do_exit:

; ---------------------------------------------------------------------
; Loop unswitching: original loop, with the loop-invariant test inside
;   for (rax = 0; rax < rcx; rax++) {
;       if (rbx == 0) rdx += 1; else rdx += 2;
;   }
; ---------------------------------------------------------------------
unswitch:
        xor     eax, eax                ; rax = loop counter
        xor     edx, edx                ; rdx = accumulator
        mov     rcx, 3                  ; iteration count
        mov     rbx, 1                  ; b: never changes inside the loop
.top:
        cmp     rax, rcx
        jae     .exit
        test    rbx, rbx                ; re-evaluated on every iteration
        jnz     .b_nonzero
        add     rdx, 1                  ; b == 0
        jmp     .next
.b_nonzero:
        add     rdx, 2                  ; b != 0
.next:
        inc     rax
        jmp     .top
.exit:

        times 4 nop                     ; four nops between this loop and the unswitched one

; ---------------------------------------------------------------------
; Unswitched loop
;   b (rbx) is tested once, ahead of the loop, and each outcome gets its
;   own copy of the loop with no test inside:
;     if (rbx == 0) for (...) rdx += 1;
;     else          for (...) rdx += 2;
; ---------------------------------------------------------------------
unswitched:
        xor     eax, eax                ; rax = loop counter
        xor     edx, edx                ; rdx = accumulator
        mov     rcx, 3                  ; iteration count
        mov     rbx, 0                  ; b
        test    rbx, rbx                ; the only test of b
        jnz     .b_nonzero

.b_zero:                                ; loop copy for b == 0
        cmp     rax, rcx
        jae     finuns
        add     rdx, 1
        inc     rax
        jmp     .b_zero

.b_nonzero:                             ; loop copy for b != 0
        cmp     rax, rcx
        jae     finuns
        add     rdx, 2
        inc     rax
        jmp     .b_nonzero

finuns:

%include "sysexit.asm.inc"