September 28, 2023
ElleJay
EXTERN printf
EXTERN atoi
SECTION .data
prn: DB `suml: %ld\n\0`
SECTION .text
; param rdi: 64 bit integer
; returns rax: 64 bit integer
suml: MOV rax, rdi
.loop: CMP rdi, 0
JLE .end
SUB rdi, 1
ADD rax, rdi
JMP .loop
.end: RET
GLOBAL main
main: CMP rdi, 2
MOV rax, 1
JNE .end
MOV rdi, [rsi+8]
CALL atoi
MOV rdi, rax
CALL suml
MOV rdi, prn
MOV rsi, rax
MOV rax, 0
CALL printf
MOV rax, 0
.end: RET
SECTION .note.GNU-stack noalloc noexec nowrite progbits
$ ./suml 1
suml: 1
$ ./suml 2
suml: 3
$ ./suml 3
suml: 6
EXTERN printf
EXTERN atoi
SECTION .data
prn: DB `sumr: %ld\n\0`
SECTION .text
sumr: CMP rdi, 0
CMOVLE rax, rdi
JLE .end
MOV rax, rdi
SUB rdi, 1
PUSH rax
CALL sumr
POP rdi
ADD rax, rdi
.end: RET
GLOBAL main
main: CMP rdi, 2
MOV rax, 1
JNE .end
MOV rdi, [rsi+8]
CALL atoi
MOV rdi, rax
CALL sumr
MOV rdi, prn
MOV rsi, rax
MOV rax, 0
CALL printf
MOV rax, 0
.end: RET
SECTION .note.GNU-stack noalloc noexec nowrite progbits
$ ./sumr 1
sumr: 1
$ ./sumr 2
sumr: 3
$ ./sumr 3
sumr: 6
EXTERN printf
EXTERN atoi
SECTION .data
prn: DB `sumtr: %ld\n\0`
SECTION .text
sumtr: MOV rax, rdi
.rec: CMP rdi, 0
JLE .end
SUB rdi, 1
ADD rax, rdi
PUSH rdi
CALL .rec
;JMP .rec
POP rdi
.end: RET
GLOBAL main
main: CMP rdi, 2
MOV rax, 1
JNE .end
MOV rdi, [rsi+8]
CALL atoi
MOV rdi, rax
CALL sumtr
MOV rdi, prn
MOV rsi, rax
MOV rax, 0
CALL printf
MOV rax, 0
.end: RET
SECTION .note.GNU-stack noalloc noexec nowrite progbits
$ ./sumtr 1
sumtr: 1
$ ./sumtr 2
sumtr: 3
$ ./sumtr 3
sumtr: 6
Recursion
sumr: 125000250000
real 0m0.007s
user 0m0.003s
sys 0m0.004s
Tail-Call Recursion
sumtr: 125000250000
real 0m0.007s
user 0m0.004s
sys 0m0.003s
Without PUSH/POP:
sumtr: 125000250000
real 0m0.005s
user 0m0.004s
sys 0m0.001s
Tail Call Eliminated:
sumtr: 125000250000
real 0m0.001s
user 0m0.001s
sys 0m0.000s
Loop
suml: 125000250000
real 0m0.001s
user 0m0.001s
sys 0m0.000s
FoldL
FoldR
foldl: CMP rdx, 0
CMOVLE rax, rsi
; rax: result
JBE .end
;
PUSH rdi
MOV rax, rdi
MOV rdi, rsi
MOV rsi, [rcx]
; rdi: init
; rsi: list[0]
; rax: fn
; rdx: n
; rcx: list
PUSH rdx
PUSH rcx
CALL rax
MOV rsi, rax
POP rcx
POP rdx
POP rdi
;
SUB rdx, 1
ADD rcx, 8
; rdi: fn
; rsi: fn init list[0]
; rdx: n-1
; rcx: list[1:]
CALL foldl
; rax: result
.end: RET
foldl: CMP rdx, 0
CMOVLE rax, rsi
; rax: result
JBE .end
;
PUSH rdi
MOV rax, rdi
MOV rdi, rsi
MOV rsi, [rcx]
; rdi: init
; rsi: list[0]
; rax: fn
; rdx: n
; rcx: list
PUSH rdx
PUSH rcx
CALL rax
MOV rsi, rax
POP rcx
POP rdx
POP rdi
;
SUB rdx, 1
ADD rcx, 8
; rdi: fn
; rsi: fn init list[0]
; rdx: n-1
; rcx: list[1:]
CALL foldl
; rax: result
.end: RET
foldl: CMP rdx, 0
CMOVLE rax, rsi
; rax: result
JBE .end
;
PUSH rdi
MOV rax, rdi
MOV rdi, rsi
MOV rsi, [rcx]
; rdi: init
; rsi: list[0]
; rax: fn
; rdx: n
; rcx: list
PUSH rdx
PUSH rcx
CALL rax
MOV rsi, rax
POP rcx
POP rdx
POP rdi
;
SUB rdx, 1
ADD rcx, 8
; rdi: fn
; rsi: fn init list[0]
; rdx: n-1
; rcx: list[1:]
CALL foldl
; rax: result
.end: RET
foldll: CMP rdx, 0
CMOVLE rax, rsi
; rax: result
JBE .end
MOV r8, rdi
;PUSH rdi
MOV rax, rdi
MOV rdi, rsi
MOV rsi, [rcx]
; rdi: init
; rsi: list[0]
; rax: fn
; rdx: n
; rcx: list
;PUSH rdx
;PUSH rcx
CALL rax
MOV rsi, rax
;POP rcx
;POP rdx
;POP rdi
MOV rdi, r8
SUB rdx, 1
ADD rcx, 8
; rdi: fn
; rsi: fn init list[0]
; rdx: n-1
; rcx: list[1:]
JMP foldll
; rax: result
.end: RET
foldr: 500000
real 0m0.000s
user 0m0.000s
sys 0m0.000s
foldl: 500000
real 0m0.000s
user 0m0.000s
sys 0m0.000s
foldll: 500000
real 0m0.000s
user 0m0.000s
sys 0m0.000s
Record perf(1)
events:
perf record -o meow.syscap ./meow $(seq 1 120000) && \
perf report -s sym,srcline --stdio -i meow.syscap
Radically reduced number of events:
Version | # Event |
---|---|
Fold Right (Recursive) | 11689523 |
Fold Left (Tail Recursive) | 10050866 |
Fold Left (Tail Call Eliminated) | 6905606 |
Overhead | Symbol | Source:Line |
---|---|---|
12.62% | foldr |
foldr.asm:46 |
11.57% | __GI_strtoll |
|
11.42% | foldr |
foldr.asm:45 |
6.91% | __GI_____strtoll_l_internal |
|
6.90% | foldr |
foldr.asm:43 |
6.61% | 0xffffffff9b001917 |
|
6.61% | add |
foldr.asm:23 |
6.25% | main.fill |
foldr.asm:100 |
5.95% | __GI_____strtoll_l_internal |
|
… | … |
Overhead | Symbol | Source:Line |
---|---|---|
28.79% | foldl.end |
foldl.asm:62 |
10.11% | __GI_strtoll |
|
9.92% | __GI_____strtoll_l_internal |
|
9.85% | __GI_____strtoll_l_internal |
|
9.68% | atoi@plt |
|
9.57% | atoi |
|
9.44% | __GI_____strtoll_l_internal |
|
9.44% | __GI_____strtoll_l_internal |
|
3.05% | __GI___tunables_init |
|
… | … |
Overhead | Symbol | Source:Line |
---|---|---|
17.24% | __GI_strtoll |
|
16.52% | __GI_____strtoll_l_internal |
|
15.62% | atoi@plt |
|
15.25% | __GI_____strtoll_l_internal |
|
15.15% | __GI_____strtoll_l_internal |
|
15.04% | __GI_____strtoll_l_internal |
|
4.97% | __GI___tunables_init |
|
0.21% | _start |
|
0.01% | 0xffffffff9b001917 |
What happens to foldl
when it evaluates lazily?