Use switch tables instead of long strings of cmp/je combos
Opened this issue · 2 comments
Instead of doing stuff like:
; os_system_config -- select a handler from the function number in RDX.
; Every recognised number jumps straight to its os_system_config_* label;
; any other value falls through all comparisons and reaches the final ret.
os_system_config:
        ; function numbers 0-6
        cmp     rdx, 0
        je      os_system_config_timecounter
        cmp     rdx, 1
        je      os_system_config_argc
        cmp     rdx, 2
        je      os_system_config_argv
        cmp     rdx, 3
        je      os_system_config_networkcallback_get
        cmp     rdx, 4
        je      os_system_config_networkcallback_set
        cmp     rdx, 5
        je      os_system_config_clockcallback_get
        cmp     rdx, 6
        je      os_system_config_clockcallback_set

        ; function numbers 20-23 (the os_system_config_video_* handlers)
        cmp     rdx, 20
        je      os_system_config_video_base
        cmp     rdx, 21
        je      os_system_config_video_x
        cmp     rdx, 22
        je      os_system_config_video_y
        cmp     rdx, 23
        je      os_system_config_video_bpp

        ; function number 30
        cmp     rdx, 30
        je      os_system_config_mac

        ret                             ; no comparison matched
You could do:
; Jump table for os_system_config, indexed by the function number in RDX.
; Every slot is a dq (8 bytes), so the entry for function N is located at
; os_system_functions + N*8. Slot N must hold the handler that the cmp/je
; version reached for "cmp rdx, N"; numbers that version did not handle
; point at os_system_config_no_function.
; NOTE: this is data placed between routines -- nothing may fall through
; into it.
os_system_functions:
        dq os_system_config_timecounter                 ; 0
        dq os_system_config_argc                        ; 1
        dq os_system_config_argv                        ; 2
        dq os_system_config_networkcallback_get         ; 3
        dq os_system_config_networkcallback_set         ; 4
        dq os_system_config_clockcallback_get           ; 5
        dq os_system_config_clockcallback_set           ; 6
        times 13 dq os_system_config_no_function        ; 7-19
        dq os_system_config_video_base                  ; 20
        dq os_system_config_video_x                     ; 21
        dq os_system_config_video_y                     ; 22
        dq os_system_config_video_bpp                   ; 23
        times 6 dq os_system_config_no_function         ; 24-29
        dq os_system_config_mac                         ; 30
; Number of slots, derived from the table so the bounds check below cannot
; drift out of sync when entries are added.
os_system_functions_count equ ($ - os_system_functions) / 8

; os_system_config -- dispatch on the function number in RDX.
;  IN:  RDX = function number
; Numbers past the end of the table return immediately, exactly as the
; cmp/je chain did for unknown values.
os_system_config:
        cmp     rdx, os_system_functions_count
        jae     os_system_config_no_function    ; unsigned compare: out-of-range index
        ; Scale by 8 to match the dq entries. The absolute displacement form
        ; is kept from the original proposal; it assumes the table address
        ; fits in a sign-extended 32-bit displacement -- TODO confirm for
        ; the kernel's load address.
        jmp     qword [os_system_functions+rdx*8]
os_system_config_no_function:
        ret
I believe this would be much faster; I also believe it is what GCC does (or at least something similar) for switch statements.
I got curious about this question of multiple cmps vs. a lookup table.
I found that the cmps win for rdx<7 and the lookup wins for the rest. Funny -
I remember that back in school, a lecturer said it's faster to do a linear search
for n<=7 - I wonder if that's still true.
Here's the GAS code I used on Linux - you can call lookup() and cmps():
$ cat j.s
# j.s -- micro-benchmark: cmp/je chain versus jump-table dispatch.
# Syntax: GAS with Intel operand order (.intel_syntax noprefix).
# ABI:    System V AMD64 (Linux); both entry points take the selector in rdi.
#         Callable from C as cmps(n) and lookup(n); neither returns a value.
.intel_syntax noprefix
.globl cmps, lookup

        .text                           # code has to live in an executable section

# Dummy handlers: a..l are aliases for one and the same ret.
a:
b:
c:
d:
e:
f:
g:
h:
i:
j:
k:
l:
        ret

# cmps -- linear chain of compares.
#  In: rdi = selector. Unknown selectors fall through to the final ret.
cmps:
        cmp     rdi, 0
        je      a
        cmp     rdi, 1
        je      b
        cmp     rdi, 2
        je      c
        cmp     rdi, 3
        je      d
        cmp     rdi, 4
        je      e
        cmp     rdi, 5
        je      f
        cmp     rdi, 6
        je      g
        cmp     rdi, 7
        je      h
        cmp     rdi, 22
        je      i
        cmp     rdi, 23
        je      j
        cmp     rdi, 24
        je      k
        cmp     rdi, 31
        je      l
        ret

# Jump table: slot N holds the target that cmps reaches for selector N,
# or z where cmps has no case. Left in .data as in the original; the
# entries are absolute code addresses.
        .data
        .p2align 3
funs:
        .quad   a, b, c, d, e, f, g, h                          # 0-7
        .quad   z, z, z, z, z, z, z, z, z, z, z, z, z, z        # 8-21
        .quad   i, j, k                                         # 22-24
        .quad   z, z, z, z, z, z                                # 25-30
        .quad   l                                               # 31
        .quad   z                                               # 32
        .equ    FUNS_COUNT, (. - funs) >> 3                     # slots in the table

# lookup -- jump-table dispatch.
#  In:    rdi = selector
#  Clobb: rax, flags
        .text                           # back to code: lookup and z are instructions
lookup:
        cmp     rdi, FUNS_COUNT
        jae     z                       # unsigned: rejects selectors past the table
        lea     rax, [rip + funs]       # RIP-relative base so the object also links as PIE
        jmp     qword ptr [rax + rdi*8] # 8-byte slots; "ptr" is the GAS Intel-syntax spelling
z:
        ret
On 7 October 2016 at 20:44, Faissal Bensefia notifications@github.com
wrote:
Instead of doing stuff like:
; os_system_config -- select a handler from the function number in RDX.
; Every recognised number jumps straight to its os_system_config_* label;
; any other value falls through past the last je.
os_system_config:
        ; function numbers 0-6
        cmp     rdx, 0
        je      os_system_config_timecounter
        cmp     rdx, 1
        je      os_system_config_argc
        cmp     rdx, 2
        je      os_system_config_argv
        cmp     rdx, 3
        je      os_system_config_networkcallback_get
        cmp     rdx, 4
        je      os_system_config_networkcallback_set
        cmp     rdx, 5
        je      os_system_config_clockcallback_get
        cmp     rdx, 6
        je      os_system_config_clockcallback_set

        ; function numbers 20-23 (the os_system_config_video_* handlers)
        cmp     rdx, 20
        je      os_system_config_video_base
        cmp     rdx, 21
        je      os_system_config_video_x
        cmp     rdx, 22
        je      os_system_config_video_y
        cmp     rdx, 23
        je      os_system_config_video_bpp

        ; function number 30
        cmp     rdx, 30
        je      os_system_config_mac
ret
You could do:
; Jump table for os_system_config, indexed by the function number in RDX.
; Every slot is a dq (8 bytes), so the entry for function N is located at
; os_system_functions + N*8. Slot N must hold the handler that the cmp/je
; version reached for "cmp rdx, N"; numbers that version did not handle
; point at os_system_config_no_function.
; NOTE: this is data placed between routines -- nothing may fall through
; into it.
os_system_functions:
        dq os_system_config_timecounter                 ; 0
        dq os_system_config_argc                        ; 1
        dq os_system_config_argv                        ; 2
        dq os_system_config_networkcallback_get         ; 3
        dq os_system_config_networkcallback_set         ; 4
        dq os_system_config_clockcallback_get           ; 5
        dq os_system_config_clockcallback_set           ; 6
        times 13 dq os_system_config_no_function        ; 7-19
        dq os_system_config_video_base                  ; 20
        dq os_system_config_video_x                     ; 21
        dq os_system_config_video_y                     ; 22
        dq os_system_config_video_bpp                   ; 23
        times 6 dq os_system_config_no_function         ; 24-29
        dq os_system_config_mac                         ; 30
; Number of slots, derived from the table so the bounds check below cannot
; drift out of sync when entries are added.
os_system_functions_count equ ($ - os_system_functions) / 8

; os_system_config -- dispatch on the function number in RDX.
;  IN:  RDX = function number
; Numbers past the end of the table go to os_system_config_no_function,
; matching the cmp/je chain, which did nothing for unknown values.
os_system_config:
        cmp     rdx, os_system_functions_count
        jae     os_system_config_no_function    ; unsigned compare: out-of-range index
        ; Scale by 8 to match the dq entries. The absolute displacement form
        ; is kept from the original proposal; it assumes the table address
        ; fits in a sign-extended 32-bit displacement -- TODO confirm for
        ; the kernel's load address.
        jmp     qword [os_system_functions+rdx*8]
os_system_config_no_function:
ret
And I believe it would be much faster, I also believe this is what GCC
does (or at least, something similar) for switch statements.
—
You are receiving this because you are subscribed to this thread.
Reply to this email directly, view it on GitHub
ReturnInfinity/BareMetal-OS#117, or mute the
thread
https://github.com/notifications/unsubscribe-auth/AAn-H3ioEg_aDGf2VmRNt1Oy2zm_YLdBks5qxhSKgaJpZM4KQ2f6
.
Awesome idea!! I'll give it a test on my end.