Expensive setup code repeated too often
sweeneyde opened this issue · 2 comments
When I run the benchmark below, I notice that most of the time is spent in the zipf_string() function, which I believe is re-creating the relevant strings in each and every spawned process: a print() placed inside zipf_string() fires continuously. This overhead is significant, especially when running 20*16*5 == 1600 benchmarks. Is there a recommended way to cache expensively generated parameters so that each process does not have to create them from scratch?
I am also generally confused about the multiprocessing model of pyperf: I would expect zipf_string to be called exactly twice (not hundreds of times) per benchmark. I couldn't find anything in the documentation about this. Am I missing something? I am running Windows 10.
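One plausible explanation, as an assumption to verify rather than anything I found documented: pyperf's Runner appears to spawn a fresh worker process per run, and each worker re-executes the whole script from the top, so all of the setup in do_timings() repeats per worker. A minimal way to check that assumption is to log the process id at import time:

import os

# If pyperf re-executes this script in every spawned worker, this line
# prints a different pid each time a worker starts.
print("running under pid", os.getpid())

If the pid changes constantly across a run, the setup cost is being paid once per worker rather than once per benchmark. The full benchmark script follows: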
from random import Random

import pyperf

# shuffled alphabet
alphabet = 'DHUXYEZQCLFKISBVRGNAMWPTOJ'
zipf = [1/x for x in range(1, 1+len(alphabet))]

def zipf_string(length, seed):
    letters = Random(seed).choices(alphabet, weights=zipf, k=length)
    return ''.join(letters)

needle_lengths = [
    2, 3, 4, 6,
    8, 12, 16, 24,
    32, 48, 64, 96,
    128, 192, 256, 384,
    512, 768, 1024, 1536,
]

haystack_lengths = [
    500, 750, 1000, 1500,
    2000, 3000, 4000, 6000,
    8000, 12000, 16000, 24000,
    32000, 48000, 64000, 96000,
]

def bench(loops, needle, haystack):
    range_it = range(loops)
    t0 = pyperf.perf_counter()
    for _ in range_it:
        # ten calls per iteration, matching inner_loops=10 below
        haystack.count(needle)
        haystack.count(needle)
        haystack.count(needle)
        haystack.count(needle)
        haystack.count(needle)
        haystack.count(needle)
        haystack.count(needle)
        haystack.count(needle)
        haystack.count(needle)
        haystack.count(needle)
    return pyperf.perf_counter() - t0

def do_timings():
    import pyperf
    runner = pyperf.Runner()
    for m in needle_lengths:
        for n in haystack_lengths:
            if n < m:
                continue
            for s in (1, 2, 3, 4, 5):
                seed = (s*n + m) % 1_000_003
                needle = zipf_string(m, seed)
                haystack = zipf_string(n, seed ** 2)
                runner.bench_time_func(
                    f"needle={m}, haystack={n}, seed={s}",
                    bench, needle, haystack,
                    inner_loops=10,
                )

if __name__ == "__main__":
    do_timings()
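If the per-worker re-execution theory above holds, an in-process cache such as functools.lru_cache cannot help, because every worker starts with empty memory. One workaround is to cache the generated strings on disk so only the first process pays the generation cost. A minimal sketch that builds on the definitions in the script above; the cache file name zipf_cache.pickle and the helper load_cached_strings() are hypothetical, and this assumes all workers share a working directory:

import os
import pickle

CACHE_PATH = "zipf_cache.pickle"  # hypothetical cache file

def load_cached_strings():
    # Reuse the pickled strings if an earlier process already built them.
    if os.path.exists(CACHE_PATH):
        with open(CACHE_PATH, "rb") as f:
            return pickle.load(f)
    # Otherwise generate everything once and persist it for later workers.
    pairs = {}
    for m in needle_lengths:
        for n in haystack_lengths:
            if n < m:
                continue
            for s in (1, 2, 3, 4, 5):
                seed = (s*n + m) % 1_000_003
                pairs[(m, n, s)] = (zipf_string(m, seed),
                                    zipf_string(n, seed ** 2))
    with open(CACHE_PATH, "wb") as f:
        pickle.dump(pairs, f)
    return pairs

do_timings() would then call load_cached_strings() once and look up pairs[(m, n, s)] instead of calling zipf_string() in the loop.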
For what it's worth, I wound up using two separate programs: one to generate the benchmarks and one to run them:
...
def generate_benchmarks():
    output = []
    for m in needle_lengths:
        for n in haystack_lengths:
            if n < m:
                continue
            for s in (1, 2, 3):
                seed = (s*n + m) % 1_000_003
                needle = zipf_string(m, seed)
                haystack = zipf_string(n, seed ** 2)
                name = f"needle={m}, haystack={n}, seed={s}"
                output.append((name, needle, haystack))
    with open("_generated.py", 'w') as f:
        print("benches = [", file=f)
        for name, needle, haystack in output:
            print(f"    {(name, needle, haystack)!r},", file=f)
        print("]", file=f)
...
def do_timings():
    import pyperf
    runner = pyperf.Runner()
    from _generated import benches
    for name, needle, haystack in benches:
        runner.bench_time_func(
            name,
            bench, needle, haystack,
            inner_loops=10,
        )

if __name__ == "__main__":
    # generate_benchmarks()
    do_timings()
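The same two-phase idea can also live in a single file by guarding the expensive step on the existence of the generated module, so that re-spawned pyperf workers skip it; a sketch replacing the __main__ block above, assuming every process runs from the same working directory:

if __name__ == "__main__":
    import os
    # Only the first process pays the generation cost; pyperf workers
    # (and later runs) find _generated.py already on disk.
    if not os.path.exists("_generated.py"):
        generate_benchmarks()
    do_timings()

Deleting _generated.py forces a regeneration on the next run.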