Failing to use mp execution
alexandremuzio opened this issue · 4 comments
I am trying to use the MPExecutor but I am getting the following error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/alferre/anaconda3/envs/mtdev/lib/python3.7/multiprocessing/pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "/data1/alferre/cc_net/cc_net/execution.py", line 145, in global_fn
return f(*args[1:])
File "/data1/alferre/cc_net/cc_net/mine.py", line 347, in _mine_shard
output=tmp_output if not conf.will_split else None,
File "/data1/alferre/cc_net/cc_net/jsonql.py", line 435, in run_pipes
initargs=(transform,),
File "/home/alferre/anaconda3/envs/mtdev/lib/python3.7/multiprocessing/context.py", line 119, in Pool
context=self.get_context())
File "/home/alferre/anaconda3/envs/mtdev/lib/python3.7/multiprocessing/pool.py", line 176, in __init__
self._repopulate_pool()
File "/home/alferre/anaconda3/envs/mtdev/lib/python3.7/multiprocessing/pool.py", line 241, in _repopulate_pool
w.start()
File "/home/alferre/anaconda3/envs/mtdev/lib/python3.7/multiprocessing/process.py", line 110, in start
'daemonic processes are not allowed to have children'
AssertionError: daemonic processes are not allowed to have children
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/alferre/anaconda3/envs/mtdev/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/home/alferre/anaconda3/envs/mtdev/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/data1/alferre/cc_net/cc_net/__main__.py", line 24, in <module>
main()
File "/data1/alferre/cc_net/cc_net/__main__.py", line 20, in main
func_argparse.parse_and_call(parser)
File "/home/alferre/anaconda3/envs/mtdev/lib/python3.7/site-packages/func_argparse/__init__.py", line 72, in parse_and_call
return command(**parsed_args)
File "/data1/alferre/cc_net/cc_net/mine.py", line 509, in main
regroup(conf)
File "/data1/alferre/cc_net/cc_net/mine.py", line 364, in regroup
mine(conf)
File "/data1/alferre/cc_net/cc_net/mine.py", line 271, in mine
ex(_mine_shard, repeat(conf), hashes_files, *_transpose(missing_outputs))
File "/data1/alferre/cc_net/cc_net/execution.py", line 174, in __call__
global_fn, zip(itertools.repeat(f_name), *args)
File "/home/alferre/anaconda3/envs/mtdev/lib/python3.7/multiprocessing/pool.py", line 748, in next
raise value
AssertionError: daemonic processes are not allowed to have children
I am running the following command
python -m cc_net mine --config /home/alferre/data/cc_net/config/config_alex.json
And this is my config file:
{
"output_dir": "/home/alferre/data/cc_net/data_alex",
"dump": "2019-09",
"num_shards": 1,
"num_segments_per_shard": 1,
"hash_in_mem": 2,
"mine_num_processes": 4,
"lang_whitelist": [
"pt"
],
"execution": "mp",
"target_size": "32M",
"cleanup_after_regroup": false
}
Sorry for the delay, I'm just back from paternity leave. Did you solve your issue in the meantime?
I've never seen such an error.
I'll try to reproduce.
Can you share more information about your execution environment? OS and specific Python version?
Thanks.
I think the error is that a multiprocessing worker process can't start its own pool of processes.
In your case that means you should use "mine_num_processes": 1
if you want several processes handling several shards in parallel, or "execution": "debug"
if you want several processes working on the same shard.
Can you try those ?
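Just run the command export PYTHONOPTIMIZE=1
in the terminal (this makes Python skip assert statements, so the "daemonic processes are not allowed to have children" assertion is no longer raised). Then run your command python -m cc_net mine --config /home/alferre/data/cc_net/config/config_alex.json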
Hi, I got a similar error when using execution mp, as follows:
Traceback (most recent call last):
File "/usr/lib/python3.7/multiprocessing/pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "/ssd2/dongzhe/cc_net/cc_net/execution.py", line 145, in global_fn
return f(*args[1:])
File "/ssd2/dongzhe/cc_net/cc_net/mine.py", line 218, in _hashes_shard
file=conf.get_cc_shard(shard),
File "/ssd2/dongzhe/cc_net/cc_net/jsonql.py", line 449, in run_pipes
for res in results:
File "/ssd2/dongzhe/cc_net/cc_net/jsonql.py", line 296, in map
for x in source:
File "/ssd2/dongzhe/cc_net/cc_net/process_wet_file.py", line 199, in __iter__
for doc in parse_warc_file(iter(f), self.min_len):
File "/ssd2/dongzhe/cc_net/cc_net/process_wet_file.py", line 117, in parse_warc_file
for doc in group_by_docs(lines):
File "/ssd2/dongzhe/cc_net/cc_net/process_wet_file.py", line 89, in group_by_docs
for warc in warc_lines:
File "/usr/lib/python3.7/gzip.py", line 300, in read1
return self._buffer.read1(size)
File "/usr/lib/python3.7/_compression.py", line 68, in readinto
data = self.read(len(byte_view))
File "/usr/lib/python3.7/gzip.py", line 493, in read
raise EOFError("Compressed file ended before the "
EOFError: Compressed file ended before the end-of-stream marker was reached
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/ssd2/dongzhe/cc_net/cc_net/__main__.py", line 24, in <module>
main()
File "/ssd2/dongzhe/cc_net/cc_net/__main__.py", line 20, in main
func_argparse.parse_and_call(parser)
File "/home/dongzhe/dz_venv_3.7/lib/python3.7/site-packages/func_argparse/__init__.py", line 72, in parse_and_call
return command(**parsed_args)
File "/ssd2/dongzhe/cc_net/cc_net/mine.py", line 509, in main
regroup(conf)
File "/ssd2/dongzhe/cc_net/cc_net/mine.py", line 364, in regroup
mine(conf)
File "/ssd2/dongzhe/cc_net/cc_net/mine.py", line 257, in mine
hashes_groups = list(jsonql.grouper(hashes(conf), conf.hash_in_mem))
File "/ssd2/dongzhe/cc_net/cc_net/mine.py", line 206, in hashes
ex(_hashes_shard, repeat(conf), *_transpose(missing_outputs))
File "/ssd2/dongzhe/cc_net/cc_net/execution.py", line 174, in __call__
global_fn, zip(itertools.repeat(f_name), *args)
File "/usr/lib/python3.7/multiprocessing/pool.py", line 748, in next
raise value
EOFError: Compressed file ended before the end-of-stream marker was reached
when I ran the command:
python -m cc_net mine --config config/marathon.json
where the marathon.json looks like:
{
"dump": "2019-09",
"num_shards": 1600,
"lang_whitelist": ["en", "ja", "zh"],
"lm_languages": ["en", "ja", "zh"],
"mine_num_processes": 1,
"execution": "mp",
"num_segments_per_shard": -1,
"task_parallelism": 96,
"target_size": "4G"
}
By the way, export PYTHONOPTIMIZE=1
was run in the terminal beforehand.