BorrowMutError: 'already borrowed' while executing CompactFheBool operations within a rayon parallel iterator
0xalexbel opened this issue · 6 comments
Unable to execute CompactFheBool operations within a rayon iterator
A BorrowMutError is raised intermittently when a CompactFheBool operation is executed inside a rayon parallel iterator. The panic occurs when accessing the thread-local RefCell that wraps the ShortintEngine. It happens in both Release and Debug builds.
The problem does not occur with FheBool or CompressedFheBool.
To Reproduce
- Copy/paste the code below
- Adjust the number of iterations in the main loop (100 is the default, may not be enough)
- Use either of the Cargo.toml files below
The main.rs file
use tfhe::set_server_key;
use tfhe::{
prelude::FheEncrypt, CompactFheBool, CompactPublicKey,
};
use rayon::iter::IntoParallelRefIterator;
use rayon::iter::ParallelIterator;
// Minimal reproducer: encrypting `CompactFheBool` values from inside a rayon
// parallel iterator intermittently panics with `BorrowMutError`.
fn main() {
// Panic at tfhe-0.5.3/src/shortint/engine/mod.rs:216:63
// already borrowed: BorrowMutError
let config = tfhe::ConfigBuilder::default().build();
// `ck` is later used to derive the compact public key; `sk` is the key
// handed to `set_server_key`.
let (ck, sk) = tfhe::generate_keys(config);
// Separate clone that can be moved into the broadcast closure, leaving `sk`
// available for the main thread below.
let pool_sk = sk.clone();
// `rayon::broadcast` runs the closure on the threads of the rayon pool, so
// each worker calls `set_server_key` with its own clone of the key.
rayon::broadcast(move |_| {
let thread_local_sk = pool_sk.clone();
set_server_key(thread_local_sk);
});
// Also set the server key on the calling (main) thread.
set_server_key(sk);
let compact_public_key = CompactPublicKey::try_new(&ck).unwrap();
// You may have to increase the number of iterations to reach the problem.
// 100 is enough on my machine (which is not very powerful)
let v = vec![true; 100];
// The panic is raised from inside `CompactFheBool::encrypt` (see the backtrace:
// encrypt -> ... -> ShortintEngine::with_thread_local_mut -> RefCell::borrow_mut).
v.par_iter().for_each(|x| {
// The result is intentionally unused; only the encryption call matters here.
let _a_compact_fhe_bool = CompactFheBool::encrypt(*x, &compact_public_key);
});
}
The Cargo.toml debug file
# Manifest for the reproducer, using Cargo's default build profiles.
[package]
name = "tfhe_bug"
version = "0.1.0"
edition = "2021"
[dependencies]
# tfhe version against which the panic was reported.
tfhe = { version = "0.5.3", features = [ "boolean", "shortint", "integer", "x86_64-unix" ] }
# Caret requirement: the backtrace in this report shows it resolved to rayon 1.9.0.
rayon = { version = "1.8.1" }
The Cargo.toml release file
# Manifest for the reproducer with optimized dependencies.
[package]
name = "tfhe_bug"
version = "0.1.0"
edition = "2021"
[dependencies]
# tfhe version against which the panic was reported.
tfhe = { version = "0.5.3", features = [ "boolean", "shortint", "integer", "x86_64-unix" ] }
# Caret requirement: the backtrace in this report shows it resolved to rayon 1.9.0.
rayon = { version = "1.8.1" }
# Per-package override: these settings apply to dependencies built under the
# `dev` profile, not to the `tfhe_bug` crate itself.
# NOTE(review): despite the "release" label, this tunes `profile.dev`; confirm
# that is the intended way to get optimized builds here.
[profile.dev.package."*"]
opt-level = 3
debug = false
# NOTE(review): '...' looks like a placeholder rather than a real value; confirm or remove.
split-debuginfo = '...' # Platform-specific.
strip = "none"
debug-assertions = false
overflow-checks = false
incremental = false
codegen-units = 16
Logs
stack backtrace:
0: rust_begin_unwind
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/std/src/panicking.rs:645:5
1: core::panicking::panic_fmt
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/panicking.rs:72:14
2: core::cell::panic_already_borrowed
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/cell.rs:761:5
3: core::cell::RefCell<T>::borrow_mut
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/cell.rs:1051:25
4: tfhe::shortint::engine::ShortintEngine::with_thread_local_mut::{{closure}}
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tfhe-0.5.3/src/shortint/engine/mod.rs:216:51
5: std::thread::local::LocalKey<T>::try_with
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/std/src/thread/local.rs:270:16
6: std::thread::local::LocalKey<T>::with
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/std/src/thread/local.rs:246:9
7: tfhe::shortint::engine::ShortintEngine::with_thread_local_mut
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tfhe-0.5.3/src/shortint/engine/mod.rs:216:9
thread '<unnamed>' panicked at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tfhe-0.5.3/src/shortint/engine/mod.rs:216:63:
already borrowed: BorrowMutError
8: tfhe::shortint::public_key::compact::CompactPublicKey::encrypt_iter
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tfhe-0.5.3/src/shortint/public_key/compact.rs:240:13
9: tfhe::integer::public_key::compact::CompactPublicKey::encrypt_radix_compact
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tfhe-0.5.3/src/integer/public_key/compact.rs:74:23
10: tfhe::integer::public_key::compact::CompactPublicKey::encrypt_iter_radix_compact
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tfhe-0.5.3/src/integer/public_key/compact.rs:100:24
11: tfhe::integer::public_key::compact::CompactPublicKey::encrypt_slice_radix_compact
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tfhe-0.5.3/src/integer/public_key/compact.rs:86:9
12: tfhe::high_level_api::keys::inner::IntegerCompactPublicKey::try_encrypt_compact
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tfhe-0.5.3/src/high_level_api/keys/inner.rs:228:9
13: <tfhe::high_level_api::booleans::compact::CompactFheBool as tfhe::high_level_api::traits::FheTryEncrypt<bool,tfhe::high_level_api::keys::public::CompactPublicKey>>::try_encrypt
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tfhe-0.5.3/src/high_level_api/booleans/compact.rs:59:26
14: <T as tfhe::high_level_api::traits::FheEncrypt<Clear,Key>>::encrypt
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tfhe-0.5.3/src/high_level_api/traits.rs:21:9
15: tfhe_bug::main::{{closure}}
at ./src/main.rs:26:35
16: core::ops::function::impls::<impl core::ops::function::FnMut<A> for &F>::call_mut
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/ops/function.rs:272:13
17: <core::slice::iter::Iter<T> as core::iter::traits::iterator::Iterator>::for_each
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/slice/iter/macros.rs:254:21
18: <rayon::iter::for_each::ForEachConsumer<F> as rayon::iter::plumbing::Folder<T>>::consume_iter
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.9.0/src/iter/for_each.rs:55:9
19: rayon::iter::plumbing::Producer::fold_with
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.9.0/src/iter/plumbing/mod.rs:110:9
20: rayon::iter::plumbing::bridge_producer_consumer::helper
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.9.0/src/iter/plumbing/mod.rs:438:13
21: rayon::iter::plumbing::bridge_producer_consumer::helper::{{closure}}
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.9.0/src/iter/plumbing/mod.rs:427:21
22: rayon_core::join::join_context::call_b::{{closure}}
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/join/mod.rs:129:25
23: rayon_core::job::JobResult<T>::call::{{closure}}
at /Users/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/rayon-core-1.12.1/src/job.rs:218:41
24: <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/panic/unwind_safe.rs:272:9
25: std::panicking::try::do_call
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/std/src/panicking.rs:552:40
26: ___rust_try
Configuration:
- OS: MacOS Big Sur 11.7.10
- rustc 1.76.0 (07dca489a 2024-02-04)
- tfhe 0.5.3
- VSCode Version: 1.85.2
Hello, thanks for the detailed report. We are going to investigate and see what we can do about it.
Sadly for now I don't have any workaround other than not using par_iter when encrypting Compact ciphertexts
There is potentially a way to fix this with a Mutex instead of a RefCell, but I'm not sure it would work well or be free of deadlocks, so we will have to test it and make sure we understand what rayon does with tasks.
But a Mutex essentially defeats the purpose of thread-local storage, so it's not great.
And it deadlocks, of course: it's the well-known rayon bug described in rayon-rs/rayon#592.
Using this issue as a bit of a notepad on that problem.
The problem arises when there are nested rayon calls, IIRC, as the recent example/addition proposed in rayon-rs/rayon#592 (e.g. rayon-rs/rayon#592 (comment)) for a fully blocking thread pool seems to indicate.
In our case, some threads are stealing tasks from other threads where the engine has already been borrowed; I'm still unclear on the exact succession of events.
It could be:
- thread 0 starts task 0 and borrows the engine from its thread-local storage
- thread 0 stops/yields from task 0 while still borrowing the engine
- thread 0 starts task 1 and borrows the engine from its thread-local storage -> borrow mut error
Or it could be:
- thread 0 starts tasks 0 and 1, keeps executing task 1, and borrows the engine from task 1's local storage, which happens to be the one from thread 0 (? not sure that makes any sense)
- thread 1 sees that task 0 is not being run, steals it, and tries to borrow the engine from the local storage, which is still the one from thread 0 since that thread started the task -> crash
example log
looks to be the first case 🤔
Thread #ThreadId(1), borrow cell: 0x7f40d3d6dc40
Thread #ThreadId(1), stops borrow cell
Thread #ThreadId(1), borrow cell: 0x7f40d3d6dc40
Thread #ThreadId(1), stops borrow cell
Thread #ThreadId(1), borrow cell: 0x7f40d3d6dc40
Thread #ThreadId(1), stops borrow cell
Thread #ThreadId(8), borrow cell: 0x7f40d02fc1c0
Thread #ThreadId(13), borrow cell: 0x7f40bb7fb1c0
Thread #ThreadId(6), borrow cell: 0x7f40d07041c0
Thread #ThreadId(11), borrow cell: 0x7f40bbbfd1c0
Thread #ThreadId(5), borrow cell: 0x7f40d09081c0
Thread #ThreadId(7), borrow cell: 0x7f40d05001c0
Thread #ThreadId(4), borrow cell: 0x7f40d0b091c0
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
Thread #ThreadId(2), borrow cell: 0x7f40d0f0b1c0
Thread #ThreadId(10), borrow cell: 0x7f40bbdfe1c0
Thread #ThreadId(12), borrow cell: 0x7f40bb9fc1c0
Thread #ThreadId(9), borrow cell: 0x7f40bbfff1c0
Thread #ThreadId(4), stops borrow cell
Thread #ThreadId(4), borrow cell: 0x7f40d0b091c0
Thread #ThreadId(5), stops borrow cell
Thread #ThreadId(5), borrow cell: 0x7f40d09081c0
Thread #ThreadId(12), stops borrow cell
Thread #ThreadId(12), borrow cell: 0x7f40bb9fc1c0
Thread #ThreadId(11), stops borrow cell
Thread #ThreadId(11), borrow cell: 0x7f40bbbfd1c0
Thread #ThreadId(3), stops borrow cell
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
Thread #ThreadId(8), stops borrow cell
Thread #ThreadId(8), borrow cell: 0x7f40d02fc1c0
Thread #ThreadId(10), stops borrow cell
Thread #ThreadId(10), borrow cell: 0x7f40bbdfe1c0
Thread #ThreadId(13), stops borrow cell
Thread #ThreadId(9), stops borrow cell
Thread #ThreadId(9), borrow cell: 0x7f40bbfff1c0
Thread #ThreadId(13), borrow cell: 0x7f40bb7fb1c0
Thread #ThreadId(2), stops borrow cell
Thread #ThreadId(2), borrow cell: 0x7f40d0f0b1c0
Thread #ThreadId(6), stops borrow cell
Thread #ThreadId(6), borrow cell: 0x7f40d07041c0
Thread #ThreadId(4), stops borrow cell
Thread #ThreadId(4), borrow cell: 0x7f40d0b091c0
Thread #ThreadId(5), stops borrow cell
Thread #ThreadId(5), borrow cell: 0x7f40d09081c0
Thread #ThreadId(12), stops borrow cell
Thread #ThreadId(12), borrow cell: 0x7f40bb9fc1c0
Thread #ThreadId(11), stops borrow cell
Thread #ThreadId(11), borrow cell: 0x7f40bbbfd1c0
Thread #ThreadId(8), stops borrow cell
Thread #ThreadId(8), borrow cell: 0x7f40d02fc1c0
Thread #ThreadId(3), stops borrow cell
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
Thread #ThreadId(10), stops borrow cell
Thread #ThreadId(10), borrow cell: 0x7f40bbdfe1c0
Thread #ThreadId(13), stops borrow cell
Thread #ThreadId(13), borrow cell: 0x7f40bb7fb1c0
Thread #ThreadId(9), stops borrow cell
Thread #ThreadId(9), borrow cell: 0x7f40bbfff1c0
Thread #ThreadId(2), stops borrow cell
Thread #ThreadId(2), borrow cell: 0x7f40d0f0b1c0
Thread #ThreadId(6), stops borrow cell
Thread #ThreadId(6), borrow cell: 0x7f40d07041c0
Thread #ThreadId(4), stops borrow cell
Thread #ThreadId(4), borrow cell: 0x7f40d0b091c0
Thread #ThreadId(5), stops borrow cell
Thread #ThreadId(5), borrow cell: 0x7f40d09081c0
Thread #ThreadId(12), stops borrow cell
Thread #ThreadId(12), borrow cell: 0x7f40bb9fc1c0
Thread #ThreadId(11), stops borrow cell
Thread #ThreadId(11), borrow cell: 0x7f40bbbfd1c0
Thread #ThreadId(8), stops borrow cell
Thread #ThreadId(8), borrow cell: 0x7f40d02fc1c0
Thread #ThreadId(3), stops borrow cell
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
Thread #ThreadId(10), stops borrow cell
Thread #ThreadId(10), borrow cell: 0x7f40bbdfe1c0
Thread #ThreadId(13), stops borrow cell
Thread #ThreadId(13), borrow cell: 0x7f40bb7fb1c0
Thread #ThreadId(9), stops borrow cell
Thread #ThreadId(9), borrow cell: 0x7f40bbfff1c0
Thread #ThreadId(2), stops borrow cell
Thread #ThreadId(2), borrow cell: 0x7f40d0f0b1c0
Thread #ThreadId(7), stops borrow cell
Thread #ThreadId(7), borrow cell: 0x7f40d05001c0
Thread #ThreadId(6), stops borrow cell
Thread #ThreadId(6), borrow cell: 0x7f40d07041c0
Thread #ThreadId(4), stops borrow cell
Thread #ThreadId(4), borrow cell: 0x7f40d0b091c0
Thread #ThreadId(5), stops borrow cell
Thread #ThreadId(5), borrow cell: 0x7f40d09081c0
Thread #ThreadId(12), stops borrow cell
Thread #ThreadId(12), borrow cell: 0x7f40bb9fc1c0
Thread #ThreadId(11), stops borrow cell
Thread #ThreadId(11), borrow cell: 0x7f40bbbfd1c0
Thread #ThreadId(8), stops borrow cell
Thread #ThreadId(8), borrow cell: 0x7f40d02fc1c0
Thread #ThreadId(3), stops borrow cell
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
Thread #ThreadId(10), stops borrow cell
Thread #ThreadId(10), borrow cell: 0x7f40bbdfe1c0
Thread #ThreadId(13), stops borrow cell
Thread #ThreadId(13), borrow cell: 0x7f40bb7fb1c0
Thread #ThreadId(9), stops borrow cell
Thread #ThreadId(9), borrow cell: 0x7f40bbfff1c0
Thread #ThreadId(2), stops borrow cell
Thread #ThreadId(2), borrow cell: 0x7f40d0f0b1c0
Thread #ThreadId(7), stops borrow cell
Thread #ThreadId(7), borrow cell: 0x7f40d05001c0
Thread #ThreadId(6), stops borrow cell
Thread #ThreadId(6), borrow cell: 0x7f40d07041c0
Thread #ThreadId(4), stops borrow cell
Thread #ThreadId(4), borrow cell: 0x7f40d0b091c0
Thread #ThreadId(5), stops borrow cell
Thread #ThreadId(5), borrow cell: 0x7f40d09081c0
Thread #ThreadId(12), stops borrow cell
Thread #ThreadId(12), borrow cell: 0x7f40bb9fc1c0
Thread #ThreadId(11), stops borrow cell
Thread #ThreadId(11), borrow cell: 0x7f40bbbfd1c0
Thread #ThreadId(8), stops borrow cell
Thread #ThreadId(8), borrow cell: 0x7f40d02fc1c0
Thread #ThreadId(3), stops borrow cell
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
Thread #ThreadId(10), stops borrow cell
Thread #ThreadId(10), borrow cell: 0x7f40bbdfe1c0
Thread #ThreadId(13), stops borrow cell
Thread #ThreadId(9), stops borrow cell
Thread #ThreadId(9), borrow cell: 0x7f40bbfff1c0
Thread #ThreadId(2), stops borrow cell
Thread #ThreadId(2), borrow cell: 0x7f40d0f0b1c0
Thread #ThreadId(7), stops borrow cell
Thread #ThreadId(7), borrow cell: 0x7f40d05001c0
Thread #ThreadId(6), stops borrow cell
Thread #ThreadId(6), borrow cell: 0x7f40d07041c0
Thread #ThreadId(13), borrow cell: 0x7f40bb7fb1c0
Thread #ThreadId(4), stops borrow cell
Thread #ThreadId(4), borrow cell: 0x7f40d0b091c0
Thread #ThreadId(2), borrow cell: 0x7f40d0f0b1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Thread #ThreadId(2), borrow cell: 0x7f40d0f0b1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(2), borrow cell: 0x7f40d0f0b1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(12), stops borrow cell
Thread #ThreadId(12), borrow cell: 0x7f40bb9fc1c0
Thread #ThreadId(8), stops borrow cell
Thread #ThreadId(8), borrow cell: 0x7f40d02fc1c0
Thread #ThreadId(11), stops borrow cell
Thread #ThreadId(11), borrow cell: 0x7f40bbbfd1c0
Thread #ThreadId(3), stops borrow cell
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
Thread #ThreadId(10), stops borrow cell
Thread #ThreadId(5), stops borrow cell
Thread #ThreadId(5), borrow cell: 0x7f40d09081c0
Thread #ThreadId(9), stops borrow cell
Thread #ThreadId(9), borrow cell: 0x7f40bbfff1c0
Thread #ThreadId(7), stops borrow cell
Thread #ThreadId(7), borrow cell: 0x7f40d05001c0
Thread #ThreadId(2), stops borrow cell
Thread #ThreadId(2), borrow cell: 0x7f40d0f0b1c0
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(13), stops borrow cell
Thread #ThreadId(13), borrow cell: 0x7f40bb7fb1c0
Thread #ThreadId(9), borrow cell: 0x7f40bbfff1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(9), borrow cell: 0x7f40bbfff1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(4), stops borrow cell
Thread #ThreadId(4), borrow cell: 0x7f40d0b091c0
Thread #ThreadId(3), stops borrow cell
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
Thread #ThreadId(7), borrow cell: 0x7f40d05001c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(11), stops borrow cell
Thread #ThreadId(11), borrow cell: 0x7f40bbbfd1c0
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(12), stops borrow cell
Thread #ThreadId(12), borrow cell: 0x7f40bb9fc1c0
Thread #ThreadId(3), borrow cell: 0x7f40d0d0a1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(8), stops borrow cell
Thread #ThreadId(8), borrow cell: 0x7f40d02fc1c0
Thread #ThreadId(7), stops borrow cell
Thread #ThreadId(7), borrow cell: 0x7f40d05001c0
Thread #ThreadId(11), borrow cell: 0x7f40bbbfd1c0
thread '<unnamed>' panicked at tfhe/src/shortint/engine/mod.rs:219:45:
already borrowed: BorrowMutError
Thread #ThreadId(13), stops borrow cell
Thread #ThreadId(6), stops borrow cell
Thread #ThreadId(4), stops borrow cell
Thread #ThreadId(5), stops borrow cell
Thread #ThreadId(2), stops borrow cell
Thread #ThreadId(11), stops borrow cell
Thread #ThreadId(8), stops borrow cell
Thread #ThreadId(9), stops borrow cell
Thread #ThreadId(7), stops borrow cell
Thread #ThreadId(12), stops borrow cell
Thread #ThreadId(3), stops borrow cell