mio (and tokio) sockets are completely broken under wine
whitequark opened this issue · 61 comments
Testcase:
// Minimal reproduction: the program only binds a TCP listener and exits.
#[tokio::main]
async fn main() {
// Listen address: all IPv4 interfaces (0.0.0.0), port 1234.
let addr = std::net::SocketAddr::from(([0,0,0,0], 1234));
// `unwrap` turns a failed bind into a panic that prints the underlying error.
tokio::net::TcpListener::bind(addr).await.unwrap();
}
On Windows, it succeeds:
> mio-bug.exe
On wine (any version), it crashes:
$ wine mio-bug.exe
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: Os { code: 2, kind: NotFound, message: "File not found." }', src\main.rs:3:88
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Note the very unusual and confusing ENOENT
error code. To understand how it gets produced, we can use the following command:
$ strace -e '!write' env WINEDEBUG=relay,ws2_32 wine mio-bug.exe
unedited part of the log
0140:Call ws2_32.socket(00000002,00000001,00000000) ret=14007a67d
0140:Call ntdll.RtlAllocateHeap(00010000,00000008,00000048) ret=7f42dbcaae68
0140:Ret ntdll.RtlAllocateHeap() retval=00058a00 ret=7f42dbcaae68
0140:Call ntdll.RtlInitUnicodeString(0021ddd0,7f42dbcb7b40 L"\\Device\\Afd") ret=7f42dbcaa8cc
0140:Ret ntdll.RtlInitUnicodeString() retval=00000018 ret=7f42dbcaa8cc
0140:Call ntdll.NtOpenFile(0021ddb8,c0100000,0021ddf0,0021dde0,00000000,00000000) ret=7f42dbcaa947
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
writev(3, [{iov_base=",\0\0\0\26\0\0\0\0\0\0\0\0\0\20\300\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., iov_len=64}, {iov_base="\\\0D\0e\0v\0i\0c\0e\0\\\0A\0f\0d\0", iov_len=22}], 2) = 86
read(4, "\0\0\0\0\0\0\0\0\200\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
0140:Ret ntdll.NtOpenFile() retval=00000000 ret=7f42dbcaa947
0140:Call ntdll.NtDeviceIoControlFile(00000080,00000000,00000000,00000000,0021dde0,00128320,0021ddc0,00000010,00000000,00000000) ret=7f42dbcaa9ab
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
writev(3, [{iov_base="\201\0\0\0\20\0\0\0\0\0\0\0 \203\22\0\200\0\0\0\0\0\0\0\340\335!\0\0\0\0\0"..., iov_len=64}, {iov_base="\2\0\0\0\1\0\0\0\6\0\0\0\0\0\0\0", iov_len=16}], 2) = 80
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
0140:Ret ntdll.NtDeviceIoControlFile() retval=00000000 ret=7f42dbcaa9ab
0140:Ret ws2_32.socket() retval=00000080 ret=14007a67d
0140:Call ws2_32.ioctlsocket(00000080,8004667e,0021dfd4) ret=14007a778
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
0140:Ret ws2_32.ioctlsocket() retval=00000000 ret=14007a778
0140:Call ucrtbase.memset(0021dc70,00000000,00000004) ret=14009031b
0140:Ret ucrtbase.memset() retval=0021dc70 ret=14009031b
0140:Call ucrtbase.memset(0021dd58,00000000,00000008) ret=14007ab78
0140:Ret ucrtbase.memset() retval=0021dd58 ret=14007ab78
0140:Call ws2_32.bind(00000080,0021e088,00000010) ret=140073798
0140:Call ntdll.wine_server_handle_to_fd(00000080,00000000,0021de50,00000000) ret=7f42dbcac3b7
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [HUP INT USR1 USR2 ALRM CHLD IO], 8) = 0
read(4, "\0\0\0\0\0\0\0\0\3\0\0\0\0\0\0\0\237\1\22\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [HUP INT USR1 USR2 ALRM CHLD IO], NULL, 8) = 0
recvmsg(9, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\200\0\0\0", iov_len=4}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[72]}], msg_controllen=24, msg_flags=MSG_CMSG_CLOEXEC}, MSG_CMSG_CLOEXEC) = 4
fcntl(72, F_SETFD, FD_CLOEXEC) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
0140:Ret ntdll.wine_server_handle_to_fd() retval=00000000 ret=7f42dbcac3b7
bind(72, {sa_family=AF_INET, sin_port=htons(1234), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
0140:Call ntdll.wine_server_release_fd(00000080,00000048) ret=7f42dbcac429
close(72) = 0
0140:Ret ntdll.wine_server_release_fd() retval=00000000 ret=7f42dbcac429
0140:Ret ws2_32.bind() retval=00000000 ret=140073798
0140:Call ws2_32.listen(00000080,00000400) ret=1400739a7
0140:Call ntdll.wine_server_handle_to_fd(00000080,00000001,0021ddf0,00000000) ret=7f42dbca5b57
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [HUP INT USR1 USR2 ALRM CHLD IO], 8) = 0
read(4, "\0\0\0\0\0\0\0\0\3\0\0\0\0\0\0\0\237\1\22\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [HUP INT USR1 USR2 ALRM CHLD IO], NULL, 8) = 0
recvmsg(9, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\200\0\0\0", iov_len=4}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[72]}], msg_controllen=24, msg_flags=MSG_CMSG_CLOEXEC}, MSG_CMSG_CLOEXEC) = 4
fcntl(72, F_SETFD, FD_CLOEXEC) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
0140:Ret ntdll.wine_server_handle_to_fd() retval=00000000 ret=7f42dbca5b57
getsockname(72, {sa_family=AF_INET, sin_port=htons(1234), sin_addr=inet_addr("0.0.0.0")}, [128->16]) = 0
listen(72, 1024) = 0
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
0140:Call ntdll.wine_server_release_fd(00000080,00000048) ret=7f42dbca5bce
close(72) = 0
0140:Ret ntdll.wine_server_release_fd() retval=00000000 ret=7f42dbca5bce
0140:Ret ws2_32.listen() retval=00000000 ret=1400739a7
0140:Call ucrtbase.memcpy(0021db98,00035c30,00000008) ret=14008e0c9
0140:Ret ucrtbase.memcpy() retval=0021db98 ret=14008e0c9
0140:Call ucrtbase.memcpy(00035c30,0021dca8,00000008) ret=14008e0c9
0140:Ret ucrtbase.memcpy() retval=00035c30 ret=14008e0c9
0140:Call ucrtbase.memcpy(0021dc18,00035c30,00000008) ret=14008e0c9
0140:Ret ucrtbase.memcpy() retval=0021dc18 ret=14008e0c9
0140:Call ucrtbase.memcpy(00035c30,0021dd28,00000008) ret=14008e0c9
0140:Ret ucrtbase.memcpy() retval=00035c30 ret=14008e0c9
0140:Call ntdll.RtlAcquireSRWLockExclusive(0003b570) ret=14003e1be
0140:Ret ntdll.RtlAcquireSRWLockExclusive() retval=00000000 ret=14003e1be
0140:Call KERNEL32.GetProcessHeap() ret=1400a0415
0140:Ret KERNEL32.GetProcessHeap() retval=00010000 ret=1400a0415
0140:Call ntdll.RtlAllocateHeap(00010000,00000000,00000c00) ret=14008e83b
0140:Ret ntdll.RtlAllocateHeap() retval=00058a60 ret=14008e83b
0140:Call ucrtbase.memcpy(0021d570,0021d638,00000038) ret=14001eeb5
0140:Ret ucrtbase.memcpy() retval=0021d570 ret=14001eeb5
0140:Call ucrtbase.memcpy(0021d3a0,0021d470,00000038) ret=14002c897
0140:Ret ucrtbase.memcpy() retval=0021d3a0 ret=14002c897
0140:Call ucrtbase.memcpy(0021d438,0021d3a0,00000038) ret=14002c8b8
0140:Ret ucrtbase.memcpy() retval=0021d438 ret=14002c8b8
0140:Call ucrtbase.memcpy(0021d538,0021d438,00000038) ret=140015700
0140:Ret ucrtbase.memcpy() retval=0021d538 ret=140015700
0140:Call ucrtbase.memcpy(0021d5f0,0021d528,00000048) ret=14001eee6
0140:Ret ucrtbase.memcpy() retval=0021d5f0 ret=14001eee6
0140:Call ucrtbase.memcpy(0021d9b0,0021d5f0,00000048) ret=14005a57c
0140:Ret ucrtbase.memcpy() retval=0021d9b0 ret=14005a57c
0140:Call ucrtbase.memcpy(0021d620,0021d950,00000058) ret=14004b545
0140:Ret ucrtbase.memcpy() retval=0021d620 ret=14004b545
0140:Call ucrtbase.memcpy(0021d520,0021d620,00000058) ret=14002c5f7
0140:Ret ucrtbase.memcpy() retval=0021d520 ret=14002c5f7
0140:Call ucrtbase.memcpy(0021d5c8,0021d520,00000058) ret=14002c618
0140:Ret ucrtbase.memcpy() retval=0021d5c8 ret=14002c618
0140:Call ucrtbase.memcpy(0021d8f8,0021d5c8,00000058) ret=14004b576
0140:Ret ucrtbase.memcpy() retval=0021d8f8 ret=14004b576
0140:Call ucrtbase.memcpy(00058a60,0021d5e0,00000060) ret=1400719ae
0140:Ret ucrtbase.memcpy() retval=00058a60 ret=1400719ae
0140:Call ntdll.RtlReleaseSRWLockExclusive(0003b570) ret=14003e19e
0140:Ret ntdll.RtlReleaseSRWLockExclusive() retval=00000000 ret=14003e19e
0140:Call ntdll.RtlAcquireSRWLockExclusive(0003b520) ret=140078bae
0140:Ret ntdll.RtlAcquireSRWLockExclusive() retval=00000000 ret=140078bae
0140:Call ntdll.NtCreateFile(0021cbe8,00100000,1400b9d28,0021cbf0,00000000,00000000,00000003,00000001,00000000,00000000,00000000) ret=14008569f
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
writev(3, [{iov_base=",\0\0\0\36\0\0\0\0\0\0\0\0\0\20\0\0\0\0\0\0\0\0\0\3\0\0\0\0\0\0\0"..., iov_len=64}, {iov_base="\\\0D\0e\0v\0i\0c\0e\0\\\0A\0f\0d\0\\\0M\0i\0o\0", iov_len=30}], 2) = 94
read(4, "4\0\0\300\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
0140:Ret ntdll.NtCreateFile() retval=c0000034 ret=14008569f
0140:Call ntdll.RtlNtStatusToDosError(c0000034) ret=1400856b9
0140:Ret ntdll.RtlNtStatusToDosError() retval=00000002 ret=1400856b9
0140:Call ntdll.RtlReleaseSRWLockExclusive(0003b520) ret=140078b8e
0140:Ret ntdll.RtlReleaseSRWLockExclusive() retval=00000000 ret=140078b8e
0140:Call ntdll.RtlAcquireSRWLockExclusive(0003b570) ret=14003e1be
0140:Ret ntdll.RtlAcquireSRWLockExclusive() retval=00000000 ret=14003e1be
0140:Call ntdll.RtlReleaseSRWLockExclusive(0003b570) ret=14003e19e
0140:Ret ntdll.RtlReleaseSRWLockExclusive() retval=00000000 ret=14003e19e
0140:Call ws2_32.closesocket(00000080) ret=14008f78e
0140:Call ntdll.wine_server_handle_to_fd(00000080,00000001,0021dcec,00000000) ret=7f42dbca3ed4
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [HUP INT USR1 USR2 ALRM CHLD IO], 8) = 0
read(4, "\0\0\0\0\0\0\0\0\3\0\0\0\1\0\0\0\237\1\22\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [HUP INT USR1 USR2 ALRM CHLD IO], NULL, 8) = 0
recvmsg(9, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\200\0\0\0", iov_len=4}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[72]}], msg_controllen=24, msg_flags=MSG_CMSG_CLOEXEC}, MSG_CMSG_CLOEXEC) = 4
fcntl(72, F_SETFD, FD_CLOEXEC) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
dup(72) = 75
0140:Ret ntdll.wine_server_handle_to_fd() retval=00000000 ret=7f42dbca3ed4
0140:Call ntdll.wine_server_release_fd(00000080,0000004b) ret=7f42dbca3ef0
close(75) = 0
0140:Ret ntdll.wine_server_release_fd() retval=00000000 ret=7f42dbca3ef0
0140:Call KERNEL32.CloseHandle(00000080) ret=7f42dbca3f4c
0140:Call ntdll.NtClose(00000080) ret=7b04ed99
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
close(72) = 0
0140:Ret ntdll.NtClose() retval=00000000 ret=7b04ed99
0140:Ret KERNEL32.CloseHandle() retval=00000001 ret=7f42dbca3f4c
0140:Ret ws2_32.closesocket() retval=00000000 ret=14008f78e
The error code is produced by this part in the middle:
0140:Call ntdll.NtCreateFile(0021cbe8,00100000,1400b9d28,0021cbf0,00000000,00000000,00000003,00000001,00000000,00000000,00000000) ret=14008569f
rt_sigprocmask(SIG_BLOCK, [HUP INT USR1 USR2 ALRM CHLD IO], [], 8) = 0
writev(3, [{iov_base=",\0\0\0\36\0\0\0\0\0\0\0\0\0\20\0\0\0\0\0\0\0\0\0\3\0\0\0\0\0\0\0"..., iov_len=64}, {iov_base="\\\0D\0e\0v\0i\0c\0e\0\\\0A\0f\0d\0\\\0M\0i\0o\0", iov_len=30}], 2) = 94
read(4, "4\0\0\300\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 64) = 64
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
0140:Ret ntdll.NtCreateFile() retval=c0000034 ret=14008569f
0140:Call ntdll.RtlNtStatusToDosError(c0000034) ret=1400856b9
0140:Ret ntdll.RtlNtStatusToDosError() retval=00000002 ret=1400856b9
This corresponds to the following lines in mio:
Lines 177 to 194 in 04050db
The reason this call fails is because, while wine does have \Device\Afd
, mio uses \Device\Afd\Mio
for reasons it never bothers to explain, and wine implements \Device\Afd
as an empty directory.
I would normally consider something like this a defect in wine, but not in this case. Not only you are using a completely undocumented internal Winsock API in ways that are neither clear nor explained (here or in the wepoll codebase), but you also never mention the fact that error codes (the complete list of which you do not know because the API is undocumented) are passed to downstream consumers without any filtering, sanity checking, or indication (the human readable string in io::Error
would be a great place to include the word "AFD"), and if that wasn't bad enough, this is the only backend provided by mio/tokio for networking on Windows. As far as I see, there is no workaround that can be added e.g. to an application using hyper, even if I can patch mio during the build (what am I supposed to do, rewrite the entire backend myself to use overlapped operations?)
Could you please stop for a moment and imagine how much time I have spent tracking down that ENOENT
in async code of a third party Rust application I have never seen before? Not a single thing in Linux, WinAPI, wine, tokio, hyper, or std can possibly return a ENOENT
error for a networking operation, so of course networking was the last thing I would blame. (Mio does return NotFound
in a few places, but those are clearly distinct since they are not OS errors.) That was made harder by the fact that Wine makes debugging more complex, and also because I could at first only reproduce this as a deeply nested part of a larger modular application that included, among other things, two copies of the Chromium Embedding Framework that ran concurrently to the crashing code.
This is the poster example of why you should not use undocumented APIs: it makes your code more fragile, harder to understand, harder to contribute to, and borderline impossible to debug for downstream users. I will never get these twenty hours of my life back. Don't do this! And if you for some reason have to do it, wrap it in hazard tape and provide a slow but obviously correct fallback.
I would normally consider something like this a defect in wine, but not in this case. Not only you are using a completely undocumented internal Winsock API in ways that are neither clear nor explained (here or in the wepoll codebase), but you also never mention the fact that error codes (the complete list of which you do not know because the API is undocumented) are passed to downstream consumers without any filtering, sanity checking, or indication (the human readable string in
io::Error
would be a great place to include the word "AFD"), and if that wasn't bad enough, this is the only backend provided by mio/tokio for networking on Windows. As far as I see, there is no workaround that can be added e.g. to an application using hyper, even if I can patch mio during the build (what am I supposed to do, rewrite the entire backend myself to use overlapped operations?) Could you please stop for a moment and imagine how much time I have spent tracking down that
ENOENT
in async code of a third party Rust application I have never seen before? Not a single thing in Linux, WinAPI, wine, tokio, hyper, or std can possibly return a ENOENT
error for a networking operation, so of course networking was the last thing I would blame. (Mio does return NotFound
in a few places, but those are clearly distinct since they are not OS errors.) That was made harder by the fact that Wine makes debugging more complex, and also because I could at first only reproduce this as a deeply nested part of a larger modular application that included, among other things, two copies of the Chromium Embedding Framework that ran concurrently to the crashing code. This is the poster example of why you should not use undocumented APIs: it makes your code more fragile, harder to understand, harder to contribute to, and borderline impossible to debug for downstream users. I will never get these twenty hours of my life back. Don't do this! And if you for some reason have to do it, wrap it in hazard tape and provide a slow but obviously correct fallback.
This isn't helpful and not appreciated either.
If you have a better solution we're always welcoming contributions.
What's not helpful is your implementation that is harmful both for downstream users (as my example demonstrates) and for the ecosystem in general (which may have to adapt, or risk breaking many Rust applications).
Given the complete lack of empathy shown in your response (much less, say, appreciation for a precise and detailed description of an issue almost nobody else would bother tracking down), I am going to stay as far away from tokio as I can rather than contributing to it.
(note that this thread is currently getting a bunch of reacts from being linked here: https://twitter.com/whitequark/status/1353364951591579649)
@Thomasdezeeuw Respectfully, I don't see how anyone not already deeply familiar with the project could contribute a better solution, given that the reasons for using the undocumented API and the odd subpath on \Device\Afd
are not documented here. The reasoning is clearly non-trivial and the AFD interface is sparsely documented at best. It would be helpful if an existing project maintainer could investigate and shed some light on exactly why these choices were made, and why the code does what it does. Without such information and assistance, any invitation to contribute could only be made in bad faith. I hope that is not the case.
What's not helpful is your implementation that is harmful both for downstream users (as my example demonstrates) and for the ecosystem in general (which may have to adapt, or risk breaking many Rust applications).
Given the complete lack of empathy shown in your response (much less, say, appreciation for a precise and detailed description of an issue almost nobody else would bother tracking down), I am going to stay as far away from tokio as I can rather than contributing to it.
Do you really think anyone who wrote the implementation meant it as harmful? It's a bug, it happens. We'll fix this bug, like we do with other bugs.
Also I want to note I do not appreciate the three paragraphs essentially yelling at someone who put in a lot of work for free, in their free time (and I'm just talking about myself).
Let's keep this discussion civil.
@Thomasdezeeuw Respectfully, I don't see how anyone not already deeply familiar with the project could contribute a better solution, given that the reasons for using the undocumented API and the odd subpath on
\Device\Afd
are not documented here. The reasoning is clearly non-trivial and the AFD interface is sparsely documented at best. It would be helpful if an existing project maintainer could investigate and shed some light on exactly why these choices were made, and why the code does what it does. Without such information and assistance, any invitation to contribute could only be made in bad faith. I hope that is not the case.
I didn't write the code either, but I agree that Mio isn't an easy project to contribute to. My reaction was to the three paragraphs and the end of the issue specifically. I spend my free time on this project, so getting yelled at over it is really not appreciated.
Folks, please, let's understand that someone who was working very long hours and very hard at tracking this down, was frustrated when the initial issue was filed. That comes through, but it was filed in good faith, and is very detailed in getting to the root of the issue. Let's please end discussion on the tone of the submission, and stay focused on the means by which we can move the issue forward toward resolution.
Thank you.
Hey there,
I had this issue a few weeks ago and was similarly frustrated. It started out as a bug report in one of my applications. Later I filed a bug with wine because of it. I mainly use smol and so I just checked if it also appears on tokio before filing a bug.
I didn't bother to find exactly which of the 80 libraries in the async stack misused the winapi.
Since mio isn't part of the dependencies of my minimal example in the bug report, could the person delving into this also have a look at smol/async-std to see if they did make a similar mistake?
Cargo tree
wine-smol v0.1.0 (D:\Git\test\wine-smol)
└── smol v1.2.5
    ├── async-channel v1.5.1
    │   ├── concurrent-queue v1.2.2
    │   │   └── cache-padded v1.1.1
    │   ├── event-listener v2.5.1
    │   └── futures-core v0.3.9
    ├── async-executor v1.4.0
    │   ├── async-task v4.0.3
    │   ├── concurrent-queue v1.2.2 (*)
    │   ├── fastrand v1.4.0
    │   ├── futures-lite v1.11.3
    │   │   ├── fastrand v1.4.0
    │   │   ├── futures-core v0.3.9
    │   │   ├── futures-io v0.3.9
    │   │   ├── memchr v2.3.4
    │   │   ├── parking v2.0.0
    │   │   ├── pin-project-lite v0.2.4
    │   │   └── waker-fn v1.1.0
    │   ├── once_cell v1.5.2
    │   └── vec-arena v1.0.0
    ├── async-fs v1.5.0
    │   ├── async-lock v2.3.0
    │   │   └── event-listener v2.5.1
    │   ├── blocking v1.0.2
    │   │   ├── async-channel v1.5.1 (*)
    │   │   ├── async-task v4.0.3
    │   │   ├── atomic-waker v1.0.0
    │   │   ├── fastrand v1.4.0
    │   │   ├── futures-lite v1.11.3 (*)
    │   │   └── once_cell v1.5.2
    │   └── futures-lite v1.11.3 (*)
    ├── async-io v1.3.1
    │   ├── concurrent-queue v1.2.2 (*)
    │   ├── fastrand v1.4.0
    │   ├── futures-lite v1.11.3 (*)
    │   ├── log v0.4.13
    │   │   └── cfg-if v0.1.10
    │   ├── nb-connect v1.0.2
    │   │   └── winapi v0.3.9
    │   ├── once_cell v1.5.2
    │   ├── parking v2.0.0
    │   ├── polling v2.0.2
    │   │   ├── cfg-if v0.1.10
    │   │   ├── log v0.4.13 (*)
    │   │   ├── wepoll-sys v3.0.1
    │   │   │   [build-dependencies]
    │   │   │   └── cc v1.0.66
    │   │   └── winapi v0.3.9
    │   ├── vec-arena v1.0.0
    │   ├── waker-fn v1.1.0
    │   └── winapi v0.3.9
    ├── async-lock v2.3.0 (*)
    ├── async-net v1.5.0
    │   ├── async-io v1.3.1 (*)
    │   ├── blocking v1.0.2 (*)
    │   ├── fastrand v1.4.0
    │   └── futures-lite v1.11.3 (*)
    ├── async-process v1.0.1
    │   ├── blocking v1.0.2 (*)
    │   ├── cfg-if v0.1.10
    │   ├── event-listener v2.5.1
    │   ├── futures-lite v1.11.3 (*)
    │   ├── once_cell v1.5.2
    │   └── winapi v0.3.9
    ├── blocking v1.0.2 (*)
    ├── futures-lite v1.11.3 (*)
    └── once_cell v1.5.2
@greaka I have already gotten in touch with people on the async-std side if we have the same bug (thanks for answering the question!) and we'll track that there.
@piscisaureus any thoughts on this: #1444 (comment)? Wepoll likely has the same issue.
judging by Trio's issue tracking Windows event notification (python-trio/trio#52) i suspect they also ought to be informed about whatever the conclusion is here.
for the immediate issue (creating a handle with a path where Wine's AFD implementation differs from Windows'), wepoll used to handle this through WSAEnumProtocolsW
. i'm really curious where the idea of \Device\Afd\*
came from - it looks like the only non-wepoll/mio/trio use of a path like that is an implementation detail of mswsock.dll? \Endpoint seems to only come up through there, stack traces in this post were helpful.
as for AFD at all, it looks like the remaining use of those APIs is an AFD_POLL
ioctl ("this is what select
does internally"). judging by the commit that added this in libuv (libuv/libuv@19aca7a - hi Bert!), i'm curious if sending ioctls to AFD are just necessary for some kind of Windows API reason?
@piscisaureus any thoughts on this: #1444 (comment)?
Wepoll likely has the same issue.
wepoll supports windows Vista / Server 2008 and up. WINE most likely does not work as it generally does not support APIs that were introduced after Windows XP.
The last time I got yelled at by a wine user it did not support GetQueuedCompletionStatusEx()
nor the SIO_BASE_HANDLE
ioctl, which are required for wepoll to work, both introduced in 2006 with the release of Vista. I also doubt that it supports the AFD_POLL_XXX
set of ioctls, but generally you can't even get to a point where they would be called, so I don't know for sure.
The question whether the Tokio/Mio maintainers want to invest the time and make the technical tradeoffs to support an incomplete emulation layer is up to them. For wepoll, I have no intention to do so (*).
(*) Unless wine more or less caught up with the times, of course -- a few changes here and there are fine, but I'm not available to implement a select-in-many-threads back end that is super slow (which is what makes libuv work under WINE).
The reason this call fails is because, while wine does have
\Device\Afd
, mio uses \Device\Afd\Mio
for reasons it never bothers to explain, and wine implements \Device\Afd
as an empty directory.
I really don't see what your problem is. Windows itself uses the following paths:
\Device\Afd\AsyncConnectHlp
\Device\Afd\AsyncSelectHlp
\Device\Afd\Endpoint
\Device\Afd\Helper
The last component of the path can be anything really, but it can't be omitted. Picking a descriptive name (or at least one that is not misleading) can be helpful when debugging.
Thanks @piscisaureus. Then I think the unfortunate conclusion is that Mio will not support Wine unless someone is willing to implement it.
The current implementation is blocked on the Wine bug: https://bugs.winehq.org/show_bug.cgi?id=50520 (thanks @greaka).
@piscisaureus Out of curiosity, do you know of any documentation on the \Device\Afd
device? Some simple searches of docs.microsoft.com
and the Windows SDK header files don't turn anything up.
I've noticed `\Device\Afd` sitting in the handle list in Process Explorer for a while, so it's cool to learn more about what it is doing. Microsoft has been shuffling around documentation a lot recently, so any help learning more would be appreciated.
We've added a note in the Readme that Wine is not supported and improved the error returned, so I don't think there is anything else to do here.
i ran into the issue while using tonic for a grpc server in wine. lots of time wasted trying to debug. i would suggest that if mio finds itself running in wine, the error just said "wine not supported by mio", so instead of me having to start debugging from tonic -> hyper -> tokio -> mio -> tcp and os errors, i / any future user/ can just google mio wine and reach this issue to avoid wasting lots of time :( . just a suggestion though :) but anyway i finally found this after putting a lot of dbg! statements in src code of tonic/hyper/tokio/mio.
Wine 6.11 just released https://www.winehq.org/announce/6.11 . and they now have the relevant patch from https://bugs.winehq.org/show_bug.cgi?id=50520 merged. so, i checked in fedora with wine 6.11 version, and tonic is working fine. its gonna be a while before it is updated in most distros though.
just wanted to mention it here so that the exp people can start running tests to see if everything is working finally and Wine can be removed from the unsupported section in the README.
EDIT: https://stackoverflow.com/questions/7372388/determine-whether-a-program-is-running-under-wine-at-runtime for reference. the first answer of using ntdll is recommended by official Wine developer FAQ.
i ran into the issue while using tonic for a grpc server in wine. lots of time wasted trying to debug. i would suggest that if mio finds itself running in wine, the error just said "wine not supported by mio", so instead of me having to start debugging from tonic -> hyper -> tokio -> mio -> tcp and os errors, i / any future user/ can just google mio wine and reach this issue to avoid wasting lots of time :( . just a suggestion though :) but anyway i finally found this after putting a lot of dbg! statements in src code of tonic/hyper/tokio/mio.
I'm sorry to hear you've wasted your time on this issue. Mio documents that Wine is not supported: https://github.com/tokio-rs/mio#unsupported, but if you're using it via another framework that won't be clear. Perhaps these crates should also add it to their documentation.
Wine 6.11 just released https://www.winehq.org/announce/6.11 . and they now have the relevant patch from https://bugs.winehq.org/show_bug.cgi?id=50520 merged. so, i checked in fedora with wine 6.11 version, and tonic is working fine. its gonna be a while before it is updated in most distros though.
Great! But indeed it will likely take a while.
just wanted to mention it here so that the exp people can start running tests to see if everything is working finally and Wine can be removed from the unsupported section in the README.
EDIT: https://stackoverflow.com/questions/7372388/determine-whether-a-program-is-running-under-wine-at-runtime for reference. the first answer of using ntdll is recommended by official Wine developer FAQ.
I'm not sure about detecting Wine support. However if someone is willing to make a pr I'll consider it. Otherwise with a bit of luck Wine 6.11 becomes quite popular soon and this won't be a problem anymore :)
Stock ubuntu 20.04, latest rustup, while trying to run the client example from tokio-tungstenite under wine getting this:
thread 'main' panicked at 'Failed to connect: Io(Os { code: 66, kind: Uncategorized, message: "Bad device type." })', examples/client.rs:29:51
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
tried winehq-7.0 (stable), winehq-7.2 (dev & staging). ping & DNS works ok from wineconsole
BTW: Backtrace is empty even when "full". I'm building using cross from linux.
panicked at 'failed to park: Os { code: 66, kind: Uncategorized, message: "Bad device type." }', C:\Users\xxxku\.cargo\registry\src\github.com-1285ae84e5963aae\tokio-1.19.2\src\runtime\basic_scheduler.rs:321:31
@Thomasdezeeuw is this the changed message? I need to use tokio 0.2 because of this 😢
oh I checked the changes #1448 and it must be a message from a different part of code, because it does not have that message.
For me it is more complicated ๐ I do not use linux, but I do not want to break the game for people that do, just to update libraries. But the build warning is annoying ๐
warning: the following packages contain code that will be rejected by a future version of Rust: winapi v0.2.8
panicked at 'failed to park: Os { code: 66, kind: Uncategorized, message: "Bad device type." }', C:\Users\xxxku\.cargo\registry\src\github.com-1285ae84e5963aae\tokio-1.19.2\src\runtime\basic_scheduler.rs:321:31
@xNxExOx this seems to be problem with Tokio as the panic is from https://github.com/tokio-rs/tokio/blob/34b8ebbe662cd8c818bc017e81a717a465ab1f01/tokio/src/runtime/basic_scheduler.rs#L326. Please open an issue with Tokio.
warning: the following packages contain code that will be rejected by a future version of Rust: winapi v0.2.8
Mio doesn't depend on winapi any more, so either your Mio version is outdated or it's caused by another crate.
ok, thank you, I think it is same thing (or related) because the code, and message text was exactly the same
@xNxExOx I made a mio-fork that is API-compatible with mio v0.7 and does not use Afd stuff. Maybe you can try pinning tokio at 1.16.1
(which is the last version using mio v0.7) and patching mio with this fork?
Disclaimer: this patch is not meant for production use and may degrade network IO performance.
@bdbai for me it is a production use, and if the issue is with new tokio, then it will probably not help ๐
@xNxExOx sorry to hear that, but mio guys are happy with the current implementation and do not even provide a fallback. Which means this problem is nearly impossible to be solved, unless wine introduces these private, undocumented API into their runtime.
@xNxExOx sorry to hear that, but mio guys are happy with the current implementation and do not even provide a fallback. Which means this problem is nearly impossible to be solved, unless wine introduces these private, undocumented API into their runtime.
@bdbai Wine v6.11+ has already fixed it and Mio works fine on it.
Yeah so I guess this does need a tokio issue and people looking into what tokio is doing weirdly here.
tokio-rs/tokio#4781 already created
@bdbai I tried, but did not get past the build with:
tokio = { version = "1.16.1", features = ["io-util", "net", "time", "sync", "rt"] }
tokio-util = { version = "0.5", features = ["codec"] }
rustls = { version = "0.19" }
tokio-rustls = "0.21"
futures = "0.3"
bytes = "0.6"
it wants 1.19.2, and I am not going to spend much more time dropping versions and updating code to use these versions until it works 🙁
@xNxExOx sorry to hear that, but mio guys are happy with the current implementation and do not even provide a fallback. Which means this problem is nearly impossible to be solved, unless wine introduces these private, undocumented API into their runtime.
@bdbai Wine v6.11+ has already fixed it and Mio works fine on it.
@Thomasdezeeuw Was this ever actually tested? I've traced it and looked into the source code of wine and it seems pretty clear to me that the bug is in mio (as opposed to tokio) and in particular in the way mio skips the call to `WSASocketW`.
Check tokio-rs/tokio#4781 (comment) and the following comment for details.
@Thomasdezeeuw Was this ever actually tested?
I didn't, but see #1444 (comment).
I've traced it and looked into the source code of wine and it seems pretty clear to me that the bug is in mio (as opposed to tokio) and in particular in the way mio skips the call to `WSASocketW`.
Mio doesn't use any Windows networking API directly and it relies on std lib to call `WSASocketW` because it would panic otherwise. Same is true for socket2 (which Tokio uses) which has an explicit call to init to do so: https://github.com/rust-lang/socket2/blob/723453768539ca48f3d9af5b9f38d1cd73958b3a/src/sys/windows.rs#L186-L195.
Check tokio-rs/tokio#4781 (comment) and the following comment for details.
I'm afraid I currently don't have the time to deep dive into this. Perhaps people more familiar with Wine/Windows can step in here.
tokio = { version = "=1.16.1", features = ["io-util", "net", "time", "sync", "rt"] }
tokio-util = { version = "=0.7.1", features = ["codec"] }
rustls = { version = "=0.20.6", features = ["dangerous_configuration"] }
tokio-rustls = "=0.23.4"
futures = "0.3"
bytes = "1"
[patch.crates-io]
mio = { git = "https://github.com/YtFlow/mio-noafd.git", rev = "f1e073d" }
I finally got it working with these values.
But can someone explain to me the advantages of AFD, if there are any? As @bdbai's implementation shows, it is not necessary, and I do not see any performance difference.
@xNxExOx AFD is there for performance reasons. The fork's implementation has a great deal of overhead.
but AFD does not work for Linux users 😢 and if it had not been suggested to me to use tokio = "=1.16.1" (and even after that I struggled quite a lot to get all versions to align) I would be stuck on version 0.2, because that one worked. I guess a few thousand packets per second is not enough to be able to measure the performance difference.
@xNxExOx I get that, and it's quite unfortunate that wine has this issue.
From tokio-rs/tokio#4781 (comment) I understand that it is not a wine issue, but a mio issue 🤔 so I did not report anything to wine.
@Noah-Kennedy are you sure it is a wine issue?
@xNxExOx wine is an emulation layer; any discrepancies with the behavior of windows are bugs in wine.
The whole point of wine is that you can get random applications developed for windows to run under wine.
@xNxExOx If I may ask, why are you trying to target wine instead of just targeting Linux normally?
That is not correct: it might be a bug in Windows that will get fixed at some point, and then all Rust async networking applications might stop working. But on the other hand, it would not be the first bug Microsoft decided not to fix because fixing it would break so many applications.
@Noah-Kennedy I am not targeting wine specifically, I am targeting x86 Windows, because that is the only platform where the rest of the things work, which I do not have source code for ( https://skylords.eu/ ), and breaking the game for everyone playing on Linux just because of some library incompatibility sounds like a terrible idea. I am glad I figured out a working combination of at least semi-recent tokio + a mio fork without AFD, so I can at least update the compiler without worrying about old winapi not being compatible with the new compiler.
@xNxExOx I am pretty sure that this has long since crossed into the realm of being "a feature not a bug" in windows.
@bdbai mio = { git = "https://github.com/YtFlow/mio-noafd.git", rev = "f1e073d" }
this causes issues on Windows 7: it sometimes gets stuck and does not read packets :(
Does anyone have a suggestion for what to do if we do not want to break the game for anyone, when with newer versions we can only choose between a bad experience on Windows 7 or not working under wine? The latest version that worked for both was 0.2, but the compiler marks it as soon to be unsupported.
@bdbai
mio = { git = "https://github.com/YtFlow/mio-noafd.git", rev = "f1e073d" }
this causes issues on Windows 7: it sometimes gets stuck and does not read packets :(
Since this is no longer related to Mio can we keep it off Mio's issue tracker.
@Thomasdezeeuw it is related to mio, because mio does not work under wine, even though wine is listed under supported platforms. And that fork solves wine compatibility, but introduces Windows 7 incompatibility, so it cannot be considered a good fix.
@xNxExOx The issue here is that the issue is in the fork, not mio. You should file an issue on that fork and/or fork the fork and try and figure it out and patch it.
@Noah-Kennedy let me repeat it one more time: mio WITHOUT that fork does not work under wine, not wine 6.11, not 7.
I believe the issue has been solved in Wine 7.13. I am unable to reproduce the issue in this new release:
This is possibly related to the change listed here: server: Allow IOCTL_AFD_POLL for sockets without unix fd.
(see tokio-rs/tokio#4781 (comment))
@Thomasdezeeuw do we want to re-add support with a note about this versioning requirement?
@Thomasdezeeuw do we want to re-add support with a note about this versioning requirement?
Honestly, not really at the moment. If we're going to claim support I want to see some tests passing first, preferably setup CI.
@zynaa would you be willing to implement a CI test?
@Noah-Kennedy I'm afraid I'm not too experienced with Azure Pipelines. For the CI I assume something like this would make sense though?
1. Build test binaries with a Windows VM
2. Upload artifacts for test binaries
3. Set up a Linux VM and install wine
4. Run the test binaries from step 2 via wine in the new VM
Technically you could also setup cargo and msvc inside a wine environment (via https://github.com/mstorsjo/msvc-wine) but that sounds more complicated than the steps above.
Currently under wine, these are the test results I get:
running 3 tests
test sys::windows::iocp::tests::get_many ... ok
test sys::windows::iocp::tests::is_send_sync ... ok
test sys::windows::named_pipe::ptr_from ... ok
test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
Running tests\aio.rs (target\debug\deps\aio-dc973f9b0e7a9dcf.exe)
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
Running tests\close_on_drop.rs (target\debug\deps\close_on_drop-38133db9955acd75.exe)
running 1 test
test close_on_drop ... ok
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
Running tests\events.rs (target\debug\deps\events-fb8336174c94fd48.exe)
running 2 tests
test assert_event_source_implemented_for ... ok
test events_all ... ok
test result: ok. 2 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
Running tests\interest.rs (target\debug\deps\interest-59e994171297d44b.exe)
running 4 tests
test add ... ok
test bit_or ... ok
test fmt_debug ... ok
test is_tests ... ok
test result: ok. 4 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
Running tests\poll.rs (target\debug\deps\poll-bfbf52de4789826f.exe)
running 16 tests
test is_send_and_sync ... ok
test deregister_without_register ... ok
test poll_erroneous_registration ... ok
test double_register_different_token ... ok
test poll_registration ... ok
test drop_cancels_interest_and_shuts_down ... ok
test reregister_without_register ... ok
test reregister_interest_token_usage ... ok
test poll_ok_after_cancelling_pending_ops ... ok
test registry_behind_arc ... ok
test registry_operations_are_thread_safe ... ok
test poll_closes_fd ... ok
test zero_duration_polls_events ... ok
test add_then_drop ... ok
test run_once_with_nothing ... ok
test register_during_poll ... ok
test result: ok. 16 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.21s
Running tests\registering.rs (target\debug\deps\registering-06f0bd6de42b2079.exe)
running 5 tests
test udp_register_multiple_event_loops ... ok
test tcp_register_multiple_event_loops ... ok
test register_deregister ... ok
test registering_after_deregistering ... ok
test reregister_different_interest_without_poll ... ok
test result: ok. 5 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.41s
Running tests\regressions.rs (target\debug\deps\regressions-d2ece371ad44a6ea.exe)
running 2 tests
test issue_776 ... ok
test issue_1205 ... ok
test result: ok. 2 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.50s
Running tests\tcp.rs (target\debug\deps\tcp-30e228c55ad215a5.exe)
running 18 tests
test connect_error ... ok
test is_send_and_sync ... ok
test bind_twice_bad ... ok
test connect ... ok
test connect_then_close ... ok
test accept ... ok
test local_addr_ready ... ok
test multiple_writes_immediate_success ... ok
test write_then_deregister ... ok
test write_error ... ok
test write_then_drop ... ok
test listen_then_close ... ok
test connection_reset_by_peer ... ok
test write_shutdown ... ok
test tcp_no_events_after_deregister ... ok
test write ... ok
test peek ... ok
test read ... ok
test result: ok. 18 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.61s
Running tests\tcp_listener.rs (target\debug\deps\tcp_listener-37ed97f7c929200e.exe)
running 10 tests
test is_send_and_sync ... ok
test get_ttl_without_previous_set ... ok
test set_get_ttl ... ok
test tcp_listener_ipv6 ... FAILED
test tcp_listener ... ok
test reregister ... ok
test tcp_listener_std ... ok
test registering ... ok
test no_events_after_deregister ... ok
test tcp_listener_two_streams ... ok
failures:
---- tcp_listener_ipv6 stdout ----
thread 'tcp_listener_ipv6' panicked at 'called `Result::unwrap()` on an `Err` value: Os { code: 10049, kind: AddrNotAvailable, message: "OS Error 10049 (FormatMessageW() returned error 317)" }', tests\tcp_listener.rs:56:44
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
failures:
tcp_listener_ipv6
test result: FAILED. 9 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.06s
error: test failed, to rerun pass '--test tcp_listener'
@zynaa I think we can do this by adding a windows job that builds the binaries and stores the artifacts, and then having a linux job that pulls in the artifacts and attempts to run them.
Unfortunately, however, I'm not as familiar with Azure Pipelines as with GH Actions, although I am going to be learning to use it soon.
I'm also fine with switching to GitHub Actions, although that is of course a larger project. We used Travis-CI before and switched to Azure after they went bust (or decided to fire everyone anyway), and at that point GitHub Actions wasn't a thing yet.
I can look into switching us over later.
I can look into switching us over later.
I'll give it a quick try now, have some free time (and it's too hot to do anything).
If being able to build and run tests on the same OS would make it easier, lld can emulate msvc's linker. The xwin utility was created to make it easier to build Rust binaries for MSVC targets on Linux CI environments using lld: https://jake-shadle.github.io/xwin/