mmap error for most operations on debian 10
Closed this issue · 10 comments
Hi,
I tried to use mig-parted on a debian 10 system with 6 A100 GPUs installed and backports kernel 5.10 (5.10.0-0.bpo.8-amd64 #1 SMP Debian 5.10.46-4~bpo10+1 (2021-08-07) x86_64 GNU/Linux
) as well as kernel 4.19 and get the following error:
# nvidia-mig-parted -d assert --config-file /etc/nvidia-mig-manager/config.yaml --selected-config all-disabled
DEBU[0000] Parsing config file...
DEBU[0000] Selecting specific MIG config...
DEBU[0000] Asserting MIG mode configuration...
DEBU[0000] Walking MigConfig for (devices=all)
DEBU[0000] GPU 0: 0x20F110DE
DEBU[0000] Asserting MIG mode: Disabled
DEBU[0000] Error checking MIG capable: error opening bar0 MMIO resource: failed to open file for mmio: failed to mmap file: invalid argument
FATA[0000] Assertion failure: selected configuration not currently applied
# nvidia-mig-parted -d apply --config-file /etc/nvidia-mig-manager/config.yaml --selected-config all-disabled
[...]
DEBU[0001] Applying MIG mode change...
DEBU[0001] Walking MigConfig for (devices=all)
DEBU[0001] GPU 0: 0x20F110DE
DEBU[0001] MIG capable: true
DEBU[0001] Current MIG mode: Disabled
DEBU[0001] Updating MIG mode: Disabled
DEBU[0001] Mode change pending: false
DEBU[0001] GPU 1: 0x20F110DE
DEBU[0001] MIG capable: true
DEBU[0001] Current MIG mode: Disabled
DEBU[0001] Updating MIG mode: Disabled
DEBU[0001] Mode change pending: false
DEBU[0001] GPU 2: 0x20F110DE
DEBU[0001] MIG capable: true
DEBU[0001] Current MIG mode: Disabled
DEBU[0001] Updating MIG mode: Disabled
DEBU[0001] Mode change pending: false
DEBU[0001] GPU 3: 0x20F110DE
DEBU[0001] MIG capable: true
DEBU[0001] Current MIG mode: Disabled
DEBU[0001] Updating MIG mode: Disabled
DEBU[0001] Mode change pending: false
DEBU[0001] GPU 4: 0x20F110DE
DEBU[0001] MIG capable: true
DEBU[0001] Current MIG mode: Enabled
DEBU[0001] Updating MIG mode: Disabled
DEBU[0003] Mode change pending: false
DEBU[0003] GPU 5: 0x20F110DE
DEBU[0003] MIG capable: true
DEBU[0003] Current MIG mode: Enabled
DEBU[0003] Updating MIG mode: Disabled
DEBU[0014] Mode change pending: false
DEBU[0014] Checking current MIG device configuration...
DEBU[0014] Walking MigConfig for (devices=all)
DEBU[0014] GPU 0: 0x20F110DE
DEBU[0014] Running pre-apply-config hook
[...]
DEBU[0014] Applying MIG device configuration...
DEBU[0014] Walking MigConfig for (devices=all)
DEBU[0014] GPU 0: 0x20F110DE
DEBU[0014] Running apply-exit hook
[....]
FATA[0015] Error checking MIG capable: error opening bar0 MMIO resource: failed to open file for mmio: failed to mmap file: invalid argument
The change for mig mode (equivalent to nvidia-smi -mig 0) works fine but the assertion always fails with this error and setting up mig instances doesn't work.
I tried to debug the mmap failure but couldn't find anything obvious.
I also used strace to show the call:
strace -e trace=%memory nvidia-mig-parted -d assert --config-file /etc/nvidia-mig-manager/vrvis.yaml --selected-config all-disabled
brk(NULL) = 0x1255000
mmap(NULL, 25926, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f904a11f000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f904a11d000
mmap(NULL, 132288, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f904a0fc000
mmap(0x7f904a102000, 61440, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7f904a102000
mmap(0x7f904a111000, 24576, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15000) = 0x7f904a111000
mmap(0x7f904a117000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1a000) = 0x7f904a117000
mmap(0x7f904a119000, 13504, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f904a119000
mmap(NULL, 16656, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f904a0f7000
mmap(0x7f904a0f8000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x7f904a0f8000
mmap(0x7f904a0f9000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f904a0f9000
mmap(0x7f904a0fa000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f904a0fa000
mmap(NULL, 1837056, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f9049f36000
mprotect(0x7f9049f58000, 1658880, PROT_NONE) = 0
mmap(0x7f9049f58000, 1343488, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x22000) = 0x7f9049f58000
mmap(0x7f904a0a0000, 311296, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16a000) = 0x7f904a0a0000
mmap(0x7f904a0ed000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x7f904a0ed000
mmap(0x7f904a0f3000, 14336, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f904a0f3000
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9049f33000
mprotect(0x7f904a0ed000, 16384, PROT_READ) = 0
mprotect(0x7f904a0fa000, 4096, PROT_READ) = 0
mprotect(0x7f904a117000, 4096, PROT_READ) = 0
mprotect(0x878000, 4096, PROT_READ) = 0
mprotect(0x7f904a14d000, 4096, PROT_READ) = 0
munmap(0x7f904a11f000, 25926) = 0
brk(NULL) = 0x1255000
brk(0x1276000) = 0x1276000
mmap(NULL, 262144, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9049ef3000
mmap(NULL, 131072, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9049ed3000
mmap(NULL, 1048576, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9049dd3000
mmap(NULL, 8388608, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f90495d3000
mmap(NULL, 67108864, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f90455d3000
mmap(NULL, 536870912, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f90255d3000
mmap(0xc000000000, 67108864, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xc000000000
mmap(0xc000000000, 67108864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xc000000000
mmap(NULL, 33554432, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f90235d3000
mmap(NULL, 2165768, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f90233c2000
mmap(0x7f9049ed3000, 131072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f9049ed3000
mmap(0x7f9049e53000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f9049e53000
mmap(0x7f90499d9000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f90499d9000
mmap(0x7f9047603000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f9047603000
mmap(0x7f9035753000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f9035753000
mmap(NULL, 1048576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f90232c2000
mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f90232b2000
mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f90232a2000
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f9022aa1000
mprotect(0x7f9022aa2000, 8388608, PROT_READ|PROT_WRITE) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f90222a0000
mprotect(0x7f90222a1000, 8388608, PROT_READ|PROT_WRITE) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f9021a9f000
mprotect(0x7f9021aa0000, 8388608, PROT_READ|PROT_WRITE) = 0
--- SIGURG {si_signo=SIGURG, si_code=SI_TKILL, si_pid=6578, si_uid=0} ---
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f9020a5d000
mprotect(0x7f9020a5e000, 8388608, PROT_READ|PROT_WRITE) = 0
mmap(NULL, 262144, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9020a1d000
DEBU[0000] Parsing config file...
DEBU[0000] Selecting specific MIG config...
DEBU[0000] Asserting MIG mode configuration...
--- SIGURG {si_signo=SIGURG, si_code=SI_TKILL, si_pid=6578, si_uid=0} ---
mmap(NULL, 1439992, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f90208bd000
--- SIGURG {si_signo=SIGURG, si_code=SI_TKILL, si_pid=6578, si_uid=0} ---
DEBU[0000] Walking MigConfig for (devices=all)
DEBU[0000] GPU 0: 0x20F110DE
DEBU[0000] Asserting MIG mode: Disabled
--- SIGURG {si_signo=SIGURG, si_code=SI_TKILL, si_pid=6578, si_uid=0} ---
--- SIGURG {si_signo=SIGURG, si_code=SI_TKILL, si_pid=6578, si_uid=0} ---
mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f902086d000
--- SIGURG {si_signo=SIGURG, si_code=SI_TKILL, si_pid=6578, si_uid=0} ---
--- SIGURG {si_signo=SIGURG, si_code=SI_TKILL, si_pid=6578, si_uid=0} ---
mmap(NULL, 16777216, PROT_READ, MAP_SHARED, 3, 0) = -1 EINVAL (Invalid argument)
DEBU[0000] Error checking MIG capable: error opening bar0 MMIO resource: failed to open file for mmio: failed to mmap file: invalid argument
FATA[0000] Assertion failure: selected configuration not currently applied
+++ exited with 1 +++
Thanks for your help,
Valentin
You need sudo or root to change the MIG mode.
i'm running the commands as root user
running nvidia-smi commands to create mig instance and change the mig mode works flawlessly on the same shell
i just realised that there was a misunderstanding:
- setting the mig mode including the device reset works.
- the check for mig capability fails every time with:
failed to mmap file: invalid argument
the corresponding syscall:mmap(NULL, 16777216, PROT_READ, MAP_SHARED, 3, 0) = -1 EINVAL (Invalid argument)
Sorry for my quick response yesterday. I only quickly glanced at the error and others have had a similar problem with mmap
not succeeding because of a permission error (hence me jumping to conclusions that you weren't running as root).
I've not had any reports of this particular error, so it's not immediately clear what could be causing this. The arguments to the mmap all look reasonable. The reasons an EINVAL might be returned are:
EINVAL We don't like addr, length, or offset (e.g., they are too
large, or not aligned on a page boundary).
EINVAL (since Linux 2.6.12) length was 0.
EINVAL flags contained none of MAP_PRIVATE, MAP_SHARED, or
MAP_SHARED_VALIDATE.
In this case:
- addr is NULL (which is OK, it just means that the kernel will pick the address for us)
- length is 16777216 or 0x1000000 (which appears to be of a reasonable size but we should check)
- offset is 0 (so should still be page aligned assuming the kernel gives us an addr that is page aligned)
- flags contains MAP_SHARED
We also have an fd of 3 being passed in, which I don't see an open() for in the strace (maybe you grepped it out?), but from the code that calls this, I don't see how it could have an incorrect fd:
https://gitlab.com/nvidia/cloud-native/mig-parted/-/blob/master/vendor/gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci/mmio/mmio.go#L43
In any case, an incorrect fd
would return an EACCES
not an EINVAL
so this seems unlikely to be the culprit.
The only thing I can think of is maybe the length
is somehow bigger than the actual file? Which again seems unlikely given the code linked above which calls Stat()
to get the length from the actual file size.
Also, I have the exact same size for this file on my machine where mig-parted
is working:
$ ls -la /sys/bus/pci/devices/0000\:06\:00.0/resource0
-rw------- 1 root root 16777216 Jan 30 2021 /sys/bus/pci/devices/0000:06:00.0/resource0
What's also interesting is that your trace seems to show that the "mig capable" check succeeds when changing MIG mode (and only failing when applying an actual MIG config). Why it would work in one scenario and not the other is very odd.
Hi,
We did some more investigating and it seems the issue is that the debian kernel is compiled with the flag CONFIG_IO_STRICT_DEVMEM.
This leads to an mmap error EINVAL if one tries to mmap a PCI device's bar0 while the device is bound to a kernel driver.
I could confirm this behavior with a small c program and an unbound device for reference.
Unfortunately I do not know how to get around this other than unloading the kernel driver which is not a viable option.
It would be great if we could find a solution.
Cheers, Valentin
Can you change this line from:
func NewCombinedMigManager() CombinedMigManager {
...
}{mode.NewPciMigModeManager(), config.NewNvmlMigConfigManager()}
to
func NewCombinedMigManager() CombinedMigManager {
...
}{mode.NewNvmlMigModeManager(), config.NewNvmlMigConfigManager()}
and recompile and try it out.
This basically says use NVML for the mode check instead of the PCI bus. It will be slower (but not much hopefully).
This also explains why this worked for MIG mode changes but not for MIG config changes. Only config changes instantiate this "combined" manager (and hard code it to use the PCI variant of the mode manager). Mode changes use NVML if it is available and PCI if it is not (and your setup clearly has the driver / NVML installed).
I also changed cmd/assert/mode.go:28 to
manager := mode.NewNvmlMigModeManager()
in addition to the suggested change to have everything work via the nvml library. Now export, assert and apply work.
Thanks for your suggestion, maybe a change to detect nvml for these functions would enable mig-parted to run on systems with the kernel flag CONFIG_IO_STRICT_DEVMEM enabled
thanks
Both of these changes seem reasonable to make unconditionally. I wasn't aware of this restriction with CONFIG_IO_STRICT_DEVMEM
or I probably would have done it this way in the first place. The next release will have these changes in it.
The following MR is now out to address this. Review welcome:
https://gitlab.com/nvidia/cloud-native/mig-parted/-/merge_requests/45
The fix for this is now included in https://github.com/NVIDIA/mig-parted/releases/tag/v0.1.3 so I'm closing this issue. Please reopen (or create a new issue) if you encounter this again in the future.