Unable to read more than 78643 chars from stdout pipe
chwpearse opened this issue · 3 comments
When running a process I seem to be unable to read more than 78643 chars.
If I use read_output_lines() I get no result and when I use read_output() it only reads the first 78,643 chars. I think read_output_lines() is not reaching the \n at the end of the line so it doesn't return anything.
After reading with read_output() I cannot read the rest of the line until I send new input. I have tried to troubleshoot with the R debugger but as the read happens through the dll file I can't see what's happening.
The below creates and runs a cmd file to write 100,000 chars, but processx only reads 78,643. It then does the same for 50,000 chars and it reads all 50,000 and prints a summary at the end.
This is on Windows 10 & 11
library(tidyverse)
library(processx)
# Reprex part 1: print a single 100,000-character line from a cmd script and
# try to read it back through the stdout pipe.
# Empty the workspace first; the summary step below picks variables by name via ls()
rm(list = ls())
# Set file names to save to, and whether + how long to wait before reading
print_file <- 'to_print.txt'
print_file_2 <- 'to_print_2.txt'
cmd_file <- 'do.cmd'
cmd_file_2 <- 'do_2.cmd'
do_wait <- T
wait_secs <- 2
# Write a text file of 100,000 'a' characters ending in a newline; this is what gets printed.
# Make a second file with 50,000 chars
'a' %>% rep(100000) %>% paste0(collapse = '') %>% paste0(., '\n') %>% write_file(file = print_file)
'a' %>% rep(50000) %>% paste0(collapse = '') %>% paste0(., '\n') %>% write_file(file = print_file_2)
# Write a cmd file that prompts for input, then prints the contents of the text file created above,
# then prompts for input again
paste0('@echo off \n:prompt\n set /p id="" \ntype ', print_file , '\nset /p id=""') %>% write_file(cmd_file)
paste0('@echo off \n:prompt\n set /p id="" \ntype ', print_file_2 , '\nset /p id=""') %>% write_file(cmd_file_2)
# Kill any process object left over under the same name, then start the first cmd file
# with stdin, stdout and stderr all connected to pipes
if(exists('my_proc')){my_proc$kill()}
my_proc <- process$new(cmd_file, stdin = '|', stdout = '|', stderr = '|')
# Feed in \n to begin printing. Try read_output_lines() first, then read_output().
# Repeat both reads to try and catch any remaining chars not read
my_proc$write_input('\n')
if(do_wait){Sys.sleep(wait_secs)}
a_output_lines_1_1 <- my_proc$read_output_lines()
b_output_1_1 <- my_proc$read_output()
c_output_lines_1_2 <- my_proc$read_output_lines()
d_output_1_2 <- my_proc$read_output()
# The above only reads 78,643 chars, which is less than the 100,000 that should be printed
# The remaining 21,357 can be read if you feed in additional input
my_proc$write_input('-')
if(do_wait){Sys.sleep(wait_secs)}
e_output_lines_2_1 <- my_proc$read_output_lines()
f_output_2_1 <- my_proc$read_output()
g_output_lines_2_2 <- my_proc$read_output_lines()
h_output_2_2 <- my_proc$read_output()
# Collect all the output vars and their lengths into a table.
# Variables are found by name: every object whose name contains 'output'.
# `length` is nchar() of the value; append(0) supplies a 0 when the value is empty, pluck(1) keeps the first entry.
# `chars` is the set of distinct characters found in the value.
# NOTE(review): `has_linebreak` runs str_detect() on the variable *name* (.x), not on its value,
# so it is FALSE for every row — probably meant `.x %>% get %>% str_detect('\n')`; confirm before relying on it.
results <- ls() %>% str_subset('output') %>%
map(~list(var = .x, length = .x %>% get %>% nchar %>% append(0) %>% pluck(1),
has_linebreak = .x %>% str_detect('\n'),
chars = .x %>% get %>% str_extract_all(pattern = '.') %>% unlist %>% unique() %>% paste0(collapse = ''))) %>%
bind_rows()
# Clear output vars for next run, so the second summary only sees values from part 2
rm(list = ls() %>% str_subset('output'))
################
##### Part 2
##### Same as above but with 50,000 chars printed
################
# Reprex part 2: repeat the same read sequence against the 50,000-character script.
# Kill the process from part 1 before starting the second one
if(exists('my_proc')){my_proc$kill()}
#> [1] FALSE
my_proc <- process$new(cmd_file_2, stdin = '|', stdout = '|', stderr = '|')
# Feed in \n to begin printing. Try read_output_lines() first, then read_output().
# Repeat both reads to try and catch any remaining chars not read
my_proc$write_input('\n')
if(do_wait){Sys.sleep(wait_secs)}
a_output_lines_1_1 <- my_proc$read_output_lines()
b_output_1_1 <- my_proc$read_output()
c_output_lines_1_2 <- my_proc$read_output_lines()
d_output_1_2 <- my_proc$read_output()
# All 50,000 chars are read on the first try
# Send further input and read again, mirroring part 1, to confirm nothing is left in the pipe
my_proc$write_input('-')
if(do_wait){Sys.sleep(wait_secs)}
e_output_lines_2_1 <- my_proc$read_output_lines()
f_output_2_1 <- my_proc$read_output()
g_output_lines_2_2 <- my_proc$read_output_lines()
h_output_2_2 <- my_proc$read_output()
# Summarise every variable whose name contains 'output': character count of the value and
# the distinct characters it contains.
# NOTE(review): `has_linebreak` runs str_detect() on the variable *name* (.x), not on its value,
# so it is FALSE for every row — confirm whether `.x %>% get %>% str_detect('\n')` was intended.
results_2 <- ls() %>% str_subset('output') %>%
map(~list(var = .x, length = .x %>% get %>% nchar %>% append(0) %>% pluck(1),
has_linebreak = .x %>% str_detect('\n'),
chars = .x %>% get %>% str_extract_all(pattern = '.') %>% unlist %>% unique() %>% paste0(collapse = ''))) %>%
bind_rows()
results
#> # A tibble: 8 × 4
#> var length has_linebreak chars
#> <chr> <dbl> <lgl> <chr>
#> 1 a_output_lines_1_1 0 FALSE ""
#> 2 b_output_1_1 78643 FALSE "a"
#> 3 c_output_lines_1_2 0 FALSE ""
#> 4 d_output_1_2 0 FALSE ""
#> 5 e_output_lines_2_1 21357 FALSE "a"
#> 6 f_output_2_1 0 FALSE ""
#> 7 g_output_lines_2_2 0 FALSE ""
#> 8 h_output_2_2 0 FALSE ""
results %>% summarise(tot_length = sum(length))
#> # A tibble: 1 × 1
#> tot_length
#> <dbl>
#> 1 100000
results_2
#> # A tibble: 8 × 4
#> var length has_linebreak chars
#> <chr> <dbl> <lgl> <chr>
#> 1 a_output_lines_1_1 50000 FALSE "a"
#> 2 b_output_1_1 0 FALSE ""
#> 3 c_output_lines_1_2 0 FALSE ""
#> 4 d_output_1_2 0 FALSE ""
#> 5 e_output_lines_2_1 0 FALSE ""
#> 6 f_output_2_1 0 FALSE ""
#> 7 g_output_lines_2_2 0 FALSE ""
#> 8 h_output_2_2 0 FALSE ""
results_2 %>% summarise(tot_length = sum(length))
#> # A tibble: 1 × 1
#> tot_length
#> <dbl>
#> 1 50000
Created on 2023-05-27 with reprex v2.0.2
Standard output and standard error
-- nothing to show --
Three tips:
- if you know that you can have long lines, you need to use `read_output()` instead of `read_output_lines()`.
- `read_output()` can always return less output than requested.
- you need to call `$poll_io()` before reading, even if you call it with no waiting time: `$poll_io(0)`.
Ah thanks for that. For anyone who hits this problem in the future here's what I did:
#' Read one long line from a process's standard output.
#'
#' Polls `proc`, collecting the partial chunks handed back by `read_output()`,
#' until `read_output_lines(1)` returns a line or the polling budget runs out.
#'
#' @param proc Object exposing `poll_io()`, `read_output()` and
#'   `read_output_lines()` methods (e.g. a `processx::process` started with
#'   `stdout = '|'`).
#' @param wait Value passed to every `poll_io()` call (a timeout in
#'   milliseconds for processx processes).
#' @param timeout Polling budget: the loop stops once `counter * wait` exceeds
#'   this value, even if no complete line has been returned yet.
#' @return A character vector: all partial chunks concatenated into one string,
#'   with whatever `read_output_lines()` returned pasted on the end. If the
#'   budget ran out first, this is just the concatenated chunks.
proc_read_long_singleline <- function(proc, wait = 100, timeout = 5000) {
  # Poll the process that was passed in. Polling a global `my_proc` here would
  # fail (or poll the wrong process) whenever the caller's object has another name.
  proc$poll_io(wait)
  counter <- 1
  output_lines_res <- proc$read_output_lines(1)
  output_res <- character(0)
  # Both operands are scalars, so use the short-circuiting `&&`.
  while (length(output_lines_res) == 0 && counter * wait <= timeout) {
    # No complete line yet: drain whatever partial output is available, in order.
    output_res <- c(output_res, proc$read_output())
    proc$poll_io(wait)
    counter <- counter + 1
    output_lines_res <- c(output_lines_res, proc$read_output_lines(1))
  }
  # Partial chunks first, then the line tail that finally arrived.
  paste0(paste0(output_res, collapse = ""), output_lines_res)
}
# Terminate any process object left over from an earlier run.
if (exists('my_proc')) {
  my_proc$kill()
}
# Start the command script with stdin, stdout and stderr all connected to pipes.
my_proc <- process$new(cmd_file, stdin = '|', stdout = '|', stderr = '|')
# Send a newline so the script begins printing.
my_proc$write_input('\n')
# Collect the whole long line through the helper.
read_chars <- proc_read_long_singleline(my_proc)
I think this is documented, but I'll keep this issue open until we improve the documentation.