gesistsa/adaR

A one-row data frame is coerced to a named vector

Closed this issue · 4 comments

https://github.com/schochastics/adaR/blob/b2eb3e4662423b53db979541777da0e5847a7b69/R/parse.R#L13

# Reproduction: parsing a single URL yields a named character vector
# rather than a one-row data frame.
str(ada_url_parse("https://www.google.co.jp/search?q=ドイツ"))
# Named chr [1:10] "https://www.google.co.jp/search?q=ドイツ" "https:" "" "" "www.google.co.jp"
#   "www.google.co.jp" "" "/search" "?q=ドイツ" ...
# - attr(*, "names")= chr [1:10] "href" "protocol" "username" "password" ...

`simplify` should be `FALSE`, and the result should then be coerced to a data.frame again; or there may be a better way.

# Parse a character vector of URLs into a data frame with one row per URL.
#
# url:    URLs to parse; converted with utf8::as_utf8() before parsing.
# decode: when TRUE, every column is passed through utils::URLdecode().
#
# Returns a data frame built by row-binding the per-URL parse results.
ada_url_parse <- function(url, decode = TRUE) {
    url <- utf8::as_utf8(url)
    # Each URL is parsed on its own; the parser also receives the byte
    # length of the string.
    parse_one <- function(x) Rcpp_ada_parse(x, nchar(x, type = "bytes"))
    parsed <- as.data.frame(do.call("rbind", lapply(url, parse_one)))
    if (!isTRUE(decode)) {
        return(parsed)
    }
    # simplify = FALSE makes apply() return a list of columns, so a
    # one-row input is not collapsed into a named vector.
    decoded <- apply(parsed, 2, function(x) utils::URLdecode(x), simplify = FALSE)
    as.data.frame(decoded)
}

This does not work completely.

# Parse a character vector of URLs and return the components as a data frame.
#
# url:    URLs to parse; converted with utf8::as_utf8() before parsing.
# decode: when TRUE, the columns of the row-bound result are passed
#         through utils::URLdecode().
#
# The conversion to a data frame happens once, at the very end.
ada_url_parse <- function(url, decode = TRUE) {
    url <- utf8::as_utf8(url)
    # Parse the URLs one at a time, handing the parser each string together
    # with its length in bytes, then stack the results row-wise.
    rows <- lapply(url, function(x) Rcpp_ada_parse(x, nchar(x, type = "bytes")))
    parsed <- do.call("rbind", rows)
    if (isTRUE(decode)) {
        # simplify = FALSE keeps apply() from simplifying its result;
        # a list with one element per column is returned instead.
        parsed <- apply(parsed, 2, utils::URLdecode, simplify = FALSE)
    }
    as.data.frame(parsed)
}

Yeah, I am working on this at the moment. This dimension dropping is painful.

It seems to be easier to do that in C++ as a for-loop rather than using the type-unsafe, drop-inconsistent `do.call`, `apply`, and friends (as long as we don't want to use purrr).

ok let me try to do that