ALS for implicit data
Closed this issue · 4 comments
Hello Prof. Hahsler,
I wish to trial the ALS algorithm on my dataset, which is composed of implicit-feedback data for product purchases. I see that the recommenderlab package appears to support an appropriate implementation of Koren et al. ("Collaborative Filtering for Implicit Feedback Datasets") within the recommenderRegistry; however, I'm encountering errors and I'm unsure whether my approach is incorrect or unsupported by the software.
To illustrate these errors please refer to my example using the MovieLens data which follows. I'm a little confused at these errors because:
-
the binaryRatingMatrix would appear to be supported in ALS_implicit as per the documentation, and
-
calcPredictionAccuracy does not appear to return a correct result for the 'topNList, binaryRatingMatrix' signature despite being referenced within the documentation.
Any assistance you could provide to clarify my queries would be greatly appreciated.
Kind regards,
Michael
library(recommenderlab)
library(arules)

# Load the MovieLens ratings and convert them to a binaryRatingMatrix
data(MovieLense)
data.bin <- binarize(MovieLense, minRating = 1)

# Fit an ALS_implicit recommender on the first 500 users
r <- Recommender(
  data.bin[1:500],
  method = "ALS_implicit",
  parameter = list(
    lambda = 0.1,
    n_factors = 10,
    n_iterations = 10,
    seed = NULL,
    verbose = TRUE
  )
)

# Top-7 recommendations for users 501 and 502
recom <- predict(r, data.bin[501:502], n = 7)
recom_topNList <- predict(
  r,
  newdata = data.bin[501:502, ],
  type = "topNList",
  n = 7
)
as(recom_topNList, "list")

# Evaluation scheme: 90/10 split, all-but-5 items given per test user
scheme <- evaluationScheme(
  data.bin[1:500],
  method = "split",
  train = 0.9,
  given = -5
)

# Methods registered for binary (implicit) data
recommenderRegistry$get_entries(dataType = "binaryRatingMatrix")

# Algorithms to compare on the implicit data
algorithms <- list(
  "random items" = list(name = "RANDOM", param = NULL),
  "popular items" = list(name = "POPULAR", param = NULL),
  "user-based CF" = list(name = "UBCF", param = list(nn = 50)),
  "item-based CF" = list(name = "IBCF", param = list(k = 50)),
  "ALS Implicit" = list(
    name = "ALS_implicit",
    param = list(
      lambda = 0.1,
      n_factors = 10,
      n_iterations = 10,
      seed = NULL,
      verbose = TRUE
    )
  ),
  "Association Rules" = list(name = "AR", param = NULL)
)

# Run the evaluation for every algorithm
results <- evaluate(scheme, algorithms)
# ^^^ the above line produces the following error
# Error in matrix2[only_new_users, , drop = FALSE] :
# invalid or not-yet-implemented 'Matrix' subsetting
# calculate metrics for algorithms
# Train `algorithm` on the scheme's training data, predict ratings for the
# "known" part of the test users and score them against the "unknown" part.
#
# Arguments:
#   scheme    - evaluation scheme queried via getData() for the "train",
#               "known" and "unknown" data.
#   algorithm - name of the recommender method (e.g. "UBCF").
#   parameter - list of parameters for the method, or NULL.
#
# Returns a data.frame holding the algorithm name followed by the values
# returned by calcPredictionAccuracy().
accuracy_table <- function(scheme, algorithm, parameter) {
  model <- Recommender(
    getData(scheme, "train"),
    algorithm,
    parameter = parameter
  )
  predicted <- predict(model, getData(scheme, "known"), type = "ratings")
  metrics <- calcPredictionAccuracy(predicted, getData(scheme, "unknown"))

  # Prepend the algorithm name, then drop any NULL entries
  entries <- c(algorithm = algorithm, metrics)
  entries <- entries[!vapply(entries, is.null, logical(1))]
  data.frame(as.list(entries))
}
# Compute the accuracy metrics for each algorithm
table_random <- accuracy_table(scheme, algorithm = "RANDOM", parameter = NULL)
table_ubcf <- accuracy_table(
  scheme,
  algorithm = "UBCF",
  parameter = list(nn = 50)
)
table_ibcf <- accuracy_table(
  scheme,
  algorithm = "IBCF",
  parameter = list(k = 50)
)
table_pop <- accuracy_table(scheme, algorithm = "POPULAR", parameter = NULL)
table_ALS_implicit <- accuracy_table(
  scheme,
  algorithm = "ALS_implicit",
  parameter = list(
    lambda = 0.1,
    n_factors = 10,
    n_iterations = 10,
    seed = NULL,
    verbose = TRUE
  )
)
# ^^^ the calcPredictionAccuracy does not appear to return a correct result
# for the 'topNList,binaryRatingMatrix' signature

# Report the metrics side by side
rbind(table_random, table_pop, table_ubcf, table_ibcf, table_ALS_implicit)

# Plot the ROC and precision/recall graphs
plot(results, annotate = c(1, 3), legend = "topright")
plot(results, "prec/rec", annotate = 3, legend = "topleft")
Thanks for the bug report. I think I have this problem fixed now. Please try the development version from github. I had to modify your code slightly.
library(recommenderlab)
library(arules)

# Load the MovieLens ratings and convert them to a binaryRatingMatrix
data(MovieLense)
data.bin <- binarize(MovieLense, minRating = 1)

# Fit an ALS_implicit recommender on the first 500 users
r <- Recommender(
  data.bin[1:500],
  method = "ALS_implicit",
  parameter = list(
    lambda = 0.1,
    n_factors = 10,
    n_iterations = 10,
    seed = NULL,
    verbose = TRUE
  )
)

# Top-7 recommendations for users 501 and 502
recom <- predict(r, data.bin[501:502], n = 7)
recom_topNList <- predict(
  r,
  newdata = data.bin[501:502, ],
  type = "topNList",
  n = 7
)
as(recom_topNList, "list")

# Evaluation scheme: 90/10 split, all-but-5 items given per test user
scheme <- evaluationScheme(
  data.bin[1:500],
  method = "split",
  train = 0.9,
  given = -5
)

# Methods registered for binary (implicit) data
recommenderRegistry$get_entries(dataType = "binaryRatingMatrix")

# Algorithms to compare on the implicit data
algorithms <- list(
  "random items" = list(name = "RANDOM", param = NULL),
  "popular items" = list(name = "POPULAR", param = NULL),
  "user-based CF" = list(name = "UBCF", param = list(nn = 50)),
  "item-based CF" = list(name = "IBCF", param = list(k = 50)),
  "ALS Implicit" = list(
    name = "ALS_implicit",
    param = list(
      lambda = 0.1,
      n_factors = 10,
      n_iterations = 10,
      seed = NULL,
      verbose = TRUE
    )
  ),
  "Association Rules" = list(name = "AR", param = NULL)
)

# Run the evaluation for every algorithm
results <- evaluate(scheme, algorithms)
# calculate metrics for algorithms
# I had to fix the following code to work with binary data
# Train `algorithm` on the scheme's training data, build a top-N list for the
# "known" part of the test users and score it against the "unknown" part.
#
# Arguments:
#   scheme    - evaluation scheme queried via getData() for the "train",
#               "known" and "unknown" data.
#   algorithm - name of the recommender method (e.g. "UBCF").
#   parameter - list of parameters for the method, or NULL.
#   given     - forwarded to calcPredictionAccuracy(). Defaults to -5, the
#               value this function previously hard-coded; pass the same
#               `given` that was used to create `scheme`.
#
# Returns a data.frame holding the algorithm name followed by the values
# returned by calcPredictionAccuracy().
accuracy_table <- function(scheme, algorithm, parameter, given = -5) {
  model <- Recommender(
    getData(scheme, "train"),
    algorithm,
    parameter = parameter
  )
  # Binary data has no rating values to predict, so evaluate a top-N list
  top_n <- predict(model, getData(scheme, "known"), type = "topNList")
  metrics <- calcPredictionAccuracy(
    top_n,
    getData(scheme, "unknown"),
    given = given
  )

  # Prepend the algorithm name, then drop any NULL entries
  entries <- c(algorithm = algorithm, metrics)
  entries <- entries[!vapply(entries, is.null, logical(1))]
  data.frame(as.list(entries))
}
# Compute the accuracy metrics for each algorithm
table_random <- accuracy_table(scheme, algorithm = "RANDOM", parameter = NULL)
table_ubcf <- accuracy_table(
  scheme,
  algorithm = "UBCF",
  parameter = list(nn = 50)
)
table_ibcf <- accuracy_table(
  scheme,
  algorithm = "IBCF",
  parameter = list(k = 50)
)
table_pop <- accuracy_table(scheme, algorithm = "POPULAR", parameter = NULL)
table_ALS_implicit <- accuracy_table(
  scheme,
  algorithm = "ALS_implicit",
  parameter = list(
    lambda = 0.1,
    n_factors = 10,
    n_iterations = 10,
    seed = NULL,
    verbose = TRUE
  )
)

# Report the metrics side by side
rbind(table_random, table_pop, table_ubcf, table_ibcf, table_ALS_implicit)

# Plot the ROC and precision/recall graphs
plot(results, annotate = c(1, 3), legend = "topright")
plot(results, "prec/rec", annotate = 3, legend = "topleft")
Hello Prof. Hahsler,
I also get an error when I run the code you fixed:
Error in matrix2[only_new_users, , drop = FALSE] :
invalid or not-yet-implemented 'Matrix' subsetting
Any assistance you could provide to clarify my queries would be greatly appreciated.
regards,
Alex
Please send me code that recreates this problem.
@mhahsler thanks. The code is the same as yours; I just tested the ALS algorithm, but I get the error.