nitanilla/GH4RE

Clustering Corpus R

Closed this issue · 0 comments

# Text-cleaning pipeline for the corpus `data` (tm package).
#
# Order matters:
#   1. URLs are removed first, while they are still intact tokens.
#   2. Digits are dropped.
#   3. Stop words are removed BEFORE punctuation is stripped, so that
#      contractions in the stop-word list (e.g. "don't", "i'm") still match
#      instead of being split into fragments such as "don t".
#   4. Every remaining non-alphanumeric character becomes a space.
#   5. Whitespace is collapsed last, because each previous step can leave
#      runs of spaces behind.
#
# NOTE(review): the stop-word list is lowercase and no case folding is done
# here, so capitalised stop words ("The") survive. If that is not intended,
# add `tm_map(docs, content_transformer(tolower))` before step 3.

# Replace a URL (from "http" up to the next whitespace) with a single space.
# The match stops at whitespace so the text following the URL is preserved.
removeURL <- function(x) gsub("http[^[:space:]]*", " ", x)

# Replace every character that is not a letter or digit with a space.
removeNonAlphanumeric <- function(x) gsub("[^[:alnum:]]", " ", x)

docs <- tm_map(data, content_transformer(removeURL))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- tm_map(docs, content_transformer(removeNonAlphanumeric))
docs <- tm_map(docs, stripWhitespace)