/go-tf-idf

A small Go implementation of tf-idf (term frequency-inverse document frequency).

Primary language: Go. License: MIT.

go-tf-idf

Coverage Status

A small Go implementation of tf-idf (term frequency-inverse document frequency) with support for comparing documents using cosine similarity.

Usage

Install with `go get -u github.com/dkgv/go-tf-idf`.

package main

import (
    "fmt"
    go_tf_idf "github.com/dkgv/go-tf-idf"
)

// main walks through the library's basic workflow: build a tf-idf container
// from two documents, score a single term against one of them, and compare
// the two documents using the cosine comparator.
func main() {
    // Initializing a tf-idf container
    doc1 := "this is a document"
    doc2 := "and this is another document"
    tfidf := go_tf_idf.New(
        go_tf_idf.WithDocuments([]string{doc1, doc2}),
        go_tf_idf.WithDefaultStopWords(),
    )

    // Calculating tf-idf for a term
    term := "document"
    res1 := tfidf.TermFrequencyInverseDocumentFrequencyForTerm(term, doc1)
    fmt.Printf("res1 %f\n", res1)

    // Comparing two documents via cosine similarity
    comparator := go_tf_idf.CosineComparator
    similarity, err := tfidf.Compare(doc1, doc2, comparator)
    if err != nil {
        // Report the failure and stop: the similarity value should not be
        // used when Compare returns a non-nil error.
        fmt.Println("compare:", err)
        return
    }

    fmt.Printf("similarity %f\n", similarity)
}