/ostrich

WIP: Full-text search engine library written in Go using generics (Go 1.18+), heavily inspired by Tantivy

Primary language: Go · License: MIT

ostrich

License: MIT Test Codecov Go Report Card

Full-text search engine library written in Go using generics (Go 1.18+), heavily inspired by Tantivy

※ This library is not production-ready; do not use it in production.

Features

  • Full-text search
  • Configurable analyzer
  • Concurrent indexing in batch
  • Segment merge with LogMergePolicy
  • Mmap directory
  • Natural query language (e.g. "(go OR golang) AND (search OR fts)")
  • Concurrent search
  • TF-IDF scoring (will be replaced with BM25)

Supported field types:

  • Text

Supported query types:

  • Term, Conjunction, Disjunction, Boolean

※ More field and query types will be supported over time.

Example

package main

import (
	"fmt"

	"github.com/k-yomo/ostrich/analyzer"
	"github.com/k-yomo/ostrich/collector"
	"github.com/k-yomo/ostrich/index"
	"github.com/k-yomo/ostrich/indexer"
	"github.com/k-yomo/ostrich/query"
	"github.com/k-yomo/ostrich/reader"
	"github.com/k-yomo/ostrich/schema"
)

// main walks through the library end to end: it declares a schema, opens
// (or creates) an index at "tmp", writes and commits one document, then
// parses a query and prints the number of hits followed by each hit.
// Any error along the way aborts the program via panic.
func main() {
	// Schema: two text fields, both analyzed with the "en_stem" analyzer
	// registered here.
	s := schema.NewSchema()
	analyzer.Register("en_stem", analyzer.NewEnglishAnalyzer())
	phraseID := s.AddTextField("phrase", "en_stem")
	descriptionID := s.AddTextField("description", "en_stem")

	idx, err := index.NewBuilder(s).OpenOrCreate("tmp")
	if err != nil {
		panic(err)
	}

	// NOTE(review): the second argument is presumably a memory budget in
	// bytes — confirm against indexer.NewIndexWriter.
	w, err := indexer.NewIndexWriter(idx, 100_000_000)
	if err != nil {
		panic(err)
	}
	defer w.Close()

	// Build the single document to index, one value per schema field.
	phraseValue := &schema.FieldValue{
		FieldID: phraseID,
		Value:   "When the Rubber Hits the Road",
	}
	descriptionValue := &schema.FieldValue{
		FieldID: descriptionID,
		Value:   "When something is about to begin, get serious, or put to the test.",
	}
	w.AddDocument(&schema.Document{
		FieldValues: []*schema.FieldValue{phraseValue, descriptionValue},
	})
	_, err = w.Commit()
	if err != nil {
		panic(err)
	}

	r, err := reader.NewIndexReader(idx)
	if err != nil {
		panic(err)
	}
	defer r.Close()

	// The parser is given every field ID of the index schema.
	parser := query.NewParser(idx.Schema(), idx.Schema().FieldIDs())
	parsed, err := parser.Parse("phrase:hat OR description:serious")
	if err != nil {
		panic(err)
	}

	// Collect scored hits and a total count in a single search.
	// NOTE(review): (10, 0) is presumably (limit, offset) — confirm.
	topScores := collector.NewTopScoreCollector(10, 0)
	counter := collector.NewCountCollector()
	both := collector.NewTupleCollector(topScores, counter)

	result, err := reader.Search(r.Searcher(), parsed, both)
	if err != nil {
		panic(err)
	}

	// Left holds the first collector's result, Right the second's.
	fmt.Println("total hit:", result.Right)
	for _, h := range result.Left {
		fmt.Printf("docAddress: %+v, score: %v\n", h.DocAddress, h.Score)
	}
}