/head-dictionary

Primary LanguageRustMIT LicenseMIT

PrefixDictionary

A PrefixDictionary is like a dictionary, but enabling the capacity to search by prefix. It's underlying structure is a n-ary tree.

It operates only on Strings. It's (way) slower to find individual elements (hashmap::contains), but faster to find all words starting with a prefix.

Timer precision: 100 ns
all                             fastest       │ slowest       │ median        │ mean          │ samples │ iters
╰─ benches                                    │               │               │               │         │
   ├─ all_words_starting_with                 │               │               │               │         │
   │  ├─ hash_set                             │               │               │               │         │
   │  │  ├─ Aaron               95.29 µs      │ 178.9 µs      │ 96.89 µs      │ 99.55 µs      │ 100     │ 100
   │  │  ├─ just                92.59 µs      │ 395.2 µs      │ 98.59 µs      │ 105.8 µs      │ 100     │ 100
   │  │  ├─ not_present_at_all  47.59 µs      │ 82.89 µs      │ 48.04 µs      │ 50.82 µs      │ 100     │ 100
   │  │  ╰─ wagons              101.7 µs      │ 242.5 µs      │ 107.2 µs      │ 112 µs        │ 100     │ 100
   │  ╰─ head_dict                            │               │               │               │         │
   │     ├─ Aaron               165.3 ns      │ 298.2 ns      │ 181 ns        │ 179 ns        │ 100     │ 6400
   │     ├─ just                10.09 µs      │ 24.19 µs      │ 11.29 µs      │ 12.12 µs      │ 100     │ 100
   │     ├─ not_present_at_all  48.98 ns      │ 72.42 ns      │ 49.37 ns      │ 50.43 ns      │ 100     │ 25600
   │     ╰─ wagons              176.3 ns      │ 409.1 ns      │ 187.2 ns      │ 188.8 ns      │ 100     │ 6400
   ╰─ contains_word                           │               │               │               │         │
      ├─ hash_set                             │               │               │               │         │
      │  ├─ Aaron               16.37 ns      │ 22.13 ns      │ 16.46 ns      │ 16.84 ns      │ 100     │ 102400
      │  ├─ just                12.66 ns      │ 196.7 ns      │ 12.75 ns      │ 14.91 ns      │ 100     │ 102400
      │  ├─ not_present_at_all  15.49 ns      │ 28.28 ns      │ 15.59 ns      │ 15.92 ns      │ 100     │ 102400
      │  ╰─ wagons              16.17 ns      │ 179.4 ns      │ 16.95 ns      │ 21.02 ns      │ 100     │ 12800
      ╰─ head_dict                            │               │               │               │         │
         ├─ Aaron               70.86 ns      │ 120 ns        │ 71.25 ns      │ 72.83 ns      │ 100     │ 25600
         ├─ just                1.862 µs      │ 5.649 µs      │ 2.012 µs      │ 2.178 µs      │ 100     │ 800
         ├─ not_present_at_all  47.81 ns      │ 112.2 ns      │ 48.2 ns       │ 49.72 ns      │ 100     │ 25600
         ╰─ wagons              76.33 ns      │ 286.4 ns      │ 77.89 ns      │ 89.94 ns      │ 100     │ 12800

Pseudo code

a = PrefixDictionary("dictionary", "lapin", "lapins", "lapine")

  • a.contains("dict") -> as_prefix because "dict" is not in the word list but only a prefix
  • a.contains("dictionary") -> as_word because "dictionary" is an actual word and no longer word exist
  • a.contains("lapin") -> as_prefix | as_word because "lapin" is both a word and a prefix (for "lapins" and "lapine")
  • a.contains("tutu") -> None because tutu is not in the dictionary

Note

In the examples above, only the first t of the word "tutu" will be read as we know (from the structure of dictionary), that no words inserted starts with the letter t.

Example

pub fn test_simple_1() {
    let mut dict = PrefixDictionary::new();
    dict.feed_str(["dictionary", "lapin", "lapins", "lapine"]);
    assert_eq!(4, dict.len());

    let mut result = dict.contains_str("dict");
    assert!(result.is_some());
    assert_eq!(result.unwrap(), SearchResult::AS_PREFIX);

    result = dict.contains_str("lapin");
    assert!(result.is_some());
    assert_eq!(result.unwrap(), SearchResult::AS_PREFIX | SearchResult::AS_WORD);

    result = dict.contains_str("dictionary");
    assert!(result.is_some());
    assert_eq!(result.unwrap(), SearchResult::AS_WORD);

    result = dict.contains_str("tutu");
    assert!(result.is_none());
}