sean-public/fast-skiplist

i hava a test to show. skiplist is not fast!

474420502 opened this issue · 3 comments

package main

import (
"bytes"
"encoding/gob"
"io/ioutil"
"log"
"os"
"testing"

pqueuekey "github.com/474420502/focus/priority_queuekey"
skiplist "github.com/sean-public/fast-skiplist"

"github.com/474420502/focus/compare"
"github.com/Pallinder/go-randomdata"

)

// TestCreateData Before Benchmark, Execute the func for test
func TestCreateData(t *testing.T) {

f, err := os.OpenFile("l.log", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
if err != nil {
	log.Println(err)
}

var l []int

m := make(map[int]int)
for i := 0; len(l) < CompartorSize; i++ {
	v := randomdata.Number(0, NumberMax)
	if _, ok := m[v]; !ok {
		m[v] = v
		l = append(l, v)
	}
}

var result bytes.Buffer
encoder := gob.NewEncoder(&result)
encoder.Encode(l)
lbytes := result.Bytes()
f.Write(lbytes)

}

// BenchmarkSkiplist skiplist Benchmark
func BenchmarkSkiplist(b *testing.B) {

l := loadTestData()
execCount := 5
b.N = len(l) * execCount

var temp []float64
for _, v := range l {
	temp = append(temp, float64(v))
}

b.ResetTimer()
b.StartTimer()

for i := 0; i < execCount; i++ {
	pq := skiplist.New()
	for _, v := range temp {
		pq.Set(v, v)
	}
}

}

// BenchmarkPriorityQueue like skiplist tree
func BenchmarkPriorityQueue(b *testing.B) {

l := loadTestData()
execCount := 5
b.N = len(l) * execCount

b.ResetTimer()
b.StartTimer()

for i := 0; i < execCount; i++ {
	pq := pqueuekey.New(compare.Int)
	for _, v := range l {
		pq.Push(v, v)
	}
}

}

const CompartorSize = 1000000
const NumberMax = 50000000

func loadTestData() []int {
data, err := ioutil.ReadFile("./l.log")
if err != nil {
log.Println(err)
}
var l []int
decoder := gob.NewDecoder(bytes.NewReader(data))
decoder.Decode(&l)
return l
}

Thank you for reporting what may be a performance issue. Can you describe what your code sample is doing and what is in the l.log file? Is it sorted and, if so, is it ascending or descending?

What is the expected result and what are you seeing instead?

I think there are some formatting problems when viewing your post, can you try editing so all the code is in a single formatted block.

package main

import (
	"bytes"
	"encoding/gob"
	"io/ioutil"
	"log"
	"os"
	"testing"

	pqueuekey "github.com/474420502/focus/priority_queuekey"
	skiplist "github.com/sean-public/fast-skiplist"

	"github.com/474420502/focus/compare"
	"github.com/Pallinder/go-randomdata"
)

// TestCreateData Before Benchmark, Execute the func for test
func TestCreateData(t *testing.T) {

	f, err := os.OpenFile("l.log", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
	if err != nil {
		log.Println(err)
	}

	var l []int

	m := make(map[int]int)
	for i := 0; len(l) < CompartorSize; i++ {
		v := randomdata.Number(0, NumberMax)
		if _, ok := m[v]; !ok {
			m[v] = v
			l = append(l, v)
		}
	}

	var result bytes.Buffer
	encoder := gob.NewEncoder(&result)
	encoder.Encode(l)
	lbytes := result.Bytes()
	f.Write(lbytes)

}

// BenchmarkSkiplist  skiplist Benchmark
func BenchmarkSkiplist(b *testing.B) {

	l := loadTestData()
	execCount := 5
	b.N = len(l) * execCount

	var temp []float64
	for _, v := range l {
		temp = append(temp, float64(v))
	}

	b.ResetTimer()
	b.StartTimer()

	for i := 0; i < execCount; i++ {
		pq := skiplist.New()
		for _, v := range temp {
			pq.Set(v, v)
		}
	}
}

// BenchmarkPriorityQueue like skiplist tree
func BenchmarkPriorityQueue(b *testing.B) {

	l := loadTestData()
	execCount := 5
	b.N = len(l) * execCount

	b.ResetTimer()
	b.StartTimer()

	for i := 0; i < execCount; i++ {
		pq := pqueuekey.New(compare.Int)
		for _, v := range l {
			pq.Push(v, v)
		}
	}
}

const CompartorSize = 1000000
const NumberMax = 50000000

func loadTestData() []int {
	data, err := ioutil.ReadFile("./l.log")
	if err != nil {
		log.Println(err)
	}
	var l []int
	decoder := gob.NewDecoder(bytes.NewReader(data))
	decoder.Decode(&l)
	return l
}

image
image

@sean-public

// maxLevel has to be int(math.Ceil(math.Log(N))) for DefaultProbability (where N is an upper bound on the
// number of elements in a skip list). See http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.17.524

The default number of levels is 18.

You might try calling NewWithMaxLevel(maxLevel int) instead of New() with levels around 14. As noted in the function's comment, I'm calculating the natural log of a million (your upper bound of samples), which just under 14. This is one of two variables I have exposed to fine-tune performance. As maxLevel grows, we would add more "height" to the data structure, allowing it to be traversed more quickly in exchange for increased memory consumption. For every data set, there is an optimal value. I think you can see a small improvement in execution time by finding it.

The main purpose for this skip list implementation is to store time series data, which generally arrives in order and must remain ordered for quickly recalling ranges to satisfy queries. Therefore, the optimizations include "search fingers", which bookmark the last few locations referenced. The access pattern in your test is completely random and not optimal for this data structure, so performance is just "ok".

Finally, comparing a skip list to a priority queue can be tricky; skip lists are simple and easy to understand (and not hard to implement correctly). On insert, they are doing quite a bit of work, especially if the keys are always random. A priority queue has to juggle binary tree rotations, which is still work, but it's more of a fixed cost for evenly distributed (random) keys.


For these reasons, I didn't include a random insertion benchmark and would not recommend fast-skiplist for use cases where extreme performance matters and keys will be inserted randomly. I do recommend it when dealing with time series or other "mostly-ordered" data, which we encounter often in the real world.