Non-deterministic FSM generation
Closed this issue · 3 comments
bicycle1885 commented
This is a note for myself. I need to remove nondeterminism of FSM generation before #16 because it may affect the benchmarks.
Here is the smallest example of nondeterminism I could make:
import Automa
import Automa.RegExp: @re_str
re = re"0?12|0?1*"
dot = Automa.machine2dot(Automa.compile(re))
write("test.0.dot", dot)
run(`dot -Tpng -o test.0.png test.0.dot`)
n = 0
for i in 1:100
dot′ = Automa.machine2dot(Automa.compile(deepcopy(re)))
if hash(dot′) != hash(dot)
n += 1
write("test.$(i).dot", dot′)
run(`dot -Tpng -o test.$(i).png test.$(i).dot`)
end
end
@show n
There are two possibilities:
Curiously, re"1*|2"
seems to be deterministic.
bicycle1885 commented
bicycle1885 commented
Combining re2nfa
and nfa2dfa
causes the problem.
import Automa
import Automa.RegExp: @re_str
re = re"0?11|0?12"
println("nfa2dot")
n = 0
for _ in 1:1000
nfa = Automa.re2nfa(re)
n += Automa.nfa2dot(nfa) != Automa.nfa2dot(nfa)
end
@show n
println("nfa2dot . re2nfa")
n = 0
for _ in 1:1000
n += Automa.nfa2dot(Automa.re2nfa(re)) != Automa.nfa2dot(Automa.re2nfa(re))
end
@show n
println("dfa2dot . nfa2dfa")
n = 0
for _ in 1:1000
nfa = Automa.re2nfa(re)
n += Automa.dfa2dot(Automa.nfa2dfa(nfa)) != Automa.dfa2dot(Automa.nfa2dfa(nfa))
end
@show n
println("dfa2dot . nfa2dfa . re2nfa")
n = 0
for _ in 1:1000
n += Automa.dfa2dot(Automa.nfa2dfa(Automa.re2nfa(re))) != Automa.dfa2dot(Automa.nfa2dfa(Automa.re2nfa(re)))
end
@show n
nfa2dot
n = 0
nfa2dot . re2nfa
n = 0
dfa2dot . nfa2dfa
n = 0
dfa2dot . nfa2dfa . re2nfa
n = 135
bicycle1885 commented
This kind of poor man's Dict
and Set
seem to solve the problem. So I think OrderedDict
and/or OrderedSet
still have nondeterministic operations.
type Dict{K,V} <: Associative{K,V}
data::Vector{Tuple{K,V}}
end
function Dict{K,V}() where {K,V}
return Dict{K,V}(Tuple{K,V}[])
end
function Dict(kvs::Pair{K,V}...) where {K,V}
return Dict{K,V}([(k, v) for (k, v) in kvs])
end
function Dict(vals)
return Dict([(k, v) for (k, v) in vals])
end
function Base.copy(dict::Dict{K,V}) where {K,V}
return Dict{K,V}(copy(dict.data))
end
function Base.length(dict::Dict)
return length(dict.data)
end
function Base.eltype(::Type{Dict{K,V}}) where {K,V}
return Tuple{K,V}
end
function Base.haskey(dict::Dict, key)
for (k, v) in dict.data
if k == key
return true
end
end
return false
end
function Base.getindex(dict::Dict, key)
for (k, v) in dict.data
if k == key
return v
end
end
throw(KeyError(key))
end
function Base.get!(dict::Dict, key, default)
if haskey(dict, key)
return dict[key]
end
dict[key] = default
return default
end
function Base.get!(f::Function, dict::Dict, key)
if haskey(dict, key)
return dict[key]
end
default = f()
dict[key] = default
return default
end
function Base.setindex!(dict::Dict, val, key)
for (i, (k, v)) in enumerate(dict.data)
if k == key
dict.data[i] = (key, val)
return dict
end
end
resize!(dict.data, length(dict.data) + 1)
dict.data[end] = (key, val)
return dict
end
function Base.start(dict::Dict)
return 1
end
function Base.done(dict::Dict, i)
return i > endof(dict.data)
end
function Base.next(dict::Dict, i)
return dict.data[i], i + 1
end
type Set{T} <: AbstractSet{T}
dict::Dict{T,Void}
end
function Set{T}() where T
return Set{T}(Dict{T,Void}())
end
function Set(vals)
return Set(Dict([(v, nothing) for v in vals]))
end
function Base.length(set::Set)
return length(set.dict)
end
function Base.eltype(::Type{Set{T}}) where T
return T
end
function Base.haskey(set::Set, val)
return haskey(set.dict, val)
end
function Base.push!(set::Set, val)
if !haskey(set, val)
set.dict[val] = nothing
end
return set
end
function Base.pop!(set::Set)
return pop!(set.dict.data)[1]
end
function Base.delete!(set::Set, val)
for (i, (v, _)) in enumerate(set.dict.data)
if v == val
deleteat!(set.dict.data, i)
break
end
end
return set
end
function Base.union!(set::Set, xs)
for x in xs
push!(set, x)
end
return set
end
function Base.union(set::Set, xs)
return union!(copy(set), xs)
end
function Base.start(set::Set)
return start(set.dict)
end
function Base.done(set::Set, s)
return done(set.dict, s)
end
function Base.next(set::Set, s)
item, s = next(set.dict, s)
return item[1], s
end
function Base.copy(set::Set{T}) where T
return Set{T}(copy(set.dict))
end
function Base.filter(p::Function, set::Set{T}) where T
ret = Set{T}()
for x in set
if p(x)
push!(ret, x)
end
end
return ret
end