elsoroka/Satisfiability.jl

Add support for unicode

mykelk opened this issue · 4 comments

This works:

@satvariable(alpha, Real)
sat!(alpha ≤ 1)

but this does not:

@satvariable(α, Real)
sat!(α ≤ 1)

It results in:

┌ Error: Solver error:
│  (error "line 4 column 13: unexpected character")
│ (error "line 4 column 14: unexpected character")
│ (error "line 5 column 12: unexpected character")
│ (error "line 5 column 13: unexpected character")
│ sat
└ @ Satisfiability C:\Users\mykel\.julia\dev\Satisfiability\src\call_solver.jl:192

Oof, I see what happens here... the solver doesn't like Unicode, but Julia does:

julia> using Satisfiability
julia> @satvariable(α, Bool)
julia> @satvariable(β, Bool)
julia> smt(α∧β)

outputs

(declare-fun α () Bool)
(declare-fun β () Bool)
(assert (and α β))

From the SMT-LIB definition: "Most lexical tokens defined below are limited to US-ASCII printable characters, namely, characters 32dec to 126dec. The remaining printable characters, mostly used for non-English alphabet letters (characters 128dec and beyond), are allowed in string literals, quoted symbols, and comments." (Page 21, https://smt-lib.org/papers/smt-lib-reference-v2.6-r2021-05-12.pdf)
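Incidentally, that same passage suggests a possible escape hatch: quoted symbols. In SMT-LIB 2.6 a symbol wrapped in vertical bars may contain non-ASCII printable characters, so output along these lines would be legal per the spec (whether every solver round-trips such names cleanly in models and error messages is a separate question):

(declare-fun |α| () Bool)
(declare-fun |β| () Bool)
(assert (and |α| |β|))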

I agree it makes sense to accept Unicode variable names in Julia, so it seems there needs to be a translation mechanism that gives a one-to-one mapping between Unicode variable names in Julia and ASCII names in SMT-LIB.

Here is a pair of translation functions produced by ChatGPT 4o that might do the trick:

function convert_to_ascii(input_string::String)::String
    output_string = ""
    for char in input_string
        if char <= '\x7F'
            output_string *= char
        else
            # Replace each non-ASCII character with a placeholder built from its
            # hex code point, e.g. α -> <u+03b1>. (The original hex() call was
            # removed in Julia 1.0; string(n, base=16) is its replacement.)
            placeholder = string("<u+", string(UInt32(char), base=16, pad=4), ">")
            output_string *= placeholder
        end
    end
    return output_string
end

function decode_from_ascii(ascii_string::String)::String
    output_string = ""
    i = firstindex(ascii_string)
    n = lastindex(ascii_string)
    while i <= n
        # Placeholders are variable-width (code points above U+FFFF need more
        # than four hex digits, e.g. <u+1f30d>), so scan for the closing '>'
        # instead of assuming a fixed width.
        j = (i + 2 <= n && ascii_string[i:i+2] == "<u+") ? findnext('>', ascii_string, i) : nothing
        if j !== nothing
            code_point = parse(UInt32, ascii_string[i+3:j-1], base=16)
            output_string *= Char(code_point)
            i = j + 1  # skip past the placeholder
        else
            output_string *= ascii_string[i]
            i += 1
        end
    end
    return output_string
end

# Example usage
input_string = "Héllo, wörld! 🌍"
ascii_string = convert_to_ascii(input_string)
println(ascii_string)  # Output: "H<u+00e9>llo, w<u+00f6>rld! <u+1f30d>"

decoded_string = decode_from_ascii(ascii_string)
println(decoded_string)  # Output: "Héllo, wörld! 🌍"

So ChatGPT's code did not quite work as first generated (`hex` no longer exists in Julia, and fixed-width placeholders break on code points above U+FFFF, like the 🌍 above), but I fixed it and added tests for this issue in PR #52.
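The key property such a mapping has to satisfy is round-tripping, and it is only one-to-one because Julia identifiers can never contain characters like < or +, so a placeholder can't collide with literal identifier text. A minimal test sketch of that property (hypothetical, not necessarily the tests that landed in PR #52):

using Test

@testset "unicode <-> ascii round trip" begin
    for s in ["alpha", "α", "β₁", "Héllo, wörld! 🌍"]
        # Decoding an encoded string must recover the original exactly.
        @test decode_from_ascii(convert_to_ascii(s)) == s
        # The encoded form must itself be pure ASCII.
        @test all(c -> c <= '\x7F', convert_to_ascii(s))
    end
end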