6m segfault at compilation on linux/amd64
henesy opened this issue · 3 comments
Building
The file that crashes 6m, this is not minimized, apologies:
use std
use bio
pkg adb =
const ingest : (f : byte[:] -> std.result(db, byte[:]))
/* Types */
// An attribute=value pair
type attribute = struct
key : byte[:]
value : byte[:]
;;
// A tuple composed of attributes
type tuple = struct
pairs : attribute[256]
;;
// An individual record composed of tuples
type record = struct
tuples : tuple[256]
;;
// A parsed attrdb database
type db = struct
records : record[256]
;;
// The results from a query
type query = union
`Some
`None
;;
/* State */
var chatty = false
;;
/* ???
Are traits in myr powerful enough to implement find(@f) as :: findable on top of:
{tuple[:], record[:], attribute[:]}
???
*/
// Open and parse a file
const ingest = {path : byte[:]
var f, db
match bio.open(path, bio.Rd)
| `std.Ok file:
f = file
| `std.Err e:
-> `std.Err e
;;
// States for parsing state machine
type states = union
`SingleQuote uint64 // The starting character point for the quote
`DoubleQuote uint64 // ^
`Comment
`Newline
`Key
`Value
;;
// Current state
var state = `Newline
// Last state
var last = state
// Current line for errors - tick by passing \n
var line = 1
// Current character for errors - tick each iteration
var cn = 0
// String builder buffer
var buf = std.mksb() // Memory leak? - need init for compiler to ☺
// Index trackers for slices
var ki = 0, vi = 0, pi = 0, ti = 0, ri = 0
// Parse into db
for c : bio.bychar(f)
last = state
cn++
if c == '\n'
line++
cn = 0
;;
if chatty
// Nice printing of each character
std.put("→ ")
match c
| '\n':
std.put("\\n")
| '\t':
std.put("\\t")
| ' ':
std.put("_")
| _:
std.put("{}", c)
;;
std.put(" ({})\n", state)
;;
match state
| `Comment:
// Read through until end of line
if c == '\n'
state = `Newline
;;
continue
| _:
;
;;
match c
| '#':
// Allow comments at the beginning of lines and after values
match state
| `Newline:
;
| `Value:
;
| _:
-> `std.Err std.fmt("invalid comment beginning at {}:{}:{}", path, line, cn)
;;
state = `Comment
| '\n':
var str = std.sbpeek(buf)
std.sbtrim(buf, 0)
if str.len < 1
// Empty string, discard value
;
else
std.put("» finished value (\\n): {}\n", str)
db.records[ri].tuples[ti].pairs[pi].value = str
;;
state = `Newline
| '\'':
match state
| `SingleQuote origin:
// In a quote already
match bio.peekc(f)
| `std.Ok next:
match next
| '\'':
// A '' in a quote
if ! std.sbputc(buf, '\'')
-> `std.Err std.fmt("could not insert ' at {}:{}:{}", path, line, cn)
;;
// Consume the next quote
bio.getc(f)
| _:
// Ending a quote
var str = std.sbpeek(buf)
std.sbtrim(buf, 0)
if str.len < 1
// Empty string, discard value
else
std.put("» finished value ('): {}\n", str)
db.records[ri].tuples[ti].pairs[pi].value = str
;;
std.put("»» committing previous pair (')\n")
//tuple.pairs = std.slpush(&tuple.pairs, pair)
pi++
state = `Key
buf = std.mksb()
;;
| `std.Err e:
-> `std.Err std.fmt("single quote peek failed at {}:{}:{}", path, line, cn)
;;
| `Value:
// Beginning a quote
state = `SingleQuote cn
| `DoubleQuote origin:
std.sbputc(buf, c)
| _:
-> `std.Err std.fmt("unexpected single quote at {}:{}:{}", path, line, cn)
;;
| '\"':
match state
| `DoubleQuote origin:
// In a quote already
match bio.peekc(f)
| `std.Ok next:
match next
| '\"':
// A "" in a quote
if ! std.sbputc(buf, '\"')
-> `std.Err std.fmt("could not insert \" at {}:{}:{}", path, line, cn)
;;
// Consume the next quote
bio.getc(f)
| _:
// Ending a quote
var str = std.sbpeek(buf)
std.sbtrim(buf, 0)
if str.len < 1
// Empty string, discard value
else
std.put("» finished value (\"): {}\n", str)
db.records[ri].tuples[ti].pairs[pi].value = str
;;
std.put("»» committing previous pair (\")\n")
//tuple.pairs = std.slpush(&tuple.pairs, pair)
pi++
state = `Key
buf = std.mksb()
;;
| `std.Err e:
-> `std.Err std.fmt("double quote peek failed at {}:{}:{}", path, line, cn)
;;
| `Value:
// Beginning a quote
state = `DoubleQuote cn
| `SingleQuote origin:
std.sbputc(buf, c)
| _:
-> `std.Err std.fmt("unexpected double quote at {}:{}:{}", path, line, cn)
;;
| ' ':
match state
| `Newline:
// This is necessary, apparently
// Start a new sub-tuple of a record
std.put("»» committing previous tuple to record (_)\n")
/*
record.tuples = std.slpush(&record.tuples, tuple)
tuple.pairs = std.slzalloc(0)
*/
ti++
pi = 0
std.put("»» start sub-tuple (_)\n")
state = `Key
buf = std.mksb()
| `Key:
// Allow to passthrough, might be a mistake?
;
| `Value:
var str = std.sbpeek(buf)
std.sbtrim(buf, 0)
if str.len < 1
// Empty string, discard value
else
std.put("» finished value (_): {}\n", str)
db.records[ri].tuples[ti].pairs[pi].value = str
;;
std.put("»» committing previous pair (_)\n")
//tuple.pairs = std.slpush(&tuple.pairs, pair)
pi++
state = `Key
buf = std.mksb()
| `SingleQuote origin:
std.sbputc(buf, c)
| `DoubleQuote origin:
std.sbputc(buf, c)
| _:
-> `std.Err std.fmt("invalid space at {}:{}:{}", path, line, cn)
;;
| '\t':
match state
| `Newline:
// This is necessary, apparently
// Start a new sub-tuple of a record
state = `Key
buf = std.mksb()
std.put("»» committing previous tuple (\\t)\n")
/*
record.tuples = std.slpush(&record.tuples, tuple)
tuple.pairs = std.slzalloc(0)
*/
ti++
pi = 0
std.put("»» start sub-tuple (\\t)\n")
| `Key:
// Allow to passthrough, might be a mistake?
;
| `Value:
var str = std.sbpeek(buf)
std.sbtrim(buf, 0)
if str.len < 1
// Empty string, discard value
else
std.put("» finished value (\\t): {}\n", str)
db.records[ri].tuples[ti].pairs[pi].value = str
;;
std.put("»» committing previous pair (\\t)\n")
//tuple.pairs = std.slpush(&tuple.pairs, pair)
pi++
state = `Key
buf = std.mksb()
| `SingleQuote origin:
std.sbputc(buf, c)
| `DoubleQuote origin:
std.sbputc(buf, c)
| _:
-> `std.Err std.fmt("invalid tab at {}:{}:{}", path, line, cn)
;;
| '=':
var str = std.sbpeek(buf)
std.sbtrim(buf, 0)
if str.len < 1
// Empty string, disallowed for keys
-> `std.Err std.fmt("empty key at {}:{}:{}", path, line, cn)
else
std.put("» finished key (=): {}\n", str)
db.records[ri].tuples[ti].pairs[pi].key = str
;;
state = `Value
buf = std.mksb()
| _:
// A key or value text segment
match state
| `Key:
if ! std.sbputc(buf, c)
-> `std.Err std.fmt("could not insert key char at {}:{}:{}", path, line, cn)
;;
| `Value:
std.sbputc(buf, c)
| `SingleQuote origin:
std.sbputc(buf, c)
| `DoubleQuote origin:
std.sbputc(buf, c)
| `Newline:
// Start a new record
// TODO - do we need to push the last pair and tuple as well?
if ti > 0
std.put("»» committing previous record\n")
//db.records = std.slpush(&db.records, record)
std.put("»» # tuples in rec = {}\n", ti)
ri++
ti = 0
pi = 0
;;
//record.tuples = std.slzalloc(0)
//tuple.pairs = std.slzalloc(0)
ti = 0
pi = 0
std.put("»» start record\n")
std.put("»» start master tuple\n")
state = `Key
std.sbfree(buf)
buf = std.mksb()
std.sbputc(buf, c)
| _:
-> `std.Err std.fmt("unexpected character at {}:{}:{}", path, line, cn)
;;
;;
;;
// TODO - if there is a tuple (or key= only pair) dangling, be sure to append it
// Check key.len?
if ti > 0
// TODO - do we need to push the last pair and tuple as well?
std.put("»» committing dangling record\n")
//db.records = std.slpush(&db.records, record)
ri++
;;
var nr = 0, nt = 0, np = 0
for r : db.records
nr++
std.put("nr = {}\n", nr)
for t : r.tuples
nt++
std.put("nt = {}\n", nt)
for p : t.pairs
std.put("np = {}\n", np)
np++
if p.key.len < 1
std.put("¡! 0 length key at {}/{}/{}\n", nr, nt, np)
elif p.value.len < 1
std.put("¡! 0 length value at {}/{}/{}\n", nr, nt, np)
else
std.put(" {}={}\n", p.key, p.value)
;;
;;
;;
;;
/* Correct:
nr = 4
nt = 12
np = 17
*/
std.put("# nr = {}\n# nt = {}\n# np = {}\n", nr, nt, np)
-> `std.Ok db
}
Segfaulting
Compilation:
; 6m -O obj adb.myr
segmentation violation--core dumped
;
Running and interrupting valgrind with SIGINT:
; valgrind 6m -O obj adb.myr
==24845== Memcheck, a memory error detector
==24845== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==24845== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
==24845== Command: 6m -O obj adb.myr
==24845==
^?==24845==
==24845== Process terminating with default action of signal 2 (SIGINT)
==24845== at 0x483DF50: strlen (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==24845== by 0x4914532: strdup (strdup.c:41)
==24845== by 0x13A409: mklbl (node.c:270)
==24845== by 0x13A505: genlbl (node.c:291)
==24845== by 0x15050E: nextnode (match.c:172)
==24845== by 0x150C26: addwildrec (match.c:326)
==24845== by 0x150D95: addwildrec (match.c:344)
==24845== by 0x150C7F: addwildrec (match.c:330)
==24845== by 0x150D95: addwildrec (match.c:344)
==24845== by 0x15104F: addwild (match.c:392)
==24845== by 0x152166: addpat (match.c:662)
==24845== by 0x152901: genonematch (match.c:788)
==24845==
==24845== HEAP SUMMARY:
==24845== in use at exit: 18,349,457 bytes in 232,547 blocks
==24845== total heap usage: 459,939 allocs, 227,392 frees, 28,815,981,120 bytes allocated
==24845==
==24845== LEAK SUMMARY:
==24845== definitely lost: 167,279 bytes in 3,197 blocks
==24845== indirectly lost: 68,377 bytes in 1,385 blocks
==24845== possibly lost: 72 bytes in 1 blocks
==24845== still reachable: 18,113,729 bytes in 227,964 blocks
==24845== suppressed: 0 bytes in 0 blocks
==24845== Rerun with --leak-check=full to see details of leaked memory
==24845==
==24845== For lists of detected and suppressed errors, rerun with: -s
==24845== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
;
@henesy I suspect you were using an older compiler. The latest does not have functions such as addwildrec
.
@henesy
I gave the code a try. The compiler wouldn't crash in the match compiler using the latest.
But the type db
is of the size 32256256*256 = 536,870,912 bytes.
The backend would try to generate instructions for each word of the object, which thus explodes the compiler.
See https://github.com/oridb/mc/blob/master/6/simp.c#L1243 and https://github.com/oridb/mc/blob/master/6/isel.c#L450-L455 where the compiler is generating the code for -> std.Err e
(line 60) in adb.myr.
Thanks :)
This explanation makes sense, thank you.
It would be nice if the compiler handled this kind of overflow sort of thing more gracefully, but on the other hand the data structure was a little absurd :)