oridb/mc

6m segfault at compilation on linux/amd64

henesy opened this issue · 3 comments

Building

The file that crashes 6m (not minimized, apologies):

use std
use bio

// Exported interface of the adb package: the ingest entry point, the
// attribute/tuple/record/db data model, the query union and the `chatty`
// debug switch.
pkg adb =
	// Parse a file into a db. The parameter is named `f` here and `path` in
	// the definition; the result carries either the db or a byte[:] error.
	const ingest	: (f : byte[:]	-> std.result(db,			byte[:]))

	/* Types */

	// An attribute=value pair
	type attribute = struct
		key		: byte[:]
		value	: byte[:]
	;;

	// A tuple composed of attributes
	// (fixed-size array: every tuple holds 256 attribute slots by value)
	type tuple = struct
		pairs : attribute[256]
	;;

	// An individual record composed of tuples
	// (fixed-size array: every record holds 256 tuples by value)
	type record = struct
		tuples : tuple[256]
	;;

	// A parsed attrdb database
	// NOTE(review): with three nested 256-element arrays a db holds
	// 256*256*256 attributes by value. The discussion on this issue puts the
	// resulting size at 536,870,912 bytes, and identifies that size as what
	// overwhelms the compiler backend.
	type db = struct
		records : record[256]
	;;

	// The results from a query
	// NOTE(review): neither variant carries a payload and the type is not
	// referenced by ingest.
	type query = union
		`Some
		`None
	;;

	/* State */

	// When true, ingest prints every character it reads along with the
	// current parser state.
	var chatty = false
;;

/* ???

	Are traits in myr powerful enough to implement find(@f) as :: findable on top of:

		{tuple[:], record[:], attribute[:]}

	???
*/


// Open and parse a file
//
// Opens `path` for reading through bio and walks it one character at a time
// with a small state machine (see `states` below), storing keys and values
// directly into the fixed-size arrays of a local `db` at the indices
// ri (record), ti (tuple) and pi (pair).
//
// Returns `std.Ok db when the whole file has been consumed.
// Returns `std.Err carrying the error value from bio.open when the file
// cannot be opened, or `std.Err with a formatted message ending in
// path:line:char for each parse error detected below.
//
// NOTE(review): the "»"/"»»" progress messages and the final dump are
// printed unconditionally; only the per-character trace is gated on `chatty`.
const ingest = {path : byte[:]
	// NOTE(review): db is declared without an initializer, yet the dump loop
	// at the end reads .len of slots that were never assigned -- presumably
	// this relies on the storage being zeroed; confirm.
	var f, db

	match bio.open(path, bio.Rd)
	| `std.Ok	file:
		f = file

	| `std.Err	e:
		-> `std.Err e
	;;

	// States for parsing state machine
	// NOTE(review): the uint64 payload of the two quote states is set from cn
	// when a quote opens, but every arm that binds it as `origin` ignores it.
	type states = union
	`SingleQuote	uint64		// The starting character point for the quote
	`DoubleQuote	uint64		// ^
	`Comment
	`Newline
	`Key
	`Value
	;;

	// Current state
	var state	= `Newline

	// Last state
	// NOTE(review): assigned at the top of every iteration but not read
	// anywhere in this function.
	var last	= state

	// Current line for errors - tick by passing \n
	var line	= 1

	// Current character for errors - tick each iteration
	var cn		= 0

	// String builder buffer
	var buf		= std.mksb()	// Memory leak? - need init for compiler to ☺

	// Index trackers for slices
	// NOTE(review): ki and vi are declared but not used below.
	var ki = 0, vi = 0, pi = 0, ti = 0, ri = 0

	// Parse into db
	for c : bio.bychar(f)
		last = state

		cn++

		// A newline bumps the line counter and restarts the character
		// counter, so the '\n' itself is reported as character 0.
		if c == '\n'
			line++
			cn = 0
		;;

		if chatty
			// Nice printing of each character
			std.put("→ ")
			match c
			| '\n':
				std.put("\\n")
			| '\t':
				std.put("\\t")
			| ' ':
				std.put("_")
			| _:
				std.put("{}", c)
			;;
			std.put("				({})\n", state)
		;;

		// Comments are handled before the per-character dispatch: while in
		// `Comment every character is skipped, and '\n' ends the comment.
		match state
		| `Comment:
			// Read through until end of line
			if c == '\n'
				state = `Newline
			;;

			continue
		| _:
			;
		;;

		// Per-character dispatch; each arm consults the current state.
		match c
		| '#':
			// Allow comments at the beginning of lines and after values
			match state
			| `Newline:
				;
			| `Value:
				;
			| _:
				-> `std.Err std.fmt("invalid comment beginning at {}:{}:{}", path, line, cn)
			;;

			// Both accepted states fall through to here.
			state = `Comment

		| '\n':
			// Take whatever is buffered as the value, then reset the buffer.
			// NOTE(review): str presumably still refers to buf's storage
			// after sbtrim -- confirm it cannot be overwritten later.
			var str = std.sbpeek(buf)
			std.sbtrim(buf, 0)

			if str.len < 1
				// Empty string, discard value
				;
			else
				std.put("» finished value (\\n): {}\n", str)
				db.records[ri].tuples[ti].pairs[pi].value = str
			;;

			// NOTE(review): unlike the space, tab and closing-quote paths,
			// this path does not advance pi and applies in every state.
			state = `Newline

		| '\'':
			match state
			| `SingleQuote origin:
				// In a quote already

				// Look one character ahead to tell an escaped '' from the
				// closing quote.
				match bio.peekc(f)
				| `std.Ok	next:
					match next
					| '\'':
						// A '' in a quote
						if ! std.sbputc(buf, '\'')
							-> `std.Err std.fmt("could not insert ' at {}:{}:{}", path, line, cn)
						;;

						// Consume the next quote
						bio.getc(f)
					| _:
						// Ending a quote
						var str = std.sbpeek(buf)
						std.sbtrim(buf, 0)

						if str.len < 1
							// Empty string, discard value
						else
							std.put("» finished value ('): {}\n", str)

							db.records[ri].tuples[ti].pairs[pi].value = str
						;;

						std.put("»» committing previous pair (')\n")

						//tuple.pairs = std.slpush(&tuple.pairs, pair)

						pi++

						state = `Key
						// NOTE(review): buf is replaced without std.sbfree
						// here and in the other arms that call mksb; only the
						// `Newline arm of the default case frees it first.
						// Possibly deliberate if stored strings alias the old
						// buffer -- confirm.
						buf = std.mksb()
					;;
				| `std.Err	e:
					-> `std.Err std.fmt("single quote peek failed at {}:{}:{}", path, line, cn)
				;;

			| `Value:
				// Beginning a quote
				state = `SingleQuote cn

			| `DoubleQuote origin:
				// A single quote inside a double-quoted value is literal text
				std.sbputc(buf, c)

			| _:
				-> `std.Err std.fmt("unexpected single quote at {}:{}:{}", path, line, cn)
			;;

		| '\"':
			// Mirror image of the single-quote arm above.
			match state
			| `DoubleQuote origin:
				// In a quote already

				match bio.peekc(f)
				| `std.Ok	next:
					match next
					| '\"':
						// A "" in a quote
						if ! std.sbputc(buf, '\"')
							-> `std.Err std.fmt("could not insert \" at {}:{}:{}", path, line, cn)
						;;

						// Consume the next quote
						bio.getc(f)
					| _:
						// Ending a quote
						var str = std.sbpeek(buf)
						std.sbtrim(buf, 0)

						if str.len < 1
							// Empty string, discard value
						else
							std.put("» finished value (\"): {}\n", str)
							db.records[ri].tuples[ti].pairs[pi].value = str
						;;

						std.put("»» committing previous pair (\")\n")

						//tuple.pairs = std.slpush(&tuple.pairs, pair)

						pi++

						state = `Key
						buf = std.mksb()
					;;
				| `std.Err	e:
					-> `std.Err std.fmt("double quote peek failed at {}:{}:{}", path, line, cn)
				;;

			| `Value:
				// Beginning a quote
				state = `DoubleQuote cn

			| `SingleQuote origin:
				// A double quote inside a single-quoted value is literal text
				std.sbputc(buf, c)

			| _:
				-> `std.Err std.fmt("unexpected double quote at {}:{}:{}", path, line, cn)
			;;

		| ' ':

			match state
			| `Newline:
				// This is necessary, apparently
				// Start a new sub-tuple of a record
				std.put("»» committing previous tuple to record (_)\n")

				/*
				record.tuples = std.slpush(&record.tuples, tuple)
				tuple.pairs = std.slzalloc(0)
				*/

				// Leading whitespace after a newline moves to the next tuple
				// slot of the current record.
				ti++
				pi = 0

				std.put("»» start sub-tuple (_)\n")
				state = `Key
				buf = std.mksb()

			| `Key:
				// Allow to passthrough, might be a mistake?
				;

			| `Value:
				// An unquoted value ends at whitespace.
				var str = std.sbpeek(buf)
				std.sbtrim(buf, 0)

				if str.len < 1
					// Empty string, discard value
				else
					std.put("» finished value (_): {}\n", str)
					db.records[ri].tuples[ti].pairs[pi].value = str
				;;

				std.put("»» committing previous pair (_)\n")

				//tuple.pairs = std.slpush(&tuple.pairs, pair)
				pi++

				state = `Key
				buf = std.mksb()

			| `SingleQuote origin:
				std.sbputc(buf, c)

			| `DoubleQuote origin:
				std.sbputc(buf, c)

			| _:
				-> `std.Err std.fmt("invalid space at {}:{}:{}", path, line, cn)
			;;

		| '\t':
			// Same handling as ' ' above; only the statement order in the
			// `Newline arm and the message text differ.
			match state
			| `Newline:
				// This is necessary, apparently
				// Start a new sub-tuple of a record
				state = `Key
				buf = std.mksb()

				std.put("»» committing previous tuple (\\t)\n")

				/*
				record.tuples = std.slpush(&record.tuples, tuple)
				tuple.pairs = std.slzalloc(0)
				*/

				ti++
				pi = 0

				std.put("»» start sub-tuple (\\t)\n")

			| `Key:
				// Allow to passthrough, might be a mistake?
				;

			| `Value:
				var str = std.sbpeek(buf)
				std.sbtrim(buf, 0)

				if str.len < 1
					// Empty string, discard value
				else
					std.put("» finished value (\\t): {}\n", str)
					db.records[ri].tuples[ti].pairs[pi].value = str
				;;

				std.put("»» committing previous pair (\\t)\n")

				//tuple.pairs = std.slpush(&tuple.pairs, pair)

				pi++

				state = `Key
				buf = std.mksb()

			| `SingleQuote origin:
				std.sbputc(buf, c)

			| `DoubleQuote origin:
				std.sbputc(buf, c)

			| _:
				-> `std.Err std.fmt("invalid tab at {}:{}:{}", path, line, cn)
			;;

		| '=':
			// NOTE(review): '=' is treated as the key/value separator in
			// every state, including inside quoted values -- confirm that is
			// intended.
			var str = std.sbpeek(buf)
			std.sbtrim(buf, 0)

			if str.len < 1
				// Empty string, disallowed for keys
				-> `std.Err std.fmt("empty key at {}:{}:{}", path, line, cn)
			else
				std.put("» finished key (=): {}\n", str)
				db.records[ri].tuples[ti].pairs[pi].key = str
			;;

			state = `Value
			buf = std.mksb()
		| _:
			// A key or value text segment
			match state
			| `Key:
				if ! std.sbputc(buf, c)
					-> `std.Err std.fmt("could not insert key char at {}:{}:{}", path, line, cn)
				;;

			| `Value:
				std.sbputc(buf, c)

			| `SingleQuote	origin:
				std.sbputc(buf, c)

			| `DoubleQuote	origin:
				std.sbputc(buf, c)

			| `Newline:
				// Start a new record
				// TODO - do we need to push the last pair and tuple as well?
				// An ordinary character directly after a newline begins a new
				// record; ri only advances when the previous record had
				// moved past its first tuple slot (ti > 0).
				if ti > 0
					std.put("»» committing previous record\n")
					//db.records = std.slpush(&db.records, record)

					std.put("»» # tuples in rec = {}\n", ti)

					ri++
					ti = 0
					pi = 0

				;;

				//record.tuples = std.slzalloc(0)
				//tuple.pairs = std.slzalloc(0)
				// NOTE(review): these resets repeat the ones inside the if
				// above; they are the only ones that run when ti == 0.
				ti = 0
				pi = 0

				std.put("»» start record\n")
				std.put("»» start master tuple\n")

				state = `Key

				// The current character is the first character of the key.
				std.sbfree(buf)
				buf = std.mksb()
				std.sbputc(buf, c)



			| _:
				-> `std.Err std.fmt("unexpected character at {}:{}:{}", path, line, cn)
			;;
		;;
	;;

	// TODO - if there is a tuple (or key= only pair) dangling, be sure to append it
	// Check key.len?
	if ti > 0
		// TODO - do we need to push the last pair and tuple as well?
		std.put("»» committing dangling record\n")
		//db.records = std.slpush(&db.records, record)
		// NOTE(review): ri is not read after this increment.
		ri++
	;;


	// Debug dump of the parsed database.
	// NOTE(review): the loops range over the arrays themselves rather than
	// the ri/ti/pi counts, so presumably every slot is visited, filled or
	// not -- confirm. np is printed before it is incremented, whereas nr and
	// nt are printed after.
	var nr = 0, nt = 0, np = 0
	for r : db.records
		nr++
		std.put("nr = {}\n", nr)
		for t : r.tuples
			nt++
			std.put("nt = {}\n", nt)
			for p : t.pairs
				std.put("np = {}\n", np)
				np++

				if p.key.len < 1
					std.put("¡! 0 length key at {}/{}/{}\n", nr, nt, np)
				elif p.value.len < 1
					std.put("¡! 0 length value at {}/{}/{}\n", nr, nt, np)
				else
					std.put("	{}={}\n", p.key, p.value)
				;;
			;;
		;;
	;;

	/* Correct:

		nr = 4

		nt = 12

		np = 17
	*/
	std.put("# nr = {}\n# nt = {}\n# np = {}\n", nr, nt, np)

	// db is returned by value inside the result.
	-> `std.Ok db
}

Segfaulting

Compilation:

; 6m -O obj adb.myr
segmentation violation--core dumped
; 

Running and interrupting valgrind with SIGINT:

; valgrind 6m -O obj adb.myr
==24845== Memcheck, a memory error detector
==24845== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==24845== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
==24845== Command: 6m -O obj adb.myr
==24845== 


^?==24845== 
==24845== Process terminating with default action of signal 2 (SIGINT)
==24845==    at 0x483DF50: strlen (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==24845==    by 0x4914532: strdup (strdup.c:41)
==24845==    by 0x13A409: mklbl (node.c:270)
==24845==    by 0x13A505: genlbl (node.c:291)
==24845==    by 0x15050E: nextnode (match.c:172)
==24845==    by 0x150C26: addwildrec (match.c:326)
==24845==    by 0x150D95: addwildrec (match.c:344)
==24845==    by 0x150C7F: addwildrec (match.c:330)
==24845==    by 0x150D95: addwildrec (match.c:344)
==24845==    by 0x15104F: addwild (match.c:392)
==24845==    by 0x152166: addpat (match.c:662)
==24845==    by 0x152901: genonematch (match.c:788)
==24845== 
==24845== HEAP SUMMARY:
==24845==     in use at exit: 18,349,457 bytes in 232,547 blocks
==24845==   total heap usage: 459,939 allocs, 227,392 frees, 28,815,981,120 bytes allocated
==24845== 
==24845== LEAK SUMMARY:
==24845==    definitely lost: 167,279 bytes in 3,197 blocks
==24845==    indirectly lost: 68,377 bytes in 1,385 blocks
==24845==      possibly lost: 72 bytes in 1 blocks
==24845==    still reachable: 18,113,729 bytes in 227,964 blocks
==24845==         suppressed: 0 bytes in 0 blocks
==24845== Rerun with --leak-check=full to see details of leaked memory
==24845== 
==24845== For lists of detected and suppressed errors, rerun with: -s
==24845== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

; 

@henesy I suspect you were using an older compiler. The latest does not have functions such as addwildrec.

@henesy
I gave the code a try. With the latest compiler, it no longer crashes in the match compiler.
But the type db has a size of 32*256*256*256 = 536,870,912 bytes.
The backend would try to generate instructions for each word of the object, which thus explodes the compiler.

See https://github.com/oridb/mc/blob/master/6/simp.c#L1243 and https://github.com/oridb/mc/blob/master/6/isel.c#L450-L455 where the compiler is generating the code for -> `std.Err e (line 60) in adb.myr.

Thanks :)

This explanation makes sense, thank you.

It would be nice if the compiler handled this kind of overflow more gracefully, but on the other hand the data structure was a little absurd :)