scikit-hep/uproot5

`signed char` is missing from the parsable types of `uproot.interpretation.identify._parse_node`; what else is missing?

Closed this issue · 0 comments

This section of `_parse_node` handles combinations of `const`, `signed`/`unsigned`, `char`/`int`/`float`/etc.:

elif tokens[i].group(0) == "Bool_t":
return i + 1, _parse_maybe_quote('numpy.dtype("?")', quote)
elif tokens[i].group(0) == "bool":
return i + 1, _parse_maybe_quote('numpy.dtype("?")', quote)
elif _simplify_token(tokens[i]) == "Bool_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype("?"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "bool*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype("?"))',
quote,
),
)
elif tokens[i].group(0) == "Char_t":
return i + 1, _parse_maybe_quote('numpy.dtype("i1")', quote)
elif tokens[i].group(0) == "char":
return i + 1, _parse_maybe_quote('numpy.dtype("i1")', quote)
elif tokens[i].group(0) == "UChar_t":
return i + 1, _parse_maybe_quote('numpy.dtype("u1")', quote)
elif has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "char":
return i + 2, _parse_maybe_quote('numpy.dtype("u1")', quote)
elif _simplify_token(tokens[i]) == "UChar_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype("u1"))',
quote,
),
)
elif (
has2
and tokens[i].group(0) == "unsigned"
and _simplify_token(tokens[i + 1]) == "char*"
):
return (
i + 2,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype("u1"))',
quote,
),
)
elif tokens[i].group(0) == "Short_t":
return i + 1, _parse_maybe_quote('numpy.dtype(">i2")', quote)
elif tokens[i].group(0) == "short":
return i + 1, _parse_maybe_quote('numpy.dtype(">i2")', quote)
elif tokens[i].group(0) == "UShort_t":
return i + 1, _parse_maybe_quote('numpy.dtype(">u2")', quote)
elif (
has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "short"
):
return i + 2, _parse_maybe_quote('numpy.dtype(">u2")', quote)
elif _simplify_token(tokens[i]) == "Short_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">i2"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "short*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">i2"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "UShort_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">u2"))',
quote,
),
)
elif (
has2
and tokens[i].group(0) == "unsigned"
and _simplify_token(tokens[i + 1]) == "short*"
):
return (
i + 2,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">u2"))',
quote,
),
)
elif tokens[i].group(0) == "Int_t":
return i + 1, _parse_maybe_quote('numpy.dtype(">i4")', quote)
elif tokens[i].group(0) == "int":
return i + 1, _parse_maybe_quote('numpy.dtype(">i4")', quote)
elif tokens[i].group(0) == "UInt_t":
return i + 1, _parse_maybe_quote('numpy.dtype(">u4")', quote)
elif has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "int":
return i + 2, _parse_maybe_quote('numpy.dtype(">u4")', quote)
elif _simplify_token(tokens[i]) == "Int_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">i4"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "int*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">i4"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "UInt_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">u4"))',
quote,
),
)
elif (
has2
and tokens[i].group(0) == "unsigned"
and _simplify_token(tokens[i + 1]) == "int*"
):
return (
i + 2,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">u4"))',
quote,
),
)
elif has2 and tokens[i].group(0) == tokens[i + 1].group(0) == "long":
return i + 2, _parse_maybe_quote('numpy.dtype(">i8")', quote)
elif (
i + 2 < len(tokens)
and tokens[i].group(0) == "unsigned"
and tokens[i + 1].group(0) == tokens[i + 2].group(0) == "long"
):
return i + 3, _parse_maybe_quote('numpy.dtype(">u8")', quote)
elif tokens[i].group(0) == "Long_t":
return i + 1, _parse_maybe_quote('numpy.dtype(">i8")', quote)
elif tokens[i].group(0) == "Long64_t":
return i + 1, _parse_maybe_quote('numpy.dtype(">i8")', quote)
elif tokens[i].group(0) == "long":
return i + 1, _parse_maybe_quote('numpy.dtype(">i8")', quote)
elif tokens[i].group(0) == "ULong_t":
return i + 1, _parse_maybe_quote('numpy.dtype(">u8")', quote)
elif tokens[i].group(0) == "ULong64_t":
return i + 1, _parse_maybe_quote('numpy.dtype(">u8")', quote)
elif has2 and tokens[i].group(0) == "unsigned" and tokens[i + 1].group(0) == "long":
return i + 2, _parse_maybe_quote('numpy.dtype(">u8")', quote)
elif (
has2
and tokens[i].group(0) == "long"
and _simplify_token(tokens[i + 1]) == "long*"
):
return (
i + 2,
_parse_maybe_quote(
f'uproot.containers.AsArray({header}, numpy.dtype(">i8"))',
quote,
),
)
elif (
i + 2 < len(tokens)
and tokens[i].group(0) == "unsigned"
and _simplify_token(tokens[i + 1]) == "long"
and _simplify_token(tokens[i + 2]) == "long*"
):
return (
i + 3,
_parse_maybe_quote(
f'uproot.containers.AsArray({header}, numpy.dtype(">u8"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "Long_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">i8"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "Long64_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">i8"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "long*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">i8"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "ULong_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">u8"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "ULong64_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">u8"))',
quote,
),
)
elif (
has2
and tokens[i].group(0) == "unsigned"
and _simplify_token(tokens[i + 1]) == "long*"
):
return (
i + 2,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">u8"))',
quote,
),
)
elif tokens[i].group(0) == "Float_t":
return i + 1, _parse_maybe_quote('numpy.dtype(">f4")', quote)
elif tokens[i].group(0) == "float":
return i + 1, _parse_maybe_quote('numpy.dtype(">f4")', quote)
elif _simplify_token(tokens[i]) == "Float_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">f4"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "float*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">f4"))',
quote,
),
)
elif tokens[i].group(0) == "Double_t":
return i + 1, _parse_maybe_quote('numpy.dtype(">f8")', quote)
elif tokens[i].group(0) == "double":
return i + 1, _parse_maybe_quote('numpy.dtype(">f8")', quote)
elif _simplify_token(tokens[i]) == "Double_t*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">f8"))',
quote,
),
)
elif _simplify_token(tokens[i]) == "double*":
return (
i + 1,
_parse_maybe_quote(
f'uproot.containers.AsArray(False, {header}, numpy.dtype(">f8"))',
quote,
),
)
elif tokens[i].group(0) == "Float16_t":
return (
i + 1,
_parse_maybe_quote(
'uproot.containers.AsFIXME("Float16_t in another context")', quote
),
)
elif _simplify_token(tokens[i]) == "Float16_t*":
return (
i + 1,
_parse_maybe_quote(
f"uproot.containers.AsArray(False, {header}, "
'uproot.containers.AsFIXME("Float16_t in array"))',
quote,
),
)
elif tokens[i].group(0) == "Double32_t":
return (
i + 1,
_parse_maybe_quote(
'uproot.containers.AsFIXME("Double32_t in another context")', quote
),
)
elif _simplify_token(tokens[i]) == "Double32_t*":
return (
i + 1,
_parse_maybe_quote(
f"uproot.containers.AsArray(False, {header}, "
'uproot.containers.AsFIXME("Double32_t in array '
'(note: Event.root fClosestDistance has an example)"))',
quote,
),
)

There are many synonyms for these basic types, and a few have slipped through the cracks.

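@MatousVozak noticed that `["signed", "char"]` isn't there—indeed, there's no check for `"signed"` at all. (In C, `signed` is the default for `short`, `int`, and `long`, so e.g. `signed int` is just a synonym for `int`; plain `char` is the exception, because its signedness is implementation-defined and `signed char` is a distinct type.) At the very least, `signed char` needs to be added, but perhaps this part of the code should be made more systematic.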