bjodah/chempy

square brackets support

DonHalkon opened this issue · 2 comments

I tried to parse "[Fe(CN)6]-4" and got a ParseException:

In [2]: import chempy.util.parsing as chem

In [3]: comp = chem.formula_to_composition("[Fe(CN)6]-4")
---------------------------------------------------------------------------
ParseException                            Traceback (most recent call last)
<ipython-input-3-da99372ece8d> in <module>()
----> 1 comp = chem.formula_to_composition("[Fe(CN)6]-4")

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/chempy/util/parsing.py in formula_to_composition(formula, prefixes, suffixes)
    247         else:
    248             m, stoich = _get_leading_integer(stoich)
--> 249         comp = _parse_stoich(stoich)
    250         for k, v in comp.items():
    251             if k not in tot_comp:

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/chempy/util/parsing.py in _parse_stoich(stoich)
    175         return {}
    176     return {symbols.index(k)+1: n for k, n
--> 177             in _get_formula_parser().parseString(stoich)}
    178
    179 _greek_letters = (

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in parseString(self, instring, parseAll)
   1630             else:
   1631                 # catch and re-raise exception from here, clears out pyparsing internal stack trace
-> 1632                 raise exc
   1633         else:
   1634             return tokens

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in parseString(self, instring, parseAll)
   1620             instring = instring.expandtabs()
   1621         try:
-> 1622             loc, tokens = self._parse( instring, 0 )
   1623             if parseAll:
   1624                 loc = self.preParse( instring, loc )

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in _parseNoCache(self, instring, loc, doActions, callPreParse)
   1377             if self.mayIndexError or loc >= len(instring):
   1378                 try:
-> 1379                     loc,tokens = self.parseImpl( instring, preloc, doActions )
   1380                 except IndexError:
   1381                     raise ParseException( instring, len(instring), self.errmsg, self )

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in parseImpl(self, instring, loc, doActions)
   3715     def parseImpl( self, instring, loc, doActions=True ):
   3716         if self.expr is not None:
-> 3717             return self.expr._parse( instring, loc, doActions, callPreParse=False )
   3718         else:
   3719             raise ParseException("",loc,self.errmsg,self)

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in _parseNoCache(self, instring, loc, doActions, callPreParse)
   1377             if self.mayIndexError or loc >= len(instring):
   1378                 try:
-> 1379                     loc,tokens = self.parseImpl( instring, preloc, doActions )
   1380                 except IndexError:
   1381                     raise ParseException( instring, len(instring), self.errmsg, self )

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in parseImpl(self, instring, loc, doActions)
   3846         if check_ender:
   3847             try_not_ender(instring, loc)
-> 3848         loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
   3849         try:
   3850             hasIgnoreExprs = (not not self.ignoreExprs)

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in _parseNoCache(self, instring, loc, doActions, callPreParse)
   1377             if self.mayIndexError or loc >= len(instring):
   1378                 try:
-> 1379                     loc,tokens = self.parseImpl( instring, preloc, doActions )
   1380                 except IndexError:
   1381                     raise ParseException( instring, len(instring), self.errmsg, self )

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in parseImpl(self, instring, loc, doActions)
   3715     def parseImpl( self, instring, loc, doActions=True ):
   3716         if self.expr is not None:
-> 3717             return self.expr._parse( instring, loc, doActions, callPreParse=False )
   3718         else:
   3719             raise ParseException("",loc,self.errmsg,self)

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in _parseNoCache(self, instring, loc, doActions, callPreParse)
   1377             if self.mayIndexError or loc >= len(instring):
   1378                 try:
-> 1379                     loc,tokens = self.parseImpl( instring, preloc, doActions )
   1380                 except IndexError:
   1381                     raise ParseException( instring, len(instring), self.errmsg, self )

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in parseImpl(self, instring, loc, doActions)
   3376         # pass False as last arg to _parse for first element, since we already
   3377         # pre-parsed the string as part of our And pre-parsing
-> 3378         loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
   3379         errorStop = False
   3380         for e in self.exprs[1:]:

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in _parseNoCache(self, instring, loc, doActions, callPreParse)
   1377             if self.mayIndexError or loc >= len(instring):
   1378                 try:
-> 1379                     loc,tokens = self.parseImpl( instring, preloc, doActions )
   1380                 except IndexError:
   1381                     raise ParseException( instring, len(instring), self.errmsg, self )

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in parseImpl(self, instring, loc, doActions)
   3543             if maxException is not None:
   3544                 maxException.msg = self.errmsg
-> 3545                 raise maxException
   3546             else:
   3547                 raise ParseException(instring, loc, "no defined alternatives to match", self)

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in parseImpl(self, instring, loc, doActions)
   3528         for e in self.exprs:
   3529             try:
-> 3530                 ret = e._parse( instring, loc, doActions )
   3531                 return ret
   3532             except ParseException as err:

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in _parseNoCache(self, instring, loc, doActions, callPreParse)
   1381                     raise ParseException( instring, len(instring), self.errmsg, self )
   1382             else:
-> 1383                 loc,tokens = self.parseImpl( instring, preloc, doActions )
   1384
   1385         tokens = self.postParse( instring, loc, tokens )

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyparsing.py in parseImpl(self, instring, loc, doActions)
   2792         result = self.re.match(instring,loc)
   2793         if not result:
-> 2794             raise ParseException(instring, loc, self.errmsg, self)
   2795
   2796         loc = result.end()

ParseException: Expected {Re:('A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|S[bcegimnr]?|T[abcehilm]|Uu[bhopqst]|U|V|W|Xe|Yb?|Z[nr]') | Group:({Suppress:("(") Forward: ... Suppress:(")")})} (at char 0), (line:1, col:1)

I guess the problem is the square brackets; without them, parsing works fine:

In [5]: chem.formula_to_composition("Fe(CN)6-4")
Out[5]: {26: 1, 6: 6, 7: 6, 0: -4}

Yes, the parser has not been written to account for square brackets.
If they don't add any information, you could write your own "pre-processor" that simply removes them:

lambda s: s.replace('[', '').replace(']', '')

(or replaces them with ordinary parentheses).

Thank you!