wsp-sag/Lasso

[BUG] Log file concatenation not working

Opened this issue · 1 comment

e-lo commented

Describe the bug

test_project_card_concatenate() fails with:

ValueError: cannot set a row with mismatched columns

To Reproduce

Failing tests

  • test_project_card_concatenate()

Triggering line of code

lasso/project.py:347: in read_logfile
    link_df.loc[link_df_length] = row

Thoughts on resolution

None yet...

Full stack trace

request = <FixtureRequest for <Function test_project_card_concatenate>>

    @pytest.mark.travis
    def test_project_card_concatenate(request):
        """
        Tests that you can add multiple log files together.
        """
        print("\n--Starting:", request.node.name)
        whole_logfile = os.path.join(CUBE_DIR, "st_paul_test.log")
    
        split_logfile_list = [
            os.path.join(CUBE_DIR, "st_paul_test-A.log"),
            os.path.join(CUBE_DIR, "st_paul_test-B.log"),
        ]
    
        print("Reading Whole Logfile: {}".format(whole_logfile))
        lf = Project.read_logfile(whole_logfile)
        whole_logfile_project = Project.create_project(
            roadway_log_file=whole_logfile, base_roadway_dir=ROADWAY_DIR
        )
        print(
            "\nWHOLE  Card Dict:\n  {}".format(whole_logfile_project.card_data["changes"])
        )
    
        print("Reading Split Logfiles: {}".format(split_logfile_list))
>       lf = Project.read_logfile(split_logfile_list)

tests/test_project.py:109: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
lasso/project.py:347: in read_logfile
    link_df.loc[link_df_length] = row
../../../../../anaconda3/envs/wrangler/lib/python3.7/site-packages/pandas/core/indexing.py:671: in __setitem__
    self._setitem_with_indexer(indexer, value)
../../../../../anaconda3/envs/wrangler/lib/python3.7/site-packages/pandas/core/indexing.py:875: in _setitem_with_indexer
    return self._setitem_with_indexer_missing(indexer, value)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <pandas.core.indexing._LocIndexer object at 0x12cf3dfb0>, indexer = 2
value = ['Link', 'A', 'B', 'model_link_id', 'county', 'trn_priority', ...]

    def _setitem_with_indexer_missing(self, indexer, value):
        """
        Insert new row(s) or column(s) into the Series or DataFrame.
        """
        from pandas import Series
    
        # reindex the axis to the new value
        # and set inplace
        if self.ndim == 1:
            index = self.obj.index
            new_index = index.insert(len(index), indexer)
    
            # we have a coerced indexer, e.g. a float
            # that matches in an Int64Index, so
            # we will not create a duplicate index, rather
            # index to that element
            # e.g. 0.0 -> 0
            # GH#12246
            if index.is_unique:
                new_indexer = index.get_indexer([new_index[-1]])
                if (new_indexer != -1).any():
                    return self._setitem_with_indexer(new_indexer, value)
    
            # this preserves dtype of the value
            new_values = Series([value])._values
            if len(self.obj._values):
                # GH#22717 handle casting compatibility that np.concatenate
                #  does incorrectly
                new_values = concat_compat([self.obj._values, new_values])
            self.obj._data = self.obj._constructor(
                new_values, index=new_index, name=self.obj.name
            )._data
            self.obj._maybe_update_cacher(clear=True)
            return self.obj
    
        elif self.ndim == 2:
    
            if not len(self.obj.columns):
                # no columns and scalar
                raise ValueError("cannot set a frame with no defined columns")
    
            if isinstance(value, ABCSeries):
                # append a Series
                value = value.reindex(index=self.obj.columns, copy=True)
                value.name = indexer
    
            else:
                # a list-list
                if is_list_like_indexer(value):
                    # must have conforming columns
                    if len(value) != len(self.obj.columns):
>                       raise ValueError("cannot set a row with mismatched columns")
E                       ValueError: cannot set a row with mismatched columns

../../../../../anaconda3/envs/wrangler/lib/python3.7/site-packages/pandas/core/indexing.py:1119: ValueError

INFO     WranglerLogger:project.py:323 Reading logfile: /Users/elizabeth/Documents/urbanlabs/MetCouncil/working/client_met_council_wrangler_utilities/examples/cube/st_paul_test-A.log
INFO     WranglerLogger:project.py:323 Reading logfile: /Users/elizabeth/Documents/urbanlabs/MetCouncil/working/client_met_council_wrangler_utilities/examples/cube/st_paul_test-B.log
e-lo commented

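Issue found: The concatenate method did not account for link/node attributes differing between log files, so a row read from one log file can have a different number of fields than the DataFrame's columns, which raises the "mismatched columns" ValueError.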