/md_issue_mre

Primary language: Jupyter Notebook · License: Apache License 2.0 (Apache-2.0)

Project name here

Summary description here.

This file will become your README and also the index of your documentation.

Install

pip install your_project_name

How to use

Fill me in please! Don't forget code examples:

1 + 1
2

DataFrames in GitHub Markdown

The Issue

The default pandas output uses a <style scoped>...</style> element, which is neither stripped nor formatted nicely by GitHub.

import pandas as pd

# Sample data: two identical integer columns, each counting 0..99.
d = {column: list(range(100)) for column in ("col1", "col2")}
df = pd.DataFrame(data=d)
# Last expression of the cell, so the notebook renders the first five rows.
df.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
col1 col2
0 0 0
1 1 1
2 2 2
3 3 3
4 4 4

A Solution

By subclassing DataFrame and overriding its _repr_html_ method, we can display the DataFrame without the <style scoped>...</style> block.

import re

import pandas as pd


class GitHubMarkdownDataFrame(pd.DataFrame):
    """DataFrame whose notebook HTML repr omits the ``<style scoped>`` block.

    Only ``_repr_html_`` is overridden. ``_constructor`` is left untouched, so
    methods that build a new frame (e.g. ``head()``) return a plain
    ``pd.DataFrame`` whose repr contains the style block again.
    """

    def _repr_html_(self):
        """Return the parent's HTML repr with the scoped CSS removed.

        Returns
        -------
        str or None
            The HTML without any ``<style scoped>...</style>`` element, or
            ``None`` when the parent produces no HTML repr (for example when
            the ``display.notebook_repr_html`` option is switched off).
        """
        original = super()._repr_html_()
        if original is None:
            # Pass the "no HTML repr" signal through unchanged; handing None
            # to re.sub would raise TypeError.
            return None

        # See https://stackoverflow.com/a/55148480/3324095
        return re.sub(
            # Non-greedy so the match stops at the first closing tag; the
            # trailing newline is optional so the tag is removed either way.
            r"<style scoped>.*?</style>\n?",
            "",  # ...replaced with an empty string
            original,
            flags=re.DOTALL,  # let "." match across multiple \n lines
        )
GitHubMarkdownDataFrame(data=df)
col1 col2
0 0 0
1 1 1
2 2 2
3 3 3
4 4 4
... ... ...
95 95 95
96 96 96
97 97 97
98 98 98
99 99 99

100 rows × 2 columns

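# This doesn't work as is: .head() returns a plain pd.DataFrame (the subclass
# does not override _constructor), so the <style scoped> block comes back.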
GitHubMarkdownDataFrame(data=df).head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
col1 col2
0 0 0
1 1 1
2 2 2
3 3 3
4 4 4

Not a good solution

from pandas.io.formats.style import Styler

# from pandas.util._decorators import (
# Substitution,
# )


class MyOtherDF(pd.DataFrame):
    @property
    def style(self) -> Styler:
        """
        Returns a Styler object.
        Contains methods for building a styled HTML representation of the DataFrame.
        See Also
        --------
        io.formats.style.Styler : Helps style a DataFrame or Series according to the
            data with HTML and CSS.
        """
        # raise Exception()
        print("in style")

        class MyStyler(Styler):
            # @Substitution(buf=buf, encoding=encoding)
            def to_html(
                self,
                **kwargs,
            ):
                print("calling super.to_html")
                # kwargs["exclude_styles"] = True
                return super().to_html(**kwargs)

            def _repr_html_(self):
                print("in repr")
                return super()._repr_html_()

        return MyStyler(self)
MyOtherDF(data=df)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
col1 col2
0 0 0
1 1 1
2 2 2
3 3 3
4 4 4
... ... ...
95 95 95
96 96 96
97 97 97
98 98 98
99 99 99

100 rows × 2 columns