LSYS/forestplot

Duplicate values in separate groupings bug

EythorE opened this issue · 2 comments

There seems to be a bug: if you have duplicate values in separate groupings, the plot does not show some of the rows.

import sys
import forestplot as fp
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

# Report the interpreter and library versions used to reproduce the issue.
# `sys.version` is the Python interpreter's version string (not numpy's), so
# the first line is labelled accordingly.
print(
    f"python version: {sys.version}",
    f"pandas version: {pd.__version__}",
    f"matplotlib version: {matplotlib.__version__}",
    f"forestplot version: {fp.__version__}",
    sep='\n'
)
# Output recorded by the reporter:
# python version: 3.8.1 (default, Feb  3 2020, 12:44:18) 
# [GCC 4.8.5 20150623 (Red Hat 4.8.5-39)]
# pandas version: 1.4.3
# matplotlib version: 3.4.2
# forestplot version: 0.3.1

def create_data():
    """Build the minimal data frame that reproduces the missing-rows plot.

    Two groups ("group_a" and "group_b") are created with identical names,
    estimates and confidence bounds (estimate -/+ 0.05); only the ``group``
    column differs between them.

    Returns:
        pandas.DataFrame with columns ``name``, ``estimate``, ``Lower CI``,
        ``Upper CI`` and ``group``. The frames are stacked without
        re-indexing, so the row index repeats (0, 1, 0, 1).
    """
    base = pd.DataFrame({'name': ['name_a', 'name_b'], 'estimate': [1.1, 1.0]})
    base = base.assign(**{
        'Lower CI': base['estimate'] - 0.05,
        'Upper CI': base['estimate'] + 0.05,
    })

    # One copy of the same rows per group label, stacked vertically.
    per_group = [base.assign(group=label) for label in ("group_a", 'group_b')]
    return pd.concat(per_group, axis=0)

df = create_data()
display(df)

# Column mapping shared by both forestplot calls below.
plot_columns = {
    'estimate': 'estimate',
    'varlabel': 'name',
    'll': "Lower CI",
    'hl': "Upper CI",
    'groupvar': "group",
}

# First plot: both groups hold identical values.
print("Missing part of the plot")
fp.forestplot(df, **plot_columns)
plt.show()

# NOTE: per the reporter's disabled experiment, shifting group_b's estimate
# and bounds by only 0.001 before plotting was labelled
# "Still missing part of the plot"; that variant is not executed here.

# Second plot: shift group_b's estimate and bounds by 0.01 so the two groups
# no longer share values.
print("Now it works")
interval_columns = ['estimate', "Lower CI", "Upper CI"]
group_b_rows = df['group'] == 'group_b'
df.loc[group_b_rows, interval_columns] += 0.01
fp.forestplot(df, **plot_columns)
plt.show()

forest_bug

LSYS commented

@EythorE thanks so much for raising this bug! I'll have to find some time to investigate this. If you can work this out, do consider opening a pull request.