/python_paint_api

简单易懂的画图接口(含丰富的注释)

Primary LanguageJupyter Notebook

20240515

box plot

import matplotlib.pyplot as plt
import numpy as np

# Random test data

def draw_boxplot(all_data, labels, all_titles,x_labels, y_labels, nrows=1, ncols=7,figsize=(9, 4) ):
    np.random.seed(19680801)
    #all_data = [np.random.normal(0, std, size=2) for std in range(1, 4)]
    #labels = ['x1', 'x2', 'x3']
    
    fig, ax_list = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)

    bplot_list =  []
    for i_index, i_ax in enumerate(ax_list):
        
        # rectangular box plot
        bplot1 = i_ax.boxplot(all_data[i_index],
                             vert=True,  # vertical box alignment
                             patch_artist=True,  # fill with color
                             labels=labels)  # will be used to label x-ticks
        i_ax.set_title(all_titles[i_index])
        
        bplot_list.append(bplot1)
        
        
    # fill with colors
    colors = ['pink', 'lightblue', 'lightgreen']
    for bplot in bplot_list:
        for patch, color in zip(bplot['boxes'], colors):
            patch.set_facecolor(color)
    
    # adding horizontal grid lines
    for ax in ax_list:
        ax.yaxis.grid(True)
        ax.set_xlabel(x_labels)
        ax.set_ylabel(y_labels)
    
    plt.show()

image

Python画图教程

封装画图函数的常用功能,方便直接调用画图,

该jupyter notebook中的所有画图函数都汇总在 paint_func.py 文件中,方便直接使用

#配置
import pandas as pd
import numpy as np
from pandas import DataFrame
import matplotlib.pyplot as plt
pd.plotting.register_matplotlib_converters()

%matplotlib inline
import seaborn as sns
from wordcloud import WordCloud, STOPWORDS
from collections import Counter
print("Setup Complete")
Setup Complete
sample_path='./insurance.csv'
df_sample=pd.read_csv(sample_path)
df_sample.head(15)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
age sex bmi children smoker region charges
0 19 female 27.900 0 yes southwest 16884.92400
1 18 male 33.770 1 no southeast 1725.55230
2 28 male 33.000 3 no southeast 4449.46200
3 33 male 22.705 0 no northwest 21984.47061
4 32 male 28.880 0 no northwest 3866.85520
5 31 female 25.740 0 no southeast 3756.62160
6 46 female 33.440 1 no southeast 8240.58960
7 37 female 27.740 3 no northwest 7281.50560
8 37 male 29.830 2 no northeast 6406.41070
9 60 female 25.840 0 no northwest 28923.13692
10 25 male 26.220 0 no northeast 2721.32080
11 62 female 26.290 0 yes southeast 27808.72510
12 23 male 34.400 0 no southwest 1826.84300
13 56 female 39.820 0 no southeast 11090.71780
14 27 male 42.130 0 yes southeast 39611.75770
#条形图📊
def draw_bar_chart_plot(data,x=None,y=None,hue=None,ci=None,fig_x=10,fig_y=10,fig_title=None):
    '''
    Ref:
    https://seaborn.pydata.org/generated/seaborn.barplot.html
    https://www.kaggle.com/alexisbcook/bar-charts-and-heatmaps
    
    
    1.图的高度表示了,这些变量的集中的趋势 
    2.条形图反映的是平均值
    3.两种形式:
     long-form(推荐):
       需要指定x,y (在通常是dataframe中为列名)
     wide-form:
       画图的是根据输入数据(通常是dataframe obj)的每列数据 
    
    4.关键参数:
    data:pandas DataFrame obj
        data的输入格式:
        label_1,  label2 , ...
         x[0]  ,  y_1[0] , y_2[0]
         x[1]  ,  y_1[1] , y_2[1]
         ...
    x:x坐标的name 
    y:y坐标的name
    hue:类别对应的column名字 
    ci:画图时采样的方式 ,ci==None时不用采样的方式获取观测值
    fig_x:图的长
    fig_y:图的宽
    fig_title:图的标题
    
    
    
    '''
    
    #设置画布大小
    plt.figure(figsize=(fig_x,fig_y))
    # 添加标题
    if fig_title:
        plt.title(fig_title)
    #label===>设置图中label的标签
    if x is  None and y is None:  
        #没有指定列,则所有列全部被使用
        sns.barplot(data=data,ci=ci) 
    else:
        #直接使用df的对应列作为y坐标,label_name作为类名
        sns.barplot(data=data,x=x,y=y,hue=hue,ci=ci)
draw_bar_chart_plot(df_sample)

png

draw_bar_chart_plot(df_sample,x='age',y='bmi',fig_x=20,fig_y=7)

png

#折线图
def draw_line_plot(data,x=None,y=None,hue=None,ci=None,fig_x=10,fig_y=10,fig_title=None):
    
    '''
    Ref:
    https://seaborn.pydata.org/generated/seaborn.lineplot.html?highlight=lineplot#seaborn.lineplot
    https://www.kaggle.com/alexisbcook/line-charts
    
    关键参数:
    df的输入格式:
        label_1,label2,...
     x[0],  y_1[0]   , y_2[0]
     x[1],  y_1[1]   , y_2[1]
     ...
    data:pandas DataFrame obj
        data的输入格式:
        label_1,  label2 , ...
         x[0]  ,  y_1[0] , y_2[0]
         x[1]  ,  y_1[1] , y_2[1]
         ...
    x:x坐标的name 
    y:y坐标的name
    hue:类别对应的column名字 
    ci:画图时采样的方式 ,ci==None时不用采样的方式获取观测值
    fig_x:图的长
    fig_y:图的宽
    fig_title:图的标题
    
    '''
    
    
    #设置画布大小
    plt.figure(figsize=(fig_x,fig_y))
    # 添加标题
    if fig_title:
        plt.title(fig_title)
    #label===>设置图中label的标签
    if x==None or y==None:
        #没有指定列,则所有列全部被使用
        sns.lineplot(data=data,hue=hue,ci=ci)
    else:
        #直接使用df的对应列作为y坐标,label_name作为类名
        sns.lineplot(data=data,x=x,y=y,hue=hue,ci=ci)
draw_line_plot(df_sample,x='age',y='bmi',hue='smoker')

png

#sample
import numpy as np
uniform_data = np.random.rand(10, 12)
#热图
def draw_heatmap(data,annot=False,fmt=None,vmin=None,vmax=None,cmap='YlGnBu',fig_x=10,fig_y=10,fig_title=None):
    '''
    Ref:
    https://seaborn.pydata.org/generated/seaborn.heatmap.html?highlight=heatmap#seaborn.heatmap
    https://www.kaggle.com/alexisbcook/bar-charts-and-heatmaps

    关键参数:
    data:能够转化为2维ndarray,使用dataframe对象时headmap的横纵坐标的label从行列名中自动找出
    annot:图中是否标识出label值
    fmt:annot的格式,'d'表示整数格式,'.1f'表示小数(保留小数点后一位)
        annot和fmt一般要同时使用。
    vmin, vmax:热点图的bar中的最大值和最小值范围,不提供则自动统计
    cmap:热点图的颜色样式,如“YlGnBu”/"Wistia"
    fig_x:图的长
    fig_y:图的宽
    fig_title:图的标题
    '''
    
     #设置画布大小
    plt.figure(figsize=(fig_x,fig_y))
    # 添加标题
    if fig_title:
        plt.title(fig_title)

    #直接使用df的对应列作为y坐标,label_name作为类名
    sns.heatmap(data=data,annot=annot,fmt=fmt,vmin=vmin,vmax=vmax,cmap=cmap)

    
    
draw_heatmap(uniform_data,annot=True,fmt='.1f',cmap='Wistia')

png

#散点图
def draw_scatter_plot(data,x=None,y=None,hue=None,size=None,style=None,fig_x=10,fig_y=10,fig_title=None):

    '''
    Ref:
    https://seaborn.pydata.org/generated/seaborn.scatterplot.html?highlight=scatterplot#seaborn.scatterplot
    https://www.kaggle.com/alexisbcook/scatter-plots
    
    功能:通过x,y展示和(hue,size,style)等参数来展示不同子集之间的关系
    
    关键参数:
    data:能够转化为2维ndarray,使用dataframe对象时headmap的横纵坐标的label从行列名中自动找出
    x:明确data中的哪一列作为x轴
    y:明确data中的哪一列作为y轴
    hue:不同类别对应的column名字====>对应不同的色调 
    size:不同类别对应的column名字====>对应不同的大小(可通过sizes参数调节大小)
    style:不同类别对应的column名字====>对应不同的风格
    fig_x:图的长
    fig_y:图的宽
    fig_title:图的标题
    '''
    
     #设置画布大小
    plt.figure(figsize=(fig_x,fig_y))
    # 添加标题
    if fig_title:
        plt.title(fig_title)
   

    #直接使用df
    if x==None and y==None:
        sns.scatterplot(data)
    else:
        sns.scatterplot(data=data,x=x,y=y,hue=hue,size=size,style=style)

    
    
draw_scatter_plot(df_sample,x='bmi',y='charges',hue='children',fig_x=5,fig_y=5)

png

draw_scatter_plot(df_sample,x='bmi',y='charges',style='children',fig_x=5,fig_y=5)

png

draw_scatter_plot(df_sample,x='bmi',y='charges',size='children',fig_x=5,fig_y=5)

png

#带有回归线的散点图
def draw_regplot(data,x,y,fig_x=10,fig_y=10,fig_title=None):
    '''
    Ref:
    https://seaborn.pydata.org/generated/seaborn.regplot.html?highlight=regplot#seaborn.regplot
    https://www.kaggle.com/alexisbcook/scatter-plots
    
    功能:画出这些数据,同时画出这些数据的回归模型

    data:Dataframe格式的数据
    x:横坐标的列名
    y:纵坐标的列名
    fig_x:图的长
    fig_y:图的宽
    fig_title:图的标题
    
    '''
    
     #设置画布大小
    plt.figure(figsize=(fig_x,fig_y))
    # 添加标题
    if fig_title:
        plt.title(fig_title)
   
    sns.regplot(data=data,x=x,y=y)
    
draw_regplot(df_sample,x='bmi',y='charges',fig_x=8,fig_y=8)

png

#带有多条回归线的散点图
def draw_lmplot(data,x,y,hue,fig_x=10,fig_y=10,fig_title=None):
    '''
    
    Ref:
    https://www.kaggle.com/alexisbcook/scatter-plots
    https://seaborn.pydata.org/generated/seaborn.lmplot.html?highlight=lmplot#seaborn.lmplot
    
    功能:画出多组数据,同时画出多组数据的回归模型,

    data:Dataframe格式的数据
    x:横坐标的列名
    y:纵坐标的列名
    hue:不同类别对应的column名字====>对应不同的色调 
    fig_x:图的长
    fig_y:图的宽
    fig_title:图的标题
    
    '''
    
     #设置画布大小
    plt.figure(figsize=(fig_x,fig_y))
    # 添加标题
    if fig_title:
        plt.title(fig_title)
   
    sns.lmplot(data=data,x=x,y=y,hue=hue)
    
draw_lmplot(df_sample,x='bmi',y='charges',hue='smoker',fig_x=10,fig_y=10)
<Figure size 720x720 with 0 Axes>

png

#群图
def draw_swarmplot(data,x,y,hue=None,fig_x=10,fig_y=10,fig_title=None):
    '''
    
    Ref:
    https://www.kaggle.com/alexisbcook/scatter-plots
    https://seaborn.pydata.org/generated/seaborn.swarmplot.html?highlight=swarmplot#seaborn.swarmplot
    
    功能:
    Draw a categorical scatterplot with non-overlapping points.
    画出一个不会有重合点的类别散点图
    
    data:Dataframe格式的数据
    x:横坐标的列名
    y:纵坐标的列名
    hue:不同类别对应的column名字====>对应不同的色调 
    fig_x:图的长
    fig_y:图的宽
    fig_title:图的标题
    
    '''
    
     #设置画布大小
    plt.figure(figsize=(fig_x,fig_y))
    # 添加标题
    if fig_title:
        plt.title(fig_title)
   
    sns.swarmplot(data=data,x=x,y=y,hue=hue)
    
draw_swarmplot(df_sample,x='children',y='bmi')
/Users/van/anaconda/anaconda3/envs/python36/lib/python3.6/site-packages/seaborn/categorical.py:1296: UserWarning: 6.1% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)

png

draw_swarmplot(df_sample,x='smoker',y='charges',hue='region')
/Users/van/anaconda/anaconda3/envs/python36/lib/python3.6/site-packages/seaborn/categorical.py:1296: UserWarning: 11.4% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)

png

#词云
def draw_wordcolud(allwords=None,fig_x=30,fig_y=10,fig_title=None):
    #Ref:
    #https://amueller.github.io/word_cloud/
    #
    #allwords:word list
    #
    #
    #WordCloud重要参数:
    #
    #min_font_size:最小的字体大小default=4
    #font_step:字体增长的step
    #max_words:能够画的最多的字
    #stopwords:停用词(不展示)
    #background_color:背景颜色
    #max_font_size:最大的字体尺寸
    #relative_scaling:字的相对大小,relative_scaling=0时,只考虑rank排名,relative_scaling=1字体大小和重要性的两倍成正比
    #           default=’auto’即relative_scaling=0.5,此时只考虑词的重要性
    #repeat:没有达到最大的词数量或者最小的字体大小时,是否重复词云中的word,default=False
    #include_numbers:是否包含数字default=False
    #
    #
    #fig_x:图的长
    #fig_y:图的宽
    #fig_title:图的标题

    #默认列表
    if not allwords:
        allwords={'好':1,'不好':0.001,'差':0.001,'优秀':1}
    #根据频率排序
    mostcommon = sorted(Counter(allwords).items(),key=lambda x:x[1],reverse=True)#[('好',0.1),('优秀',0.2),('不好',0.9),('差',0.8),]#FreqDist(allwords).most_common(100)#来自文本中出现频率最高的100个词

    #设置中文字体的路径
    font = '/Library/Fonts/Songti.ttc'
    
    
    wordcloud = WordCloud(width=1600, height=800,font_path=font,relative_scaling=1,repeat=True, background_color='white', stopwords=STOPWORDS).generate(str(mostcommon))

    fig = plt.figure(figsize=(fig_x,fig_y), facecolor='white')

    plt.imshow(wordcloud, interpolation="bilinear")

    #关掉坐标显示
    plt.axis('off')

    plt.title(fig_title, fontsize=50)
    #padding
    plt.tight_layout(pad=0)

    plt.show()
draw_wordcolud()

png

#密度分布图
def draw_kde_plot(data,x,y,hue=None,shade=True,fig_title=None,fig_x=10,fig_y=10):
    
    ''' 
    Ref:
    https://www.kaggle.com/alexisbcook/distributions
    https://seaborn.pydata.org/generated/seaborn.kdeplot.html
    
    功能:
    Plot univariate or bivariate distributions using kernel density estimation.
    使用核密度估计绘制单变量或双变量分布。
    用连续变量展示数据的分布。
    
    data:Dataframe格式的数据
    x:横坐标列名
    y:纵坐标列名
    shade:是否使用阴影
    hue:不同类别对应的column名字====>对应不同的色调 
    fig_x:图的长
    fig_y:图的宽
    fig_title:图的标题
    
    '''
    
     #设置画布大小
    plt.figure(figsize=(fig_x,fig_y))
    # 添加标题
    if fig_title:
        plt.title(fig_title)
   
    sns.kdeplot(data=data,x=x,y=y,hue=hue,shade=shade)
    
    
draw_kde_plot(df_sample,x='bmi',y='charges',hue='smoker')

png

#分布图
def draw_jointplot(data,x,y,hue=None,kind='kde',fig_title=None,fig_x=10,fig_y=10):
    
    '''
    
    Ref:
    https://www.kaggle.com/alexisbcook/distributions
    http://seaborn.pydata.org/generated/seaborn.jointplot.html
    
    功能:
    Draw a plot of two variables with bivariate and univariate graphs.
    用双变量和单变量图绘制两个变量的图。
    
    data:Dataframe格式的数据
    x:横坐标列名
    y:纵坐标列名
    shade:是否使用阴影
    hue:不同类别对应的column名字====>对应不同的色调 
    kind:不同种类的图,包括{ “scatter” | “kde” | “hist” | “hex” | “reg” | “resid” }
    fig_x:图的长
    fig_y:图的宽
    fig_title:图的标题
    
    '''
    
     #设置画布大小
    plt.figure(figsize=(fig_x,fig_y))
    # 添加标题
    if fig_title:
        plt.title(fig_title)
   
    sns.jointplot(data=data,x=x,y=y,hue=hue,kind=kind)
    
    
draw_jointplot(df_sample,x='bmi',y='charges',kind='resid')
<Figure size 720x720 with 0 Axes>

png

draw_jointplot(df_sample,x='bmi',y='charges',kind='reg')
<Figure size 720x720 with 0 Axes>

png

draw_jointplot(df_sample,x='bmi',y='charges',kind='hex')
<Figure size 720x720 with 0 Axes>

png

draw_jointplot(df_sample,x='bmi',y='charges',hue='smoker',kind='hist',fig_x=15,fig_y=15)
<Figure size 1080x1080 with 0 Axes>

png

draw_jointplot(df_sample,x='bmi',y='charges',hue='smoker',kind='scatter')
<Figure size 720x720 with 0 Axes>

png