JiaxiangBU/tutoring2

数据框某列分段方法

Closed this issue · 2 comments

问题

在 Python 中,如何对数据框的某一列使用 if-else 语法进行分段?

代码

import pandas as pd
import numpy as np

# Reproducible sample data: 1000 integer ages drawn from [0, 100).
np.random.seed(12)
age = np.random.randint(low=0, high=100, size=1000)
df_raw = pd.DataFrame({'age': age})

## Method 1: pd.cut()
# right=False makes every bin left-closed / right-open:
# [0, 18) -> 'A', [18, 30) -> 'B', [30, 60) -> 'C', [60, 1000) -> 'D'.
df_raw['age_cut_1'] = pd.cut(
    df_raw['age'],
    bins=[0, 18, 30, 60, 1000],
    right=False,
    labels=['A', 'B', 'C', 'D'],
)

## Method 2: np.select()
# Conditions are checked in order and the first one that holds wins, so each
# later condition only sees rows rejected by the earlier ones.
conditions = [
    df_raw['age'] < 18,
    df_raw['age'] < 30,
    df_raw['age'] < 60,
    df_raw['age'] >= 60,
]
choices = ['A', 'B', 'C', 'D']
# np.select's implicit default is the integer 0, which has no common dtype
# with string choices (recent NumPy versions raise TypeError for that mix).
# Pass an explicit string fallback for rows matching no condition (e.g. NaN).
df_raw['age_cut_2'] = np.select(conditions, choices, default='E')

## 方法三:if-else

#30

import pandas as pd
import numpy as np

# Build the reproducible sample: 1000 integer ages in [0, 100).
np.random.seed(12)
age = np.random.randint(0, 100, 1000)
df_raw = pd.DataFrame({'age': age})
## Method 1: pd.cut()
# Left-closed bins (right=False): [0, 18), [18, 30), [30, 60), [60, 1000).
age_bin_edges = [0, 18, 30, 60, 1000]
age_bin_labels = ['A', 'B', 'C', 'D']
df_raw['age_cut_1'] = pd.cut(
    df_raw['age'],
    bins=age_bin_edges,
    labels=age_bin_labels,
    right=False,
)
df_raw.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
age age_cut_1
0 75 D
1 27 B
2 6 A
3 2 A
4 3 A
## Method 2: np.select()
# Conditions are checked in order and the first one that holds wins, so each
# later condition only sees rows rejected by the earlier ones.
conditions = [
    df_raw['age'] < 18,
    df_raw['age'] < 30,
    df_raw['age'] < 60,
    df_raw['age'] >= 60,
]
choices = ['A', 'B', 'C', 'D']
# np.select's implicit default is the integer 0, which has no common dtype
# with string choices (recent NumPy versions raise TypeError for that mix).
# Pass an explicit string fallback for rows matching no condition (e.g. NaN).
df_raw['age_cut_2'] = np.select(conditions, choices, default='E')
df_raw.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
age age_cut_1 age_cut_2
0 75 D D
1 27 B B
2 6 A A
3 2 A A
4 3 A A
# https://mp.weixin.qq.com/s/6Kda5qGzX6IZyYi8McDruA
# https://blog.csdn.net/weixin_39750084/article/details/103437665
# 不要忘记 return
def int2cat(x):
    """Return the age-band label for a single numeric value.

    Bands are left-closed: below 18 -> 'A', below 30 -> 'B',
    below 60 -> 'C', 60 or above -> 'D'. A value for which every
    comparison is false (such as NaN) yields 'E'.
    """
    # Upper bounds are tried in ascending order; the first one that x is
    # strictly below decides the label.
    for upper_bound, label in ((18, 'A'), (30, 'B'), (60, 'C')):
        if x < upper_bound:
            return label
    # Only values that are not comparable (e.g. NaN) fail this last check.
    return 'D' if x >= 60 else 'E'
# Method 3: apply the scalar if/elif function to every value of the 'age'
# column and preview the first rows of the resulting label Series.
df_raw['age'].apply(int2cat).head()
0    D
1    B
2    A
3    A
4    A
Name: age, dtype: object
# https://stackoverflow.com/questions/9987483/elif-in-list-comprehension-conditionals
# Same banding written inline as chained conditional expressions.
# NOTE: unlike int2cat there is no 'E' fallback here; anything that is not
# below 60 (including a NaN value) is labelled 'D'.
df_raw['age'].apply(lambda age: 'A' if age < 18 else 'B' if age < 30 else 'C' if age < 60 else 'D').head()
0    D
1    B
2    A
3    A
4    A
Name: age, dtype: object

@slsongge 晓松,见 commit cffbe1d 和 notebook。