数据框某列分段方法
Closed this issue · 2 comments
slsongge commented
问题
在 Python 中，如何使用 if-else 语法对数据框的某一列进行分段？
代码
import pandas as pd
import numpy as np

# Fix the seed so the 1000 sample ages are reproducible between runs.
np.random.seed(12)
age = np.random.randint(low=0, high=100, size=1000)
df_raw = pd.DataFrame({'age': age})

## Method 1: pd.cut()
# right=False makes every bin closed on the left and open on the right:
# [0, 18) -> A, [18, 30) -> B, [30, 60) -> C, [60, 1000) -> D.
df_raw['age_cut_1'] = pd.cut(df_raw['age'], bins=[0, 18, 30, 60, 1000], right=False, labels=['A', 'B', 'C', 'D'])

## Method 2: np.select()
# Conditions are checked in order and the first one that is True wins, so a
# row gets the label of the first threshold it falls below.
conditions = [
    (df_raw['age'] < 18),
    (df_raw['age'] < 30),
    (df_raw['age'] < 60),
    (df_raw['age'] >= 60),
]
choices = ['A', 'B', 'C', 'D']
# Pass a string default explicitly: np.select's implicit default is the
# integer 0, which has no common dtype with string choices and raises a
# TypeError on recent NumPy releases. 'E' marks rows that match no condition.
df_raw['age_cut_2'] = np.select(conditions, choices, default='E')

## Method 3: if-else
JiaxiangBU commented
import numpy as np
import pandas as pd

# Seed NumPy's global random state so the same 1000 ages are drawn each run.
np.random.seed(12)
age = np.random.randint(0, 100, size=1000)
df_raw = pd.DataFrame(data={'age': age})

## Method 1: pd.cut()
# Left-closed bins (right=False): [0, 18) -> A, [18, 30) -> B,
# [30, 60) -> C, [60, 1000) -> D.
df_raw['age_cut_1'] = pd.cut(
    x=df_raw['age'],
    bins=[0, 18, 30, 60, 1000],
    labels=list('ABCD'),
    right=False,
)
# Notebook-style preview of the first rows.
df_raw.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | age | age_cut_1 |
---|---|---|
0 | 75 | D |
1 | 27 | B |
2 | 6 | A |
3 | 2 | A |
4 | 3 | A |
## Method 2: np.select()
# Conditions are checked in order and the first one that is True wins, so a
# row gets the label of the first threshold it falls below.
conditions = [
    (df_raw['age'] < 18),
    (df_raw['age'] < 30),
    (df_raw['age'] < 60),
    (df_raw['age'] >= 60),
]
choices = ['A', 'B', 'C', 'D']
# Pass a string default explicitly: np.select's implicit default is the
# integer 0, which has no common dtype with string choices and raises a
# TypeError on recent NumPy releases. 'E' marks rows that match no condition.
df_raw['age_cut_2'] = np.select(conditions, choices, default='E')
# Notebook-style preview of the first rows.
df_raw.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | age | age_cut_1 | age_cut_2 |
---|---|---|---|
0 | 75 | D | D |
1 | 27 | B | B |
2 | 6 | A | A |
3 | 2 | A | A |
4 | 3 | A | A |
# https://mp.weixin.qq.com/s/6Kda5qGzX6IZyYi8McDruA
# https://blog.csdn.net/weixin_39750084/article/details/103437665
# Don't forget the `return` in each branch: a branch without one makes the
# function return None for that input.
def int2cat(x):
    """Map a numeric age to a category label.

    Args:
        x: A single numeric value (one element of the age column).

    Returns:
        'A' if x < 18, 'B' if 18 <= x < 30, 'C' if 30 <= x < 60,
        'D' if x >= 60, and 'E' when none of the comparisons is true
        (for example when x is NaN).
    """
    if x < 18:
        return 'A'
    if x < 30:
        return 'B'
    if x < 60:
        return 'C'
    if x >= 60:
        return 'D'
    # Only reached when every comparison above is False, e.g. for NaN.
    return 'E'
# Method 3 (named function): label every age with int2cat, then preview the
# first rows of the resulting Series.
age_labels = df_raw['age'].apply(int2cat)
age_labels.head()
0 D
1 B
2 A
3 A
4 A
Name: age, dtype: object
# https://stackoverflow.com/questions/9987483/elif-in-list-comprehension-conditionals
# Method 3 (inline): the same thresholds written as a chained conditional
# expression inside a lambda; the first branch whose test passes is used.
df_raw['age'].apply(
    lambda x: (
        'A' if x < 18
        else 'B' if x < 30
        else 'C' if x < 60
        else 'D'
    )
).head()
0 D
1 B
2 A
3 A
4 A
Name: age, dtype: object
JiaxiangBU commented