1) Dataframe Manipulation
df ['col1' ].apply (lambda x : x .strip ().capitalize ())
mapping - more efficient for applying methods to a single column (series vs. entire row)
df ['col1' ].map (lambda x : x + 1 )
#Ternary if statement mapped to single col
df ['Col1' ].map (lambda x : 1500 if x == 'Music' else 800 )
df [['colA' , 'colD' ]] = df [['colA' , 'colD' ]].apply (lambda x : x + 1 )
def col_func (x ):
x ['col1' ] = 1
x ['col2' ] = x ['col2' ] + 'x
return x
df .apply (col_func , axis = 1 )
for index , row in df .iterrows ():
print (row ['col1' ], row ['col2' ])
for ind in df .index :
print (df ['name' ][ind ], df ['age' ][ind ])
# Ternary if statement
df ['Price' ] = [1500 if x == 'Music' else 800 for x in df ['Event' ]]
# Apply/Map
df ['Price' ] = df ['Event' ].map (lambda x : 1500 if x == 'Music' else 800 )
# Boundary filtering
dataframe [dataframe ['Percentage' ] > 80 ]
# In a list
options = ['Math' , 'Commerce' , 'Shaurya' ]
rslt_df = dataframe [~ dataframe ['Stream' ].isin (options )]
# Both conditions
rslt_df = dataframe [(dataframe ['Age' ] == 21 ) & dataframe ['Stream' ].isin (options )]
# Sorting
df .sort_values (by = 'col1' , ascending = False )
# Simple
df .groupby (['a' ,'b' ])['c' ].sum ()
# Multiple aggregations
df .groupby (['Gender' ,'State' ])[['col_1' , 'col_2' ]].agg ([min , max ])
# Aggregations:
#'sum', 'min', 'max', 'count', 'mean'
aggregations = {'shortcode' :'count' ,
'is_video' : np .mean ,
'is_sponsored' : np .mean }
# as_index so you don't have to .reset_index() later
df .groupby (company_info , as_index = False ).agg (aggregations )
# With lambda function
df .groupby ('Sex' ).Age .agg (
['max' , 'mean' , lambda x : True if x .mean () > 50 else False ]
)
df = df .dropna (how = 'all' )
df .drop (df [df ['Age' ] < 25 ].index , inplace = True )
# Change all the column names
df .columns = ['Col_1' , 'Col_2' ]
# Change all the row indexes
df .index = ['Row_1' , 'Row_2' , 'Row_3' , 'Row_4' ]
# Change some of the columns
df = df .rename (columns = {"Col_1" :"Mod_col" })
## add x to every column name
df = df .rename (columns = lambda x : x + 'x' )
df ["somecolumn" ].astype (int )
df1 = df ['Position' ].str .contains ("PG" )
pd .DataFrame (lst , columns = ["col1" , "col2" ])
pd .DataFrame (dict )
pd .DataFrame (lst_of_dicts )
lst = [['Geek' , 25 ], ['is' , 30 ],
['for' , 26 ], ['Geeksforgeeks' , 22 ]]
pd .DataFrame (lst , columns = ["word" , "number" ])
dict = {'col1' :['val1' , 'val2' , 'val3' ],
'col2' :[0 , 1 , 3 ]}
pd .DataFrame (dict )
lst_of_dicts = [{'Geeks' : 'dataframe' , 'For' : 'using' , 'geeks' : 'list' },
{'Geeks' :10 , 'For' : 20 , 'geeks' : 30 }]
pd .DataFrame (lst_of_dicts )
author = ['Jitender' , 'Purnima' , 'Arpit' , 'Jyoti' ]
article = [210 , 211 , 114 , 178 ]
auth_series = pd .Series (author )
article_series = pd .Series (article )
frame = { 'Author' : auth_series , 'Article' : article_series }
result = pd .DataFrame (frame )
new_data = { "col1" :"val1" ,
"col2" :"val2" ,
"col5" :"val5" ,
"col3" :"val3" ,
"col4" :"val4"
}
# combine this new data with existing DataFrame
df ["new_col" ] = df ["old_col_to_match" ].map (new_data )
df [col ].map ({'yes' :True , 'no' :False })
df1 .merge (df2 , how = 'left' , on = 'a' )
df1 .concat (df2 )