3.13. Manipulating DataFrames#

import pandas as pd
import numpy as np
# Demo table used by every example in this section: one row per person.
df = pd.DataFrame(
    data={
        "Name": ["Sahil Choudhary", "Sonia Choudhary", "Sourav", "Vishal"],
        "Age": [10, 20, 30, 40],
        "Gender": ["M", "F", "M", "M"],
        "City": ["J", "K", "L", "P"],
        "Work": [True, False, False, True],
    },
)
df
Name Age Gender City Work
0 Sahil Choudhary 10 M J True
1 Sonia Choudhary 20 F K False
2 Sourav 30 M L False
3 Vishal 40 M P True

3.13.1. DataFrame Inspection#

# Arithmetic mean of the Age column.
df["Age"].mean()
25.0
# Total of all values in the Age column.
df["Age"].sum()
100
# Distinct values of the Name column, returned as a NumPy array.
df["Name"].unique()
array(['Sahil Choudhary', 'Sonia Choudhary', 'Sourav', 'Vishal'],
      dtype=object)
# Number of distinct values in the Name column.
df["Name"].nunique()
4
# How many times each distinct Name occurs.
df["Name"].value_counts()
Sahil Choudhary    1
Sonia Choudhary    1
Sourav             1
Vishal             1
Name: Name, dtype: int64

3.13.2. Applying Conditions on columns#

3.13.2.1. Condition on one column#

# Boolean mask: keep only the rows whose Age is greater than 10.
df.loc[df["Age"] > 10]
Name Age Gender City Work
1 Sonia Choudhary 20 F K False
2 Sourav 30 M L False
3 Vishal 40 M P True
# To get just one column of the matching rows, pick the rows and the column in
# a single .loc call. Chaining two [] lookups (df[mask]['Name']) gives the same
# values here but first builds an intermediate DataFrame of every column.
df.loc[df['Age'] > 10, 'Name']
1    Sonia Choudhary
2             Sourav
3             Vishal
Name: Name, dtype: object

3.13.2.2. Name contains Choudhary#

# Keep the rows whose Name contains the literal text "Choudhary".
#   regex=False -> treat the pattern as plain text, not a regular expression
#   na=False    -> a missing Name counts as "no match" rather than leaving a
#                  missing value in the boolean mask
df[df['Name'].str.contains('Choudhary', regex=False, na=False)]
Name Age Gender City Work
0 Sahil Choudhary 10 M J True
1 Sonia Choudhary 20 F K False

3.13.2.3. Condition on multiple columns#

# Combine per-row conditions with `&` (element-wise AND), not the keyword `and`:
# `and` asks for the truth value of a whole Series, which pandas rejects with a
# ValueError. When the comparisons are written inline, wrap each one in
# parentheses, because `&` binds more tightly than `>` and `==`.
age_over_10 = df["Age"] > 10
is_male = df["Gender"] == "M"
df[age_over_10 & is_male]
Name Age Gender City Work
2 Sourav 30 M L False
3 Vishal 40 M P True
# between() includes both end points by default, so ages 10 and 20 are kept.
df.loc[df["Age"].between(10, 20)]
Name Age Gender City Work
0 Sahil Choudhary 10 M J True
1 Sonia Choudhary 20 F K False