3.13. Manipulating Dataframe#
import pandas as pd
import numpy as np
# Small example frame used by every cell below: one row per person,
# with string, integer and boolean columns.
df = pd.DataFrame(
    {
        "Name": ["Sahil Choudhary", "Sonia Choudhary", "Sourav", "Vishal"],
        "Age": [10, 20, 30, 40],
        "Gender": ["M", "F", "M", "M"],
        "City": ["J", "K", "L", "P"],
        "Work": [True, False, False, True],
    }
)
df
| | Name | Age | Gender | City | Work |
---|---|---|---|---|---|
0 | Sahil Choudhary | 10 | M | J | True |
1 | Sonia Choudhary | 20 | F | K | False |
2 | Sourav | 30 | M | L | False |
3 | Vishal | 40 | M | P | True |
3.13.1. Dataframe Inspection#
# Arithmetic mean of the Age column.
df["Age"].mean()
25.0
# Total of all values in the Age column.
df["Age"].sum()
100
# Distinct values of the Name column, returned as a NumPy array.
df["Name"].unique()
array(['Sahil Choudhary', 'Sonia Choudhary', 'Sourav', 'Vishal'],
dtype=object)
# Number of distinct values in the Name column.
df["Name"].nunique()
4
# How many times each distinct Name occurs.
df["Name"].value_counts()
Sahil Choudhary 1
Sonia Choudhary 1
Sourav 1
Vishal 1
Name: Name, dtype: int64
3.13.2. Applying Conditions on columns#
3.13.2.1. Condition on one column#
# Keep only the rows whose Age is greater than 10 (boolean-mask selection).
df.loc[df["Age"] > 10]
| | Name | Age | Gender | City | Work |
---|---|---|---|---|---|
1 | Sonia Choudhary | 20 | F | K | False |
2 | Sourav | 30 | M | L | False |
3 | Vishal | 40 | M | P | True |
# If you want just one column of the matching rows rather than the whole
# DataFrame, select the rows and the column in a single .loc call. This avoids
# chained indexing (df[mask]['Name']), which builds an intermediate DataFrame
# first and is unreliable if the same pattern is later used for assignment.
df.loc[df['Age'] > 10, 'Name']
1 Sonia Choudhary
2 Sourav
3 Vishal
Name: Name, dtype: object
3.13.2.2. Name contains Choudhary#
# Rows whose Name contains the literal substring 'Choudhary'.
# regex=False stops the pattern being interpreted as a regular expression;
# na=False treats missing names as non-matching so the mask stays boolean.
df[df['Name'].str.contains('Choudhary', regex=False, na=False)]
| | Name | Age | Gender | City | Work |
---|---|---|---|---|---|
0 | Sahil Choudhary | 10 | M | J | True |
1 | Sonia Choudhary | 20 | F | K | False |
3.13.2.3. Condition on multiple columns#
# Combine element-wise conditions with & (and | for "or"), wrapping each
# comparison in parentheses because & binds more tightly than > and ==.
# Note: Python's `and` does not work here - it asks for the truth value of a
# whole Series, which pandas rejects with a ValueError.
df[(df["Age"] > 10) & (df["Gender"] == "M")]
| | Name | Age | Gender | City | Work |
---|---|---|---|---|---|
2 | Sourav | 30 | M | L | False |
3 | Vishal | 40 | M | P | True |
# Rows whose Age lies in the range 10..20; both end points are included,
# so the rows with Age 10 and Age 20 are kept.
df[df["Age"].between(10, 20)]
| | Name | Age | Gender | City | Work |
---|---|---|---|---|---|
0 | Sahil Choudhary | 10 | M | J | True |
1 | Sonia Choudhary | 20 | F | K | False |