3.10. String functions on Columns#
3.10.1. EDA for columns#
import pandas as pd
import numpy as np
# Small sample frame used by the column-name examples below.
df = pd.DataFrame(
    {
        'First Name': ['Sahil', 'Sonia', 'Sourav', 'Vishal'],
        'Age': [10, 20, 30, 40],
        'Gender': ['M', 'F', 'M', 'M'],
        'City': ['J', 'K', 'L', 'P'],
        'Place of Work': [True, False, False, True],
    }
)
df
First Name | Age | Gender | City | Place of Work | |
---|---|---|---|---|---|
0 | Sahil | 10 | M | J | True |
1 | Sonia | 20 | F | K | False |
2 | Sourav | 30 | M | L | False |
3 | Vishal | 40 | M | P | True |
3.10.1.1. Get columns as list#
df.columns.tolist()
['First Name', 'Age', 'Gender', 'City', 'Place of Work']
3.10.1.2. Convert column names to a Series or a DataFrame#
df.columns.to_series()
First Name First Name
Age Age
Gender Gender
City City
Place of Work Place of Work
dtype: object
df.columns.to_frame()
0 | |
---|---|
First Name | First Name |
Age | Age |
Gender | Gender |
City | City |
Place of Work | Place of Work |
3.10.1.3. Check if specific column is there or not#
# str.contains does a substring match on every column name and returns a boolean mask.
# For an exact "is this column present?" check, use: 'First Name' in df.columns
df.columns.str.contains('Name')
array([ True, False, False, False, False])
3.10.1.4. Check if any duplicate column is there#
# Returns one flag per column (True marks a repeat of an earlier name);
# append .any() to get a single yes/no answer.
df.columns.duplicated()
array([False, False, False, False, False])
3.10.1.5. Check methods/attributes of String#
dir(df.columns.str)[0:5]
['__annotations__', '__class__', '__delattr__', '__dict__', '__dir__']
3.10.1.6. Make column names to lower case#
df.columns.str.lower()
Index(['first name', 'age', 'gender', 'city', 'place of work'], dtype='object')
3.10.1.7. Make column names to Upper case#
df.columns.str.upper()
Index(['FIRST NAME', 'AGE', 'GENDER', 'CITY', 'PLACE OF WORK'], dtype='object')
3.10.1.8. Make column names to Title case#
df.columns.str.title() # Title Case: the first letter of every word is capitalized
Index(['First Name', 'Age', 'Gender', 'City', 'Place Of Work'], dtype='object')
3.10.1.9. Make column names to Capitalize#
df.columns.str.capitalize() # Only the first character is upper-cased; the rest is lower-cased
Index(['First name', 'Age', 'Gender', 'City', 'Place of work'], dtype='object')
3.10.1.10. Replace empty spaces with underscores#
# Swap every space for an underscore (returns a new Index; df itself is not modified).
df.columns.str.replace(' ','_')
Index(['First_Name', 'Age', 'Gender', 'City', 'Place_of_Work'], dtype='object')
3.10.1.11. Rename columns#
# Template: substitute real labels for 'oldname'/'newname'. Keys that are not
# existing column names are ignored (errors='ignore' is the default), so as
# written this call leaves df unchanged.
df.rename(columns={'oldname':'newname'},inplace=True)
3.10.1.12. Check total number of columns#
len(df.columns)
5
3.10.1.13. Select particular columns#
df.columns.values[0:4]
array(['First Name', 'Age', 'Gender', 'City'], dtype=object)
3.10.1.14. Get 3rd column (index 2) and rename it#
# Look the label up by position (index 2 is the third column) and rename it
# through the public API. Assigning into df.columns.values writes straight
# into the Index's backing array, which pandas documents as immutable.
df.rename(columns={df.columns[2]: 'DOB'}, inplace=True)
df
First Name | Age | DOB | City | Place of Work | |
---|---|---|---|---|---|
0 | Sahil | 10 | M | J | True |
1 | Sonia | 20 | F | K | False |
2 | Sourav | 30 | M | L | False |
3 | Vishal | 40 | M | P | True |
3.10.1.15. Select all columns except one#
df.columns[df.columns!= 'DOB']
Index(['First Name', 'Age', 'City', 'Place of Work'], dtype='object')
df
First Name | Age | DOB | City | Place of Work | |
---|---|---|---|---|---|
0 | Sahil | 10 | M | J | True |
1 | Sonia | 20 | F | K | False |
2 | Sourav | 30 | M | L | False |
3 | Vishal | 40 | M | P | True |
3.10.1.16. Select all columns except multiple#
#?
df.loc[:,-df.columns.isin(['DOB','City']).columns]
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Input In [21], in <cell line: 2>()
1 #?
----> 2 df.loc[:,-df.columns.isin(['DOB','City']).columns]
AttributeError: 'numpy.ndarray' object has no attribute 'columns'
3.10.1.17. Select column names that begins with particular word#
df.columns.str.startswith('First')
# Gives array of booleans
array([ True, False, False, False, False])
3.10.1.18. Select group of column names#
df.columns.values[[0,1,2]]
array(['First Name', 'Age', 'DOB'], dtype=object)
df.columns[0:3]
Index(['First Name', 'Age', 'DOB'], dtype='object')