3.1. Creating a Dataframe#
import pandas as pd
import numpy as np
3.1.1. Empty Dataframe#
# Calling the constructor with no arguments gives a frame that has no rows
# and no columns. The class name is case-sensitive: capital D, capital F.
df = pd.DataFrame()
print(df)
Empty DataFrame
Columns: []
Index: []
3.1.2. From Clipboard#
# Build the frame from whatever text is currently on the system clipboard
# (for example, a range of cells copied from a spreadsheet).
df = pd.read_clipboard()
3.1.3. From List#
Name = ['Sahil', 'Sonia', 'Sourav', 'Vishal']
Age = [20, 21, 19, 18]

# zip() pairs the two lists element-wise, so every (name, age) tuple becomes
# one row. With no `columns` argument the headers default to 0, 1, ...
print(pd.DataFrame(list(zip(Name, Age))), end='\n\n')

# Same rows, this time with explicit column headers.
print(pd.DataFrame(list(zip(Name, Age)), columns=['Name', 'Age']))
0 1
0 Sahil 20
1 Sonia 21
2 Sourav 19
3 Vishal 18
Name Age
0 Sahil 20
1 Sonia 21
2 Sourav 19
3 Vishal 18
3.1.3.1. Custom indexes#
# Row labels do not have to be 0..n-1: supply one label per row through
# `index` and they replace the default integer labels.
df = pd.DataFrame(
    list(zip(Name, Age)),
    index=['First', 'Second', 'Third', 'Fourth'],
)
print(df)
0 1
First Sahil 20
Second Sonia 21
Third Sourav 19
Fourth Vishal 18
3.1.4. From Single Series#
s1 = pd.Series(list('ABCD'))

# to_frame() turns the Series into a one-column DataFrame. The Series has no
# name, so that single column is labelled 0 by default.
print(s1.to_frame(), end='\n\n')

# `name` sets the header of that single column.
print(s1.to_frame(name='Alphabets'))
0
0 A
1 B
2 C
3 D
Alphabets
0 A
1 B
2 C
3 D
3.1.5. From Multiple series as columns of dataframe#
s1 = pd.Series(list('ABCD'))
s2 = pd.Series([10, 20, 30, 40])

# Promote each Series to a one-column frame, then join them side by side.
# join() lines rows up by index label; both Series carry the labels 0..3.
s1.to_frame('Name').join(s2.to_frame('Age'))
| | Name | Age |
---|---|---|
0 | A | 10 |
1 | B | 20 |
2 | C | 30 |
3 | D | 40 |
3.1.6. From dictionary#
# Each key becomes a column header and its list supplies that column's
# values, so all lists must have the same length.
df = pd.DataFrame(dict(
    Name=['Sahil', 'Sonia', 'Sourav', 'Vishal'],
    Age=[20, 21, 19, 18],
))
df
| | Name | Age |
---|---|---|
0 | Sahil | 20 |
1 | Sonia | 21 |
2 | Sourav | 19 |
3 | Vishal | 18 |
3.1.6.1. Assigning indexes after declaring the dataframe#
# Row labels can be swapped on an existing frame by assigning to .index;
# the replacement needs exactly one label per row.
new_indexes = list('abcd')
df.index = new_indexes
df
| | Name | Age |
---|---|---|
a | Sahil | 20 |
b | Sonia | 21 |
c | Sourav | 19 |
d | Vishal | 18 |
3.1.7. From csv#
# read_csv accepts a URL as well as a local path; the first line of the file
# is used as the column headers.
# NOTE(review): the second header shows up wrapped in literal double quotes
# in the output. Presumably the file has a space after the comma, in which
# case skipinitialspace=True would give a clean name; confirm against the file.
df = pd.read_csv(
    'https://people.sc.fsu.edu/~jburkardt/data/csv/crash_catalonia.csv'
)
df
| | Day of Week | "Number of Crashes" |
---|---|---|
0 | Sunday | 13664 |
1 | Monday | 17279 |
2 | Tuesday | 17337 |
3 | Wednesday | 17394 |
4 | Thursday | 17954 |
5 | Friday | 19147 |
6 | Saturday | 15714 |
3.1.7.1. Import specific columns#
# `usecols` restricts parsing to the listed columns; every other column in
# the file is skipped.
pd.read_csv(
    'https://people.sc.fsu.edu/~jburkardt/data/csv/crash_catalonia.csv',
    usecols=['Day of Week'],
)
| | Day of Week |
---|---|
0 | Sunday |
1 | Monday |
2 | Tuesday |
3 | Wednesday |
4 | Thursday |
5 | Friday |
6 | Saturday |
3.1.7.2. Marking additional values as NaN#
# `na_values` lists extra strings that should be parsed as missing (NaN), on
# top of the markers pandas already recognises.
# NOTE: 'np.nan' here is the literal text "np.nan", not NumPy's NaN constant.
pd.read_csv(
    'https://people.sc.fsu.edu/~jburkardt/data/csv/crash_catalonia.csv',
    na_values=['NA', 'np.nan'],
)
| | Day of Week | "Number of Crashes" |
---|---|---|
0 | Sunday | 13664 |
1 | Monday | 17279 |
2 | Tuesday | 17337 |
3 | Wednesday | 17394 |
4 | Thursday | 17954 |
5 | Friday | 19147 |
6 | Saturday | 15714 |
3.1.7.3. Read only first n rows#
# `nrows` stops parsing after that many data rows (the header line is not
# counted), which is handy for previewing a large file.
pd.read_csv(
    'https://people.sc.fsu.edu/~jburkardt/data/csv/crash_catalonia.csv',
    nrows=3,
)
| | Day of Week | "Number of Crashes" |
---|---|---|
0 | Sunday | 13664 |
1 | Monday | 17279 |
2 | Tuesday | 17337 |