3.1. Creating a Dataframe#

import pandas as pd
import numpy as np

3.1.1. Empty Dataframe#

# Calling the constructor with no arguments yields a frame that has
# neither rows nor columns. Mind the spelling: DataFrame, capital D and F.
df = pd.DataFrame()
print(df)
Empty DataFrame
Columns: []
Index: []

3.1.2. From Clipboard#

# Build a DataFrame from the text currently on the system clipboard
# (for example, cells copied from a spreadsheet). The result depends
# entirely on what was copied, so no output is shown for this cell.
df=pd.read_clipboard()

3.1.3. From List#

# Two parallel lists: the i-th name belongs with the i-th age.
Name = ['Sahil', 'Sonia', 'Sourav', 'Vishal']
Age = [20, 21, 19, 18]

# zip pairs the lists element-wise; every (name, age) tuple becomes one row.
# Without a `columns` argument the column labels default to 0, 1, ...
unlabelled = pd.DataFrame(list(zip(Name, Age)))
print(unlabelled)
print()

# Passing `columns` gives the columns meaningful labels instead.
labelled = pd.DataFrame(list(zip(Name, Age)), columns=['Name', 'Age'])
print(labelled)
        0   1
0   Sahil  20
1   Sonia  21
2  Sourav  19
3  Vishal  18

     Name  Age
0   Sahil   20
1   Sonia   21
2  Sourav   19
3  Vishal   18

3.1.3.1. Custom indexes#

# Row labels do not have to be 0..n-1: hand the constructor one label per
# row through `index`. Column labels are left at their defaults (0 and 1).
df = pd.DataFrame(
    list(zip(Name, Age)),
    index=['First', 'Second', 'Third', 'Fourth'],
)
print(df)
             0   1
First    Sahil  20
Second   Sonia  21
Third   Sourav  19
Fourth  Vishal  18

3.1.4. From Single Series#

s1 = pd.Series(['A', 'B', 'C', 'D'])

# An unnamed Series turns into a one-column frame whose single column
# is labelled 0 by default.
default_frame = s1.to_frame()
print(default_frame)
print()

# `name` sets the label of that single column.
named_frame = s1.to_frame(name='Alphabets')
print(named_frame)
   0
0  A
1  B
2  C
3  D

  Alphabets
0         A
1         B
2         C
3         D

3.1.5. From Multiple series as columns of dataframe#

s1 = pd.Series(['A', 'B', 'C', 'D'])
s2 = pd.Series([10, 20, 30, 40])

# Turn each Series into a one-column frame with its own label, then join
# the two frames side by side; rows are matched on their index (0..3 here).
name_frame = s1.to_frame(name='Name')
age_frame = s2.to_frame(name='Age')
name_frame.join(age_frame)
Name Age
0 A 10
1 B 20
2 C 30
3 D 40

3.1.6. From dictionary#

# Each dictionary key becomes a column label and the list stored under
# that key supplies the column's values, top to bottom.
df = pd.DataFrame(
    {
        'Name': ['Sahil', 'Sonia', 'Sourav', 'Vishal'],
        'Age': [20, 21, 19, 18],
    }
)
df
Name Age
0 Sahil 20
1 Sonia 21
2 Sourav 19
3 Vishal 18

3.1.6.1. Assigning indexes after declaring the dataframe#

# One label per existing row: 'a', 'b', 'c', 'd'.
new_indexes = list('abcd')

# Assigning to .index swaps the row labels of the existing frame in place;
# the data in the columns is untouched.
df.index = new_indexes
df
Name Age
a Sahil 20
b Sonia 21
c Sourav 19
d Vishal 18

3.1.7. From csv#

# read_csv accepts a URL as well as a local path; the file is downloaded
# and parsed, with its first line used as the column header.
# NOTE(review): the header in the output below keeps literal double quotes
# around "Number of Crashes", which suggests the file has whitespace after
# the comma so the quoting is not recognised. Passing skipinitialspace=True
# would presumably give a clean column name - confirm against the file.
df = pd.read_csv ('https://people.sc.fsu.edu/~jburkardt/data/csv/crash_catalonia.csv')
df
Day of Week "Number of Crashes"
0 Sunday 13664
1 Monday 17279
2 Tuesday 17337
3 Wednesday 17394
4 Thursday 17954
5 Friday 19147
6 Saturday 15714

3.1.7.1. Import specific columns#

# usecols restricts parsing to the listed columns; here only
# 'Day of Week' is loaded and the other column is dropped.
pd.read_csv('https://people.sc.fsu.edu/~jburkardt/data/csv/crash_catalonia.csv', usecols= ['Day of Week'])
Day of Week
0 Sunday
1 Monday
2 Tuesday
3 Wednesday
4 Thursday
5 Friday
6 Saturday

3.1.7.2. Treating additional values as NaN#

# na_values lists extra strings that should be parsed as missing (NaN)
# while reading; it does not replace anything after the fact.
# NOTE(review): 'np.nan' here is the literal text "np.nan", not the NumPy
# constant, and 'NA' is presumably already one of pandas' default missing
# markers - confirm against the read_csv documentation. Neither string
# occurs in this file, so the output matches the plain read_csv call.
pd.read_csv('https://people.sc.fsu.edu/~jburkardt/data/csv/crash_catalonia.csv', na_values=['NA','np.nan'])
Day of Week "Number of Crashes"
0 Sunday 13664
1 Monday 17279
2 Tuesday 17337
3 Wednesday 17394
4 Thursday 17954
5 Friday 19147
6 Saturday 15714

3.1.7.3. Read only first n rows#

# nrows caps how many data rows are read: only the first three rows after
# the header line are parsed, which is handy for previewing a large file.
pd.read_csv('https://people.sc.fsu.edu/~jburkardt/data/csv/crash_catalonia.csv',nrows=3)
Day of Week "Number of Crashes"
0 Sunday 13664
1 Monday 17279
2 Tuesday 17337