13.1.2. Getting the list of all files from the OS#
import os
import pandas as pd
# Current working directory (cwd); this is the root of the search below.
thisdir = os.getcwd()
# A specific directory of interest (defined here, but not used by the code below).
thatdir='C:\\Users\\Sahil Choudhary\\Book\\Python'

# Traditional way: nested loops over os.walk(), which yields one
# (root, directories, files) tuple for every folder below the start directory.
# csv_files = []
# for root, dirs, files in os.walk(thisdir):
#     for file in files:
#         if file.endswith(".csv"):
#             csv_files.append(os.path.relpath(os.path.join(root, file), thisdir))

# Single-line pythonic way. The clauses read in the same order as the
# traditional version: outer loop first, then inner loop, then the condition.
# Each entry is stored relative to thisdir rather than as a bare file name:
# os.walk() also descends into subfolders, and a bare name would no longer
# point at a file that lives in one of them. Files directly inside thisdir
# still appear as plain names such as 'demo.csv'.
csv_files = [
    os.path.relpath(os.path.join(root, name), thisdir)
    for root, _dirs, names in os.walk(thisdir)
    for name in names
    if name.endswith('.csv')
]
csv_files
['demo.csv', 'demo2.csv']
# applying some filters if you want
# Lists_list=[i for i in python_files if 'List' in i and 'checkpoint' not in i]
len(csv_files)
2
csv_files
['demo.csv', 'demo2.csv']
13.1.2.1. Reading all the CSV files into a list of DataFrames#
# Load each collected CSV into its own DataFrame, in the order of csv_files.
# Entries are passed to pd.read_csv() exactly as listed, so relative entries
# are resolved against the current working directory.
# The 'csv' substring guard is kept from the original loop: it skips any
# entry that does not mention 'csv' at all.
dataframes_list = [
    pd.read_csv(csv_path) for csv_path in csv_files if 'csv' in csv_path
]
dataframes_list
[ Name Hire Date Salary Sick Days remaining
0 Graham Chapman 03/15/14 50000.0 10
1 John Cleese 06/01/15 65000.0 8
2 Eric Idle 05/12/14 45000.0 10
3 Terry Jones 11/01/13 70000.0 3
4 Terry Gilliam 08/12/14 48000.0 7
5 Michael Palin 05/23/13 66000.0 8,
Name Hire Date Salary Sick Days remaining
0 Graham Chapman 2014-03-15 50000.0 10
1 John Cleese 2015-06-01 65000.0 8
2 Eric Idle 2014-05-12 45000.0 10
3 Terry Jones 2013-11-01 70000.0 3
4 Terry Gilliam 2014-08-12 48000.0 7
5 Michael Palin 2013-05-23 66000.0 8]
13.1.2.2. Applying operations on multiple dataframes at once#
def filter(df, name='Graham Chapman'):
    """Return the rows of *df* whose ``Name`` column equals *name*.

    Args:
        df: DataFrame that has a ``Name`` column.
        name: Value to match exactly. Defaults to ``'Graham Chapman'``, so a
            call with only *df* behaves as before.

    Returns:
        A DataFrame holding only the matching rows, with their original
        index labels; it is empty when no row matches.

    Raises:
        KeyError: If *df* has no ``Name`` column.
    """
    # NOTE: this function shadows the builtin filter(); the name is kept so
    # that existing calls keep working.
    # Bracket access is used so the lookup always targets the column and can
    # never be confused with a DataFrame attribute.
    return df[df['Name'] == name]
# Run the same row filter over every loaded DataFrame. The two-name unpacking
# means dataframes_list must hold exactly two frames.
df1, df2 = (filter(frame) for frame in dataframes_list)
df1
| | Name | Hire Date | Salary | Sick Days remaining |
|---|---|---|---|---|
| 0 | Graham Chapman | 03/15/14 | 50000.0 | 10 |
df2
| | Name | Hire Date | Salary | Sick Days remaining |
|---|---|---|---|---|
| 0 | Graham Chapman | 2014-03-15 | 50000.0 | 10 |