13.1.2. Getting the list of all the files from OS#

import os
import pandas as pd

# A specific directory of interest, spelled out as an explicit Windows path
thatdir = 'C:\\Users\\Sahil Choudhary\\Book\\Python'

# The directory the interpreter is currently running in (cwd)
thisdir = os.getcwd()

# Traditional way
# r=root, d=directories, f = files

# csv_files=[]
# for r, d, f in os.walk(thisdir):
#     for file in f:
#         if file.endswith(".csv"):
#             csv_files.append(file)


# The same search written as a single list comprehension.
# NOTE: only the bare file name is kept, not the folder it was found in, so a
# match inside a sub-directory of `thisdir` cannot be opened by name alone.
csv_files = [
    name
    for _root, _dirs, names in os.walk(thisdir)
    for name in names
    if name.endswith('.csv')
]
# The clauses read in the same order as the nested loops: the outer `for`
# first, then the inner `for`, and finally the `if` filter.
csv_files
['demo.csv', 'demo2.csv']
# applying some filters if you want
# Lists_list=[i for i in python_files if 'List' in i and 'checkpoint' not in i]
len(csv_files)
2
csv_files
['demo.csv', 'demo2.csv']

13.1.2.1. Reading all the CSVs and collecting them into a list of DataFrames#

# Collect one DataFrame per discovered CSV file.
dataframes_list = []

# Iterate over the file names directly instead of indexing via range(len(...)).
for csv_name in csv_files:
    # Use the same extension test that built `csv_files`; a bare substring
    # check ('csv' in name) would also accept names such as 'csv_notes.txt'.
    if csv_name.endswith('.csv'):
        # The names carry no directory part, so pandas resolves them
        # relative to the current working directory.
        dataframes_list.append(pd.read_csv(csv_name))
dataframes_list
[             Name Hire Date   Salary  Sick Days remaining
 0  Graham Chapman  03/15/14  50000.0                   10
 1     John Cleese  06/01/15  65000.0                    8
 2       Eric Idle  05/12/14  45000.0                   10
 3     Terry Jones  11/01/13  70000.0                    3
 4   Terry Gilliam  08/12/14  48000.0                    7
 5   Michael Palin  05/23/13  66000.0                    8,
              Name   Hire Date   Salary  Sick Days remaining
 0  Graham Chapman  2014-03-15  50000.0                   10
 1     John Cleese  2015-06-01  65000.0                    8
 2       Eric Idle  2014-05-12  45000.0                   10
 3     Terry Jones  2013-11-01  70000.0                    3
 4   Terry Gilliam  2014-08-12  48000.0                    7
 5   Michael Palin  2013-05-23  66000.0                    8]

13.1.2.2. Applying operations on multiple dataframes at once#

def filter(df):
    """Return the rows of ``df`` whose ``Name`` column equals 'Graham Chapman'.

    NOTE: this function's name shadows Python's built-in ``filter`` for the
    rest of the module; the name is kept because later code calls it.
    """
    # Boolean mask: True for every row whose Name matches exactly.
    wanted_rows = df.Name.eq('Graham Chapman')
    return df.loc[wanted_rows]


# Apply the `filter` helper defined above (not the built-in) to every loaded
# frame; the unpacking requires exactly two frames in `dataframes_list`.
df1, df2 = map(filter, dataframes_list)
df1
             Name Hire Date   Salary  Sick Days remaining
0  Graham Chapman  03/15/14  50000.0                   10
df2
             Name   Hire Date   Salary  Sick Days remaining
0  Graham Chapman  2014-03-15  50000.0                   10