In [1]:
import pandas as pd
import os
#specify the path for input list of synthetic matrices
input_filepath = r'C:\\SEWTM - Storage 2\\Team Documents\\Project - Rebasing\\
26 Highway Matrix Build\\Incoming\\02 Synthetic Car and Bus v1\\'
input_filename = "Synth_matrix_list.xlsx"

#Specify the folder where the exported dataframes will be stored
output_filepath = r'C:\\SEWTM - Storage 2\\Team Documents\\Project - Rebasing\\
26 Highway Matrix Build\\Other 04 - Synthetic Car and Bus for Hwy Mat Build\\'
#Speficy the name of the file for Synthethic Bus Matrix to be exported
output_filename_bus = 'Synthethic_Bus.csv'
#Speficy the name of the file for Synthethic Car Matrix to be exported
output_filename_car = 'Synthethic_Car.csv'

#Specify the name of the file for Checking spreadsheet to be exported
output_checks_filename = 'Outputs_checks.xlsx'

#load the list of files as pandas dataframe
files_df=pd.read_excel(input_filepath + input_filename, sheet_name="Sheet1")

#create list of filenames to iterate over each filename
files_list = files_df["File Name"].values.tolist()
#create list for tour codes
tour_codes_list = files_df["Tour Code"].values.tolist()
#create list for modes
modes_list  = files_df["Mode"].values.tolist()

#create lists of dataframes
#for calculating checks of incoming files:
dfs_incoming_checks = []
#for buses:
dfs_bus = []

#for cars:
dfs_car = []

#for each file on the list:
for i in range(len(files_list)):
    #load file as dataframe
    df=pd.read_csv((input_filepath+files_list[i]),header=None)
    #add columns names for the dataframe
    df.columns =['Origin', 'Destination', 'AM', 'IP','PM','OP']
    
     
   
    #Create List for inputs check to obtain the values within iteration's i-th file
    lst_incoming = [[files_list[i], df['AM'].sum() + df['IP'].sum() + df['PM'].sum() \
                     + df['OP'].sum(), df.shape[0],tour_codes_list[i],modes_list[i][:3]]]
    #Convert the list into the dataframe so pandas function can be applied on it
    df_incoming = pd.DataFrame(lst_incoming, columns =['File', 'Trips Total (24hr)', 'Row Total', 'Tour','Mode'])
    #append frame to list of datafrems for checks of incoming files:
    dfs_incoming_checks.append(df_incoming)
    
    
    #check which tour code corresponds to file
    if tour_codes_list[i] == "HBEB":
        #add new column for 24h aggregated flow 
        df['HBEB'] = df['AM'] + df['IP'] + df['PM'] + df['OP']
                
    elif tour_codes_list[i] == "HBO":  
        #add new column for 24h aggregated flow 
        df['HBO'] = df['AM'] + df['IP'] + df['PM'] + df['OP']    
    elif tour_codes_list[i] == "HBW":  
        #add new column for 24h aggregated flow 
        df['HBW'] = df['AM'] + df['IP'] + df['PM'] + df['OP']          
    elif tour_codes_list[i] == "NHBEB":  
        #add new column for 24h aggregated flow 
        df['NHBEB'] = df['AM'] + df['IP'] + df['PM'] + df['OP']          
    elif tour_codes_list[i] == "NHBO": 
        #add new column for 24h aggregated flow 
        df['NHBO'] = df['AM'] + df['IP'] + df['PM'] + df['OP']    
  
    if modes_list[i] == "Bus":
        #append frame to list for buses
        dfs_bus.append(df)
    elif modes_list[i] == "Car Driver": 
        #append frame to list for cars
        dfs_car.append(df)
    elif modes_list[i] == "Car Passenger": 
        #append frame to list for cars
        dfs_car.append(df)

# concat frames together into one for bus and another for car
final_df_bus = pd.concat(dfs_bus)  
final_df_car = pd.concat(dfs_car)

# concat incoming checks frames together
final_df_incoming= pd.concat(dfs_incoming_checks)  

#aggregate tours with respesct to each OD pair
final_df_bus=final_df_bus.groupby(["Origin", "Destination"])[['HBEB','HBO','HBW','NHBEB','NHBO']].aggregate('sum').reset_index()
final_df_car=final_df_car.groupby(["Origin", "Destination"])[['HBEB','HBO','HBW','NHBEB','NHBO']].aggregate('sum').reset_index()




#####OUTPUT FILES CHECKS:    
#Calculate Row totals
row_total_bus = final_df_bus.shape[0]
row_total_car = final_df_car.shape[0]

#Create Lists for output checks
lst_output_file_bus = [[output_filename_bus, final_df_bus['HBEB'].sum(), final_df_bus['HBO'].sum(), final_df_bus['HBW'].sum(),\
                        final_df_bus['NHBEB'].sum(), final_df_bus['NHBO'].sum(),row_total_bus]]
lst_output_file_car = [[output_filename_car, final_df_car['HBEB'].sum(), final_df_car['HBO'].sum(), final_df_car['HBW'].sum(),\
                        final_df_car['NHBEB'].sum(), final_df_car['NHBO'].sum(),row_total_car]]

#Convert the list into the dataframe so pandas function can be applied on it
df_output_file_bus = pd.DataFrame(lst_output_file_bus, columns =['Filename','HBEB','HBO','HBW','NHBEB','NHBO', 'Row Total'])
df_output_file_car = pd.DataFrame(lst_output_file_car, columns =['Filename','HBEB','HBO','HBW','NHBEB','NHBO', 'Row Total'])
#Concatenate dataframes into one output checks table
df_output_file = pd.concat([df_output_file_bus, df_output_file_car], ignore_index=True)

#Export Checking DataFrames to Excel using ExcelWriter
with pd.ExcelWriter(output_filepath+output_checks_filename) as excel_writer:
    final_df_incoming.to_excel(excel_writer, sheet_name='Input files checks', index=False)
    df_output_file.to_excel(excel_writer, sheet_name='Outputs checks', index=False)
    
#Export synth matrices dataframes to csv
final_df_bus.to_csv(output_filepath+output_filename_bus,index=False)
final_df_car.to_csv(output_filepath+output_filename_car,index=False)
In [ ]: