import pandas as pd
import os
#specify the path for input list of synthetic matrices
input_filepath = r'C:\\SEWTM - Storage 2\\Team Documents\\Project - Rebasing\\
26 Highway Matrix Build\\Incoming\\02 Synthetic Car and Bus v1\\'
input_filename = "Synth_matrix_list.xlsx"
#Specify the folder where the exported dataframes will be stored
output_filepath = r'C:\\SEWTM - Storage 2\\Team Documents\\Project - Rebasing\\
26 Highway Matrix Build\\Other 04 - Synthetic Car and Bus for Hwy Mat Build\\'
#Speficy the name of the file for Synthethic Bus Matrix to be exported
output_filename_bus = 'Synthethic_Bus.csv'
#Speficy the name of the file for Synthethic Car Matrix to be exported
output_filename_car = 'Synthethic_Car.csv'
#Specify the name of the file for Checking spreadsheet to be exported
output_checks_filename = 'Outputs_checks.xlsx'
#load the list of files as pandas dataframe
files_df=pd.read_excel(input_filepath + input_filename, sheet_name="Sheet1")
#create list of filenames to iterate over each filename
files_list = files_df["File Name"].values.tolist()
#create list for tour codes
tour_codes_list = files_df["Tour Code"].values.tolist()
#create list for modes
modes_list = files_df["Mode"].values.tolist()
#create lists of dataframes
#for calculating checks of incoming files:
dfs_incoming_checks = []
#for buses:
dfs_bus = []
#for cars:
dfs_car = []
#for each file on the list:
for i in range(len(files_list)):
#load file as dataframe
df=pd.read_csv((input_filepath+files_list[i]),header=None)
#add columns names for the dataframe
df.columns =['Origin', 'Destination', 'AM', 'IP','PM','OP']
#Create List for inputs check to obtain the values within iteration's i-th file
lst_incoming = [[files_list[i], df['AM'].sum() + df['IP'].sum() + df['PM'].sum() \
+ df['OP'].sum(), df.shape[0],tour_codes_list[i],modes_list[i][:3]]]
#Convert the list into the dataframe so pandas function can be applied on it
df_incoming = pd.DataFrame(lst_incoming, columns =['File', 'Trips Total (24hr)', 'Row Total', 'Tour','Mode'])
#append frame to list of datafrems for checks of incoming files:
dfs_incoming_checks.append(df_incoming)
#check which tour code corresponds to file
if tour_codes_list[i] == "HBEB":
#add new column for 24h aggregated flow
df['HBEB'] = df['AM'] + df['IP'] + df['PM'] + df['OP']
elif tour_codes_list[i] == "HBO":
#add new column for 24h aggregated flow
df['HBO'] = df['AM'] + df['IP'] + df['PM'] + df['OP']
elif tour_codes_list[i] == "HBW":
#add new column for 24h aggregated flow
df['HBW'] = df['AM'] + df['IP'] + df['PM'] + df['OP']
elif tour_codes_list[i] == "NHBEB":
#add new column for 24h aggregated flow
df['NHBEB'] = df['AM'] + df['IP'] + df['PM'] + df['OP']
elif tour_codes_list[i] == "NHBO":
#add new column for 24h aggregated flow
df['NHBO'] = df['AM'] + df['IP'] + df['PM'] + df['OP']
if modes_list[i] == "Bus":
#append frame to list for buses
dfs_bus.append(df)
elif modes_list[i] == "Car Driver":
#append frame to list for cars
dfs_car.append(df)
elif modes_list[i] == "Car Passenger":
#append frame to list for cars
dfs_car.append(df)
# concat frames together into one for bus and another for car
final_df_bus = pd.concat(dfs_bus)
final_df_car = pd.concat(dfs_car)
# concat incoming checks frames together
final_df_incoming= pd.concat(dfs_incoming_checks)
#aggregate tours with respesct to each OD pair
final_df_bus=final_df_bus.groupby(["Origin", "Destination"])[['HBEB','HBO','HBW','NHBEB','NHBO']].aggregate('sum').reset_index()
final_df_car=final_df_car.groupby(["Origin", "Destination"])[['HBEB','HBO','HBW','NHBEB','NHBO']].aggregate('sum').reset_index()
#####OUTPUT FILES CHECKS:
#Calculate Row totals
row_total_bus = final_df_bus.shape[0]
row_total_car = final_df_car.shape[0]
#Create Lists for output checks
lst_output_file_bus = [[output_filename_bus, final_df_bus['HBEB'].sum(), final_df_bus['HBO'].sum(), final_df_bus['HBW'].sum(),\
final_df_bus['NHBEB'].sum(), final_df_bus['NHBO'].sum(),row_total_bus]]
lst_output_file_car = [[output_filename_car, final_df_car['HBEB'].sum(), final_df_car['HBO'].sum(), final_df_car['HBW'].sum(),\
final_df_car['NHBEB'].sum(), final_df_car['NHBO'].sum(),row_total_car]]
#Convert the list into the dataframe so pandas function can be applied on it
df_output_file_bus = pd.DataFrame(lst_output_file_bus, columns =['Filename','HBEB','HBO','HBW','NHBEB','NHBO', 'Row Total'])
df_output_file_car = pd.DataFrame(lst_output_file_car, columns =['Filename','HBEB','HBO','HBW','NHBEB','NHBO', 'Row Total'])
#Concatenate dataframes into one output checks table
df_output_file = pd.concat([df_output_file_bus, df_output_file_car], ignore_index=True)
#Export Checking DataFrames to Excel using ExcelWriter
with pd.ExcelWriter(output_filepath+output_checks_filename) as excel_writer:
final_df_incoming.to_excel(excel_writer, sheet_name='Input files checks', index=False)
df_output_file.to_excel(excel_writer, sheet_name='Outputs checks', index=False)
#Export synth matrices dataframes to csv
final_df_bus.to_csv(output_filepath+output_filename_bus,index=False)
final_df_car.to_csv(output_filepath+output_filename_car,index=False)