import os
import json
import copy
from functools import wraps

import pandas as pd

from specklepy.api.client import SpeckleClient
from tripGenerationFunc import *
import speckle_utils
import data_utils
# Locate config.json next to this script
current_directory = os.path.dirname(os.path.abspath(__file__))
config_file_path = os.path.join(current_directory, "config.json")
speckle_token = os.environ.get("SPECKLE_TOKEN")
# Load config.json and expose each key as a module-level variable.
# (At module level locals() and globals() are the same dict; globals()
# makes the intent explicit and keeps working if this code is ever
# wrapped in a function.)
if os.path.exists(config_file_path):
    with open(config_file_path, 'r') as f:
        config = json.load(f)
    globals().update(config)
    print("variables from json")
    # Now you can access the variables directly
    print(STREAM_ID)
    print(BRANCH_NAME_LAND_USES)
    print(TARGET_TRIP_RATE)
    print(ALPHA_LOW)
    print(F_VALUES_MANUAL)
    print(distance_matrices_of_interest)
    print(redistributeTrips)
    print(DISTANCE_BRACKETS)
    print(XLS_FILE_PATH)
    print("==================")
else:
    # everything below depends on the config, so fail fast
    raise FileNotFoundError("config.json not found in " + current_directory)
xls_file_path = os.path.join(current_directory, XLS_FILE_PATH)
print("full path", xls_file_path)
# fetch speckle data
CLIENT = SpeckleClient(host="https://speckle.xyz/")
# authenticate with the token from the environment instead of a hard-coded
# secret (SPECKLE_TOKEN is read above and should never live in source)
CLIENT.authenticate_with_token(token=speckle_token)

# get land use stream
stream_land_use = speckle_utils.getSpeckleStream(STREAM_ID,
                                                 BRANCH_NAME_LAND_USES,
                                                 CLIENT,
                                                 commit_id="")
# navigate to the list of speckle objects of interest
stream_data = stream_land_use["@Data"]["@{0}"]

# transform stream_data to a dataframe (keep a backup copy)
df_speckle_lu = speckle_utils.get_dataframe(stream_data, return_original_df=False)
df_main = df_speckle_lu.copy()

# set index column
df_main = df_main.set_index("ids", drop=False)
# get distance matrix stream
stream_distance_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
                                                         BRANCH_NAME_DISTANCE_MATRIX,
                                                         CLIENT,
                                                         commit_id="")

# navigate to the list of speckle objects of interest and parse every
# serialized distance matrix into a DataFrame keyed by its attribute name
distance_matrices = {}
for distM in stream_distance_matrice["@Data"]["@{0}"]:
    for kk in distM.__dict__.keys():
        try:
            if kk.split("+")[1].startswith("distance_matrix"):
                distance_matrix_dict = json.loads(distM[kk])
                origin_ids = distance_matrix_dict["origin_uuid"]
                destination_ids = distance_matrix_dict["destination_uuid"]
                distance_matrix = distance_matrix_dict["matrix"]
                # label rows/columns with origin/destination ids
                df_distances = pd.DataFrame(distance_matrix, index=origin_ids, columns=destination_ids)
                distance_matrices[kk] = df_distances
        except (IndexError, KeyError, TypeError, json.JSONDecodeError):
            # attributes without a "+..." name or a parsable payload are skipped
            pass
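# Each matrix attribute is assumed to carry a JSON payload of the following
# shape (the ids and values here are invented for illustration only):
#
# {
#     "origin_uuid":      ["node_a", "node_b"],
#     "destination_uuid": ["node_a", "node_b"],
#     "matrix":           [[0.0, 120.5],
#                          [118.2, 0.0]]
# }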
# get metric matrix stream
stream_metric_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
                                                       BRANCH_NAME_METRIC_DIST_MATRIX,
                                                       CLIENT,
                                                       commit_id="")

# same parsing as for the distance matrices above
metric_matrices = {}
for distM in stream_metric_matrice["@Data"]["@{0}"]:
    print(distM.__dict__.keys())
    for kk in distM.__dict__.keys():
        try:
            if kk.split("+")[1].startswith("metric_matrix"):
                metric_matrix_dict = json.loads(distM[kk])
                origin_ids = metric_matrix_dict["origin_uuid"]
                destination_ids = metric_matrix_dict["destination_uuid"]
                metric_matrix = metric_matrix_dict["matrix"]
                df_metric_dist = pd.DataFrame(metric_matrix, index=origin_ids, columns=destination_ids)
                # scale by 10 to compensate for a unit issue in the Speckle data
                metric_matrices[kk] = df_metric_dist * 10
                print("metric_matrix_dict", metric_matrix_dict.keys())
        except (IndexError, KeyError, TypeError, json.JSONDecodeError):
            pass
# NOTE: this overwrites the dict built by the loop above, keeping only the
# matrices listed in metric_matrices_of_interest (the *10 scaling is
# reapplied further below)
metric_matrices = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)
# record the source commit ids for provenance
sourceCommits = {
    "landuseCommitID": stream_land_use.id,
    "distanceMatrixCommitID": stream_distance_matrice.id,
    "metricMatrixCommitID": stream_metric_matrice.id
}
# READ XLS FILE ======================================
# Read the Excel workbook into Pandas DataFrames, one sheet at a time
if os.path.exists(xls_file_path):
    # Production
    df_production = pd.read_excel(xls_file_path, sheet_name='Production')
    df_production_transposed = df_production.T
    df_production = preprocess_dataFrame(df_production, headerRow_idx=2, numRowsStart_idx=3)
    df_production_transposed = preprocess_dataFrame(df_production_transposed, headerRow_idx=0, numRowsStart_idx=4,
                                                    numColsStart_idx=4, rowNames_idx=2)

    # Attraction
    df_attraction = pd.read_excel(xls_file_path, sheet_name='Attraction')
    df_attraction = preprocess_dataFrame(df_attraction, headerRow_idx=0, numRowsStart_idx=2)

    # Distribution_Matrix
    df_distributionMatrix = pd.read_excel(xls_file_path, sheet_name='Distribution_Matrix')
    df_distributionMatrix = preprocess_dataFrame(df_distributionMatrix, headerRow_idx=0, numRowsStart_idx=2,
                                                 numRowsEnd_idx=None, numColsStart_idx=2, numColsEnd_idx=None,
                                                 rowNames_idx=0)

    # Alphas
    df_alphas = pd.read_excel(xls_file_path, sheet_name='Alphas')
    df_alphas.columns = df_alphas.iloc[1]
    df_alphas = df_alphas.iloc[0, 2:]

    # Land use
    df_lu = pd.read_excel(xls_file_path, sheet_name='Example_Land_Use')
    df_lu = preprocess_dataFrame(df_lu, headerRow_idx=0, numRowsStart_idx=1)
    df_lu["nameCombined"] = df_lu.iloc[:, 1].astype(str) + "+" + df_lu.iloc[:, 0].astype(str)

    # Distance Matrix
    df_distMatrix = pd.read_excel(xls_file_path, sheet_name='Example_Distance_Matrix')
    df_distMatrix = preprocess_dataFrame(df_distMatrix, headerRow_idx=0, numRowsStart_idx=1, numRowsEnd_idx=None,
                                         numColsStart_idx=1, numColsEnd_idx=None, rowNames_idx=0)
else:
    # the trip model cannot run without the workbook, so fail fast
    raise FileNotFoundError("Excel file specified in config.json not found: " + xls_file_path)
# Land use structure =======
# This is the distance matrix that is used downstream
df_distances_aligned, df_lu_stream_aligned = align_dataframes(distance_matrices[distanceMatrixName], df_main, 'ids')

# Create a df with land uses
lu_cols = [col for col in df_lu_stream_aligned.columns if col.startswith("lu+")]
df_lu_stream = df_lu_stream_aligned[lu_cols].copy()

# Remove the "lu+" prefix from the column names. Note that
# .str.lstrip('lu+') would strip *characters* (any leading l, u or +),
# not the prefix, and would mangle names such as "lu+urban".
df_lu_stream.columns = [col[len("lu+"):] for col in df_lu_stream.columns]

df_lu_stream = df_lu_stream.T
df_lu_stream_t = df_lu_stream.T  # transposed back to the original orientation
df_lu_stream_with_nameLu_column = df_lu_stream.reset_index(drop=False).rename(columns={'index': 'nameLu'})
# ---
df_lu_names_xlsx = pd.concat([df_lu.iloc[:, 0:2], df_lu.iloc[:, -1]], axis=1)
df_lu_names_xlsx.index = df_lu_names_xlsx.iloc[:, 1]
df_lu_names_xlsx.columns = ['nameTripType', 'nameLu', 'nameCombined']
print(f"df_lu_names_xlsx shape: {df_lu_names_xlsx.shape}")
print(df_lu_names_xlsx.head())  # .head() alone is a no-op in a script
# --
# Merge DataFrames using an outer join
merged_df = pd.merge(df_lu_stream_with_nameLu_column, df_lu_names_xlsx, on='nameLu', how='outer')

# Get the unique names and their counts from df_lu_names_xlsx
name_counts = df_lu_names_xlsx['nameLu'].value_counts()

# Identify names in df_lu_stream_with_nameLu_column that are not in df_lu_names_xlsx
missing_names = df_lu_stream_with_nameLu_column.loc[~df_lu_stream_with_nameLu_column['nameLu'].isin(df_lu_names_xlsx['nameLu'])]

# Append the missing rows
df_lu_stream_duplicated = pd.concat([merged_df, missing_names], ignore_index=True)

# Find names in df_lu_names_xlsx that are not in df_lu_stream_with_nameLu_column
missing_names = df_lu_names_xlsx.loc[~df_lu_names_xlsx['nameLu'].isin(df_lu_stream_with_nameLu_column['nameLu'])]

# Sort both frames by land-use name
df_lu_names_sorted = df_lu_names_xlsx.sort_values(by='nameLu')
df_lu_stream_duplicated_sorted = df_lu_stream_duplicated.sort_values(by='nameLu')
# --
# Merge DataFrames to get the order of names
merged_order = pd.merge(df_lu_names_xlsx[['nameCombined']], df_lu_stream_duplicated[['nameCombined']], on='nameCombined', how='inner')

# Sort df_lu_stream_duplicated based on the order of names in df_lu_names_xlsx
df_lu_stream_sorted = df_lu_stream_duplicated.sort_values(
    by='nameCombined',
    key=lambda x: pd.Categorical(x, categories=merged_order['nameCombined'], ordered=True))

# Move the name columns to the front
column_order = ['nameTripType', 'nameCombined'] + [col for col in df_lu_stream_sorted.columns if col not in ['nameTripType', 'nameCombined']]
df_lu_stream_reordered = df_lu_stream_sorted[column_order]
df_lu_stream_reordered_t = df_lu_stream_reordered.T

# --
df_lu_stream_with_index = df_lu_stream_reordered_t.reset_index(drop=False).rename(columns={'index': 'ids'})
df_lu_stream_with_index.index = df_lu_stream_reordered_t.index
df_lu_num_t_index = df_lu_stream_with_index.iloc[3:]

df_distances_aligned_index = df_distances_aligned.reset_index(drop=False).rename(columns={'index': 'ids'})
df_distances_aligned_index.index = df_distances_aligned.index

df_lu_namesCombined = df_lu_stream_with_index.loc["nameCombined"].iloc[1:]

# Sort df_lu_stream_with_index to match the row order of the aligned distance matrix
df_lu_stream_sorted = df_lu_stream_with_index.sort_values(
    by=['ids'],
    key=lambda x: pd.Categorical(x, categories=df_distances_aligned_index['ids'], ordered=True))

df_lu_num = df_lu_stream_sorted.T.iloc[1:, :-3]
df_lu_num.index = df_lu_namesCombined
df_distMatrix_speckle = df_distances_aligned
df_attraction_num = df_attraction.reset_index().iloc[:-1, 6:]
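# NOTE: align_dataframes is another tripGenerationFunc helper whose body is
# not shown. From its use above it is assumed to restrict the distance matrix
# and the land-use frame to their shared ids and return both in matching
# order; a minimal sketch under that assumption (commented out so it never
# shadows the real import):
#
# def align_dataframes(df_matrix, df_data, id_col):
#     shared = df_matrix.index.intersection(df_data[id_col])  # ids in both frames
#     return df_matrix.loc[shared, shared], df_data.loc[shared]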
# =============================================================================
# TRIP GENERATION
# ATTRACTION & PRODUCTION =====================================================
"""
INPUTS
    df_attraction_num
    df_lu_num
    df_production
    df_lu
    df_production_transposed
"""
df_attraction_proNode_sum_total = attraction_proNode_full_iter(df_attraction_num, df_lu_num, True)

# Get the sqm per person
df_sqmProPerson = df_production.iloc[0, 4:].reset_index()[3]

# Get the trip rate (deep copy so df_tripRate doesn't alias df_production)
df_tripRate = copy.deepcopy(df_production)
df_tripRate.index = df_tripRate.iloc[:, 0]  # set the row names
df_tripRate = df_tripRate.iloc[1:, 2]

# Numerical df from production ==============================================
df_production_num = df_production.iloc[1:, 4:]
df_production_transposed1 = df_production_num.T
df_total_trips_allNodes = production_proNode_total(df_lu,
                                                   df_sqmProPerson,
                                                   df_tripRate,
                                                   df_production_num,
                                                   df_production_transposed,
                                                   df_lu_num, printSteps=False)

# Convert data types to float
df_total_trips_allNodes = df_total_trips_allNodes.astype(float)
df_tripRate = df_tripRate.astype(float)

# Dividing trips by the per-category trip rate converts trips back to persons
df_total_trips_allNodes_sumPerson = df_total_trips_allNodes.div(df_tripRate, axis=0).sum()
df_total_trips_allNodes_sumPerson_proCat = df_total_trips_allNodes.div(df_tripRate, axis=0)
df_total_trips_allNodes_sumPerson_proCat_t = df_total_trips_allNodes_sumPerson_proCat.T
df_total_trips_allNodes_sumPerson_proCat_t_sum = df_total_trips_allNodes_sumPerson_proCat_t.sum()

# total population = residents + tourists
total_population = (df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"]
                    + df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"])
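# For intuition (numbers invented): a node producing 500 residential trips at
# a trip rate of 2.5 trips/person corresponds to 500 / 2.5 = 200 residents;
# summing those person counts over "Tot_Res" and "Tot_tou" yields total_population.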
# =============================================================================
distance_matrices = extract_distance_matrices(stream_distance_matrice, distance_matrices_of_interest)
metric_matrices_ = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)
metric_matrices = {k: v * 10 for k, v in metric_matrices_.items()}  # rescale by 10 (Speckle unit issue)
logs = computeTrips(
    df_distributionMatrix,
    df_total_trips_allNodes,
    df_distMatrix_speckle,
    df_alphas,
    df_attraction_proNode_sum_total,
    df_distances_aligned,
    TARGET_TRIP_RATE,
    SCALING_FACTOR,
    total_population,
    df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"],
    df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"],
    distance_matrices,
    metric_matrices,
    redistributeTrips,
    DISTANCE_BRACKETS,
    ALPHA_LOW, ALPHA_MED, ALPHA_HIGH, ALPHA, ALPHA_UNIFORM, F_VALUES_MANUAL,
    CLIENT,
    STREAM_ID,
    TARGET_BRANCH_TM,
    sourceCommits
)
print(logs)