Spaces:

UFOSINT
/

UAP-Data-Analysis-Tool

Sleeping

App Files Files Community

Ashoka74 committed on Jul 13, 2024

Commit

fa39f09

verified ·

1 Parent(s): 6a64be0

fix bug for dates filters

Browse files

Files changed (1) hide show

map.py +507 -506

map.py CHANGED Viewed

@@ -1,506 +1,507 @@
-import json
-import streamlit as st
-#import geopandas as gpd
-from keplergl import keplergl
-import pandas as pd
-import streamlit as st
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-from uap_analyzer import UAPParser, UAPAnalyzer, UAPVisualizer
-# import ChartGen
-# from ChartGen import ChartGPT
-from Levenshtein import distance
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import confusion_matrix
-from stqdm import stqdm
-stqdm.pandas()
-import streamlit.components.v1 as components
-from dateutil import parser
-from sentence_transformers import SentenceTransformer
-import torch
-import squarify
-import matplotlib.colors as mcolors
-import textwrap
-import datamapplot
-from streamlit_extras.stateful_button import button as stateful_button
-from streamlit_keplergl import keplergl_static
-from keplergl import KeplerGl
-st.set_option('deprecation.showPyplotGlobalUse', False)
-from pandas.api.types import (
-    is_categorical_dtype,
-    is_datetime64_any_dtype,
-    is_numeric_dtype,
-    is_object_dtype,
-)
-st.title('Interactive Map')
-# Initialize session state.
-# Each key is seeded only when it is absent, so values already stored in the
-# session are kept.
-_SESSION_DEFAULTS = {
-    'analyzers': [],
-    'col_names': [],
-    'clusters': {},
-    'new_data': pd.DataFrame(),
-    'dataset': pd.DataFrame(),
-    'data_processed': False,
-    'stage': 0,
-    'filtered_data': None,
-    'gemini_answer': None,
-    'parsed_responses': None,
-    'map_generated': False,
-    # The guard for this flag used to test the misspelled key 'date_loaded',
-    # which nothing in this script stores, so 'data_loaded' was overwritten
-    # with False every time this code ran.
-    'data_loaded': False,
-    'datasets': [],
-}
-for _key, _default in _SESSION_DEFAULTS.items():
-    if _key not in st.session_state:
-        st.session_state[_key] = _default
-# sf_zip_geo_gdf = gpd.read_file("sf_zip_geo.geojson")
-# sf_zip_geo_gdf.label = "SF Zip Geo"
-# sf_zip_geo_gdf.id = "sf-zip-geo"
-# st.session_state.datasets.append(sf_zip_geo_gdf)
-def plot_treemap(df, column, top_n=32):
-    """
-    Draw a treemap of how often each value of one column occurs.
-    The `top_n` most frequent values get a tile each; all remaining values are
-    pooled into one 'Other' tile. Every tile is labelled with its value and
-    its share of the rows.
-    Args:
-        df (pd.DataFrame): Data to summarise; it is not modified
-        column (str): Column whose values are counted
-        top_n (int): Number of most frequent values that keep their own tile
-    Returns:
-        matplotlib.figure.Figure: 20x12 inch figure with a transparent background
-    """
-    fig, ax = plt.subplots(figsize=(20, 12))
-    fig.patch.set_alpha(0)
-    ax.patch.set_alpha(0)
-    # Get the top N labels
-    top_labels = df[column].value_counts().iloc[:top_n].index
-    # Replace all values not in the top N with 'Other'. The result stays in a
-    # local Series rather than a new '<column>_revised' column, so the frame
-    # passed in by the caller is left untouched.
-    revised = pd.Series(np.where(df[column].isin(top_labels), df[column], 'Other'))
-    # Get the value counts including the 'Other' category
-    counts = revised.value_counts()
-    if counts.empty:
-        # No rows to summarise: return the blank figure
-        ax.axis('off')
-        return fig
-    sizes = counts.values
-    # Get a gradient of colors
-    colors = plt.cm.Oranges(np.linspace(0.3, 0.9, len(sizes)))[::-1]
-    # Prepare labels with the % of each category
-    percents = sizes / sizes.sum()
-    labels = [f'{label}\n {percent:.1%}' for label, percent in zip(counts.index, percents)]
-    # Plot the treemap on the axes created above
-    squarify.plot(sizes=sizes, label=labels, alpha=0.7, pad=True, color=colors, ax=ax, text_kwargs={'fontsize': 10})
-    # Iterate over text elements and rectangles (patches) in the axes for color adjustment
-    for text, rect in zip(ax.texts, ax.patches):
-        r, g, b, _ = mcolors.to_rgba(rect.get_facecolor())
-        brightness = np.average([r, g, b])
-        # White text on dark tiles, black text on light tiles
-        text.set_color('white' if brightness < 0.5 else 'black')
-        # Adjust font size based on rectangle's area and wrap long text
-        text.set_fontsize(np.sqrt(rect.get_width() * rect.get_height()) * 0.8)
-        text.set_text(textwrap.fill(text.get_text(), width=20))
-    ax.axis('off')
-    ax.invert_yaxis()
-    return fig
-def plot_hist(df, column, bins=10, kde=True):
-    """
-    Draw an orange histogram of one column on a transparent background.
-    Args:
-        df (pd.DataFrame): Data to plot
-        column (str): Column to bin
-        bins (int): Number of bins; an integer below 1 is raised to 1, any
-            other kind of value is handed to seaborn unchanged
-        kde (bool): Whether to overlay a kernel density estimate
-    Returns:
-        matplotlib.figure.Figure: 12x6 inch figure
-    """
-    # A bin count of 0 or less cannot be drawn; callers that derive the count
-    # from the number of unique values can produce 0 for tiny selections.
-    if isinstance(bins, (int, np.integer)):
-        bins = max(int(bins), 1)
-    fig, ax = plt.subplots(figsize=(12, 6))
-    # `kde` is forwarded (it used to be ignored) and the plot is bound to `ax`
-    sns.histplot(data=df, x=column, kde=kde, bins=bins, color='orange', ax=ax)
-    # set the ticks and frame in orange
-    for side in ('bottom', 'top', 'right', 'left'):
-        ax.spines[side].set_color('orange')
-    ax.xaxis.label.set_color('orange')
-    ax.yaxis.label.set_color('orange')
-    ax.tick_params(axis='x', colors='orange')
-    ax.tick_params(axis='y', colors='orange')
-    ax.title.set_color('orange')
-    # Set transparent background
-    fig.patch.set_alpha(0)
-    ax.patch.set_alpha(0)
-    return fig
-def plot_line(df, x_column, y_columns, figsize=(12, 10), color='orange', title=None, rolling_mean_value=2):
-    """
-    Plot one line per column against a shared x column, optionally smoothed.
-    Args:
-        df (pd.DataFrame): Data to plot; it is not modified
-        x_column (str): Column used for sorting and for the x axis
-        y_columns (list[str] | str): Column(s) drawn as separate lines
-        figsize (tuple): Figure size in inches
-        color (str): Color of the title, axis labels, ticks, frame and legend
-        title (str | None): Title; defaults to '<y columns> over <x_column>'
-        rolling_mean_value (int): Each y column is replaced by its rolling mean
-            over len(df) // rolling_mean_value rows (at least 1 row); a falsy
-            value disables smoothing
-    Returns:
-        matplotlib.figure.Figure: Figure with a transparent background
-    """
-    # Accept a single column name as well as a list of names
-    if isinstance(y_columns, str):
-        y_columns = [y_columns]
-    y_columns = list(y_columns)
-    # Sort by the x column; sort_values returns a new frame, so the
-    # assignment below does not touch the caller's data
-    df = df.sort_values(by=x_column)
-    # Calculate rolling mean for each y_column
-    if rolling_mean_value:
-        # Frames shorter than rolling_mean_value would give a window of 0 rows
-        window = max(len(df) // rolling_mean_value, 1)
-        df[y_columns] = df[y_columns].rolling(window).mean()
-    # Create the plot
-    fig, ax = plt.subplots(figsize=figsize)
-    line_colors = plt.cm.Oranges(np.linspace(0.2, 1, len(y_columns)))
-    # Plot each y_column as a separate line with a different color
-    for line_color, y_column in zip(line_colors, y_columns):
-        df.plot(x=x_column, y=y_column, ax=ax, color=line_color, label=y_column, linewidth=.5)
-    # Rotate x-axis labels in place, leaving the tick label text to the axis
-    plt.setp(ax.get_xticklabels(), rotation=30, ha='right')
-    # Set title, labels, and legend
-    ax.set_title(title or f'{", ".join(y_columns)} over {x_column}', color=color, fontweight='bold')
-    ax.set_xlabel(x_column, color=color)
-    ax.set_ylabel(', '.join(y_columns), color=color)
-    for side in ('bottom', 'top', 'right', 'left'):
-        ax.spines[side].set_color(color)
-    ax.tick_params(axis='both', colors=color)
-    ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor=color, edgecolor=color)
-    # Remove background
-    fig.patch.set_alpha(0)
-    ax.patch.set_alpha(0)
-    return fig
-def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None):
-    """
-    Draw a bar chart of one column against another on a transparent background.
-    Args:
-        df (pd.DataFrame): Data to plot
-        x_column (str): Column shown on the x axis
-        y_column (str): Column shown on the y axis
-        figsize (tuple): Figure size in inches
-        color (str): Color of the bars, title, axis labels, ticks and frame
-        title (str | None): Title; defaults to '<y_column> by <x_column>'
-    Returns:
-        matplotlib.figure.Figure: The bar chart
-    """
-    fig, ax = plt.subplots(figsize=figsize)
-    sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
-    ax.set_title(title if title else f'{y_column} by {x_column}', color=color, fontweight='bold')
-    ax.set_xlabel(x_column, color=color)
-    ax.set_ylabel(y_column, color=color)
-    ax.tick_params(axis='both', colors=color)
-    for side in ('bottom', 'top', 'right', 'left'):
-        ax.spines[side].set_color(color)
-    # Remove background
-    fig.patch.set_alpha(0)
-    ax.patch.set_alpha(0)
-    # The bars are drawn without a label, so only build a legend when some
-    # artist actually provides an entry for it
-    handles, _ = ax.get_legend_handles_labels()
-    if handles:
-        ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor=color, edgecolor=color)
-    return fig
-def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
-    """
-    Draw one seaborn bar series per entry of `x_columns`, in orange styling.
-    NOTE(review): every series plots the same `y_column`; the entries of
-    `x_columns` only set the bar width, the position offset and the legend
-    label. Confirm that this is the intended chart.
-    Args:
-        df (pd.DataFrame): Data to plot; its index provides the tick labels
-        x_columns (list[str]): One bar series is drawn per entry
-        y_column (str): Column providing the bar heights
-        figsize (tuple): Figure size in inches
-        colors (list | None): One color per series, or None for the default
-        title (str | None): Title; defaults to '<y_column> by <x columns>'
-    Returns:
-        matplotlib.figure.Figure: Figure with a transparent background
-    """
-    fig, ax = plt.subplots(figsize=figsize)
-    n_series = len(x_columns)
-    width = 0.8 / n_series  # the width of the bars
-    # Float positions: the per-series offsets are fractional, and adding them
-    # in place to the integer array from np.arange raises a casting error
-    base = np.arange(len(df), dtype=float)  # the label locations
-    for i, x_column in enumerate(x_columns):
-        sns.barplot(data=df, x=base + i * width, y=y_column, color=colors[i] if colors else None, ax=ax, width=width, label=x_column)
-    ax.set_title(title if title else f'{y_column} by {", ".join(x_columns)}', color='orange', fontweight='bold')
-    ax.set_xlabel('Groups', color='orange')
-    ax.set_ylabel(y_column, color='orange')
-    # Ticks sit half a group width to the right of the base positions
-    ax.set_xticks(base + width * n_series / 2)
-    ax.set_xticklabels(df.index)
-    ax.tick_params(axis='x', colors='orange')
-    ax.tick_params(axis='y', colors='orange')
-    # Remove background
-    fig.patch.set_alpha(0)
-    ax.patch.set_alpha(0)
-    for side in ('bottom', 'top', 'right', 'left'):
-        ax.spines[side].set_color('orange')
-    ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor='orange', edgecolor='orange')
-    return fig
-def generate_kepler_map(data):
-    """
-    Call `keplergl` with `data` and a height of 400 and return the result.
-    NOTE(review): `keplergl` is bound by `from keplergl import keplergl`;
-    confirm that this name is callable.
-    """
-    return keplergl(data, height=400)
-def _parse_date_column(series):
-    """
-    Try to read a text column as timestamps.
-    Returns the parsed, timezone-naive Series when at least half of the
-    non-missing values parse as dates, otherwise None.
-    """
-    try:
-        parsed = pd.to_datetime(series, errors='coerce')
-    except (ValueError, TypeError, OverflowError):
-        return None
-    if not is_datetime64_any_dtype(parsed):
-        return None
-    non_missing = series.notna().sum()
-    # errors='coerce' turns every unparseable value into NaT, so free text
-    # would otherwise "parse" into an all-NaT date column
-    if non_missing == 0 or parsed.notna().sum() * 2 < non_missing:
-        return None
-    return parsed.dt.tz_localize(None)
-def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Adds a UI on top of a dataframe to let viewers filter columns
-    The viewer picks the columns to filter on; each picked column gets a
-    widget that matches its content:
-      * categorical dtype, or fewer than 120 distinct non-numeric, non-date
-        values: multiselect of the values, plus a treemap
-      * numeric: range slider, plus a histogram
-      * datetime dtype, or text that parses as dates: date range picker, plus
-        line / bar charts of the columns filtered before it
-      * anything else: substring / regex text box
-    Args:
-        df (pd.DataFrame): Original dataframe
-    Returns:
-        pd.DataFrame: Filtered dataframe
-    """
-    import re  # only needed to validate the viewer's pattern
-    df_ = df.copy()
-    # The column picker is requested under alternative labels until one call
-    # succeeds. NOTE(review): presumably this works around a widget clash when
-    # the function runs more than once per page - confirm.
-    to_filter_columns = []
-    for picker_label in ("Filter dataframe on", "Filter dataframe", "Filter the dataframe on"):
-        try:
-            to_filter_columns = st.multiselect(picker_label, df_.columns)
-            break
-        except Exception:
-            continue
-    filtered_columns = []
-    for column in to_filter_columns:
-        _, right = st.columns((1, 20))
-        series = df_[column]
-        is_date = is_datetime64_any_dtype(series)
-        is_numeric = is_numeric_dtype(series)
-        # Treat columns with < 120 unique values as categorical if not date or numeric
-        if is_categorical_dtype(series) or (series.nunique() < 120 and not is_date and not is_numeric):
-            options = series.value_counts().index.tolist()
-            user_cat_input = right.multiselect(
-                f"Values for {column}",
-                options,
-                default=options,
-            )
-            df_ = df_[df_[column].isin(user_cat_input)]
-            filtered_columns.append(column)
-            with st.status(f"Category Distribution: {column}", expanded=False):
-                # Hand over a copy of just this column so the plotting helper
-                # cannot add its working column to the filtered result
-                st.pyplot(plot_treemap(df_[[column]].copy(), column))
-        elif is_numeric:
-            _min = float(series.min())
-            _max = float(series.max())
-            if _min < _max:
-                user_num_input = right.slider(
-                    f"Values for {column}",
-                    min_value=_min,
-                    max_value=_max,
-                    value=(_min, _max),
-                    step=(_max - _min) / 100,
-                )
-                df_ = df_[df_[column].between(*user_num_input)]
-            else:
-                # A zero-width (or all-missing) range would give a step of 0,
-                # which is not a usable slider step; there is nothing to narrow
-                right.caption(f"{column} has a single value; nothing to filter")
-            filtered_columns.append(column)
-            with st.status(f"Numerical Distribution: {column}", expanded=False):
-                # Half the number of distinct values, but never fewer than 1 bin
-                n_bins = max((len(df_[column].unique()) - 1) // 2, 1)
-                st.pyplot(plot_hist(df_, column, bins=n_bins))
-        else:
-            # Decide between a date filter and a text filter
-            if is_date:
-                dates = series.dt.tz_localize(None)
-            elif is_object_dtype(series):
-                dates = _parse_date_column(series)
-            else:
-                dates = None
-            if dates is not None and dates.notna().any():
-                # Work on a fresh copy so the converted column is not written
-                # into a slice of an earlier filter result
-                df_ = df_.copy()
-                df_[column] = dates
-                min_date = dates.min().date()
-                max_date = dates.max().date()
-                user_date_input = right.date_input(
-                    f"Values for {column}",
-                    value=(min_date, max_date),
-                    min_value=min_date,
-                    max_value=max_date,
-                )
-                if len(user_date_input) == 2:
-                    start_date = pd.Timestamp(user_date_input[0])
-                    # Exclusive upper bound at the midnight after the chosen
-                    # end day, so rows stamped later on that day are kept
-                    end_date = pd.Timestamp(user_date_input[1]) + pd.Timedelta(days=1)
-                    in_range = (df_[column] >= start_date) & (df_[column] < end_date)
-                    df_ = df_.loc[in_range]
-                if filtered_columns:
-                    numeric_columns = [col for col in filtered_columns if is_numeric_dtype(df_[col])]
-                    # now to deal with categorical columns
-                    categorical_columns = [col for col in filtered_columns if is_categorical_dtype(df_[col])]
-                    with st.status(f"Date Distribution: {column}", expanded=False):
-                        # Each chart is only attempted when it has columns to show
-                        if numeric_columns:
-                            try:
-                                st.pyplot(plot_line(df_, column, numeric_columns))
-                            except Exception as e:
-                                st.error(f"Error plotting line chart: {e}")
-                        if categorical_columns:
-                            try:
-                                st.pyplot(plot_bar(df_, column, categorical_columns[0]))
-                            except Exception as e:
-                                st.error(f"Error plotting bar chart: {e}")
-            else:
-                user_text_input = right.text_input(
-                    f"Substring or regex in {column}",
-                )
-                if user_text_input:
-                    try:
-                        # Validate first: the text is used as a regular expression
-                        re.compile(user_text_input)
-                    except re.error as e:
-                        right.error(f"Invalid regular expression: {e}")
-                    else:
-                        df_ = df_[df_[column].astype(str).str.contains(user_text_input)]
-    # write len of df after filtering with % of original
-    share = len(df_) / len(df) * 100 if len(df) else 0.0
-    st.write(f"{len(df_)} rows ({share:.2f}%)")
-    return df_
-def find_lat_lon_columns(df):
-    """
-    Guess the latitude and longitude columns of a dataframe from their names.
-    A column is a latitude candidate when its lower-cased name contains 'lat'
-    and a longitude candidate when it contains 'lon' or 'lng'. The first
-    candidate of each kind, in column order, is used.
-    Args:
-        df (pd.DataFrame): Dataframe whose column labels are inspected
-    Returns:
-        tuple: (latitude label, longitude label), or (None, None) unless both
-        kinds of column are present
-    """
-    # Labels are compared as text so that non-string labels (for example
-    # integer column names) cannot break the name matching
-    labelled = [(label, str(label).lower()) for label in df.columns]
-    lat_columns = [label for label, name in labelled if 'lat' in name]
-    lon_columns = [label for label, name in labelled if 'lon' in name or 'lng' in name]
-    if lat_columns and lon_columns:
-        return lat_columns[0], lon_columns[0]
-    return None, None
-my_dataset = st.file_uploader("Upload Parsed DataFrame", type=["csv", "xlsx"])
-map_1 = KeplerGl(height=800)
-# Static reference datasets that are added to the map next to the upload
-powerplant = pd.read_csv('global_power_plant_database.csv')
-secret_bases = pd.read_csv('secret_bases.csv')
-map_1.add_data(data=secret_bases, name="secret_bases")
-map_1.add_data(data=powerplant, name='nuclear_powerplants')
-if my_dataset is not None:
-    # The uploader only accepts .csv and .xlsx, so the file extension picks
-    # the reader. NOTE(review): the MIME type used before can presumably
-    # differ between browsers for the same .csv file - confirm.
-    upload_name = my_dataset.name.lower()
-    if upload_name.endswith('.csv'):
-        read_upload = pd.read_csv
-    elif upload_name.endswith('.xlsx'):
-        read_upload = pd.read_excel
-    else:
-        # Kept outside the try blocks so the broad handlers below cannot
-        # intercept st.stop()
-        st.error("Unsupported file type. Please upload a CSV or Excel file.")
-        st.stop()
-    data = None
-    try:
-        data = read_upload(my_dataset)
-    except Exception as e:
-        st.error(f"An error occurred while reading the file: {e}")
-    if data is not None:
-        try:
-            # Named filtered_df so the module-level `parser` imported from
-            # dateutil is not replaced by a dataframe
-            filtered_df = filter_dataframe(data)
-            st.session_state['parsed_responses'] = filtered_df
-            st.dataframe(filtered_df)
-            st.success(f"Successfully loaded and displayed data from {my_dataset.name}")
-            st.session_state['data_loaded'] = True
-            # Load the base config and the config holding the sighting layers
-            with open('military_config.kgl', 'r', encoding='utf-8') as f:
-                base_config = json.load(f)
-            with open('uap_config.kgl', 'r', encoding='utf-8') as f:
-                uap_config = json.load(f)
-            vis_state = base_config['config']['visState']
-            uap_layers = uap_config['config']['visState']['layers']
-            # First column whose name mentions "date", in any letter case and
-            # for any label type
-            date_columns = [col for col in filtered_df.columns if 'date' in str(col).lower()]
-            if date_columns:
-                # NOTE(review): only dataId and name are set on this filter;
-                # confirm that the map accepts a filter without further fields
-                vis_state['filters'].append({
-                    "dataId": "uap_sightings",
-                    "name": date_columns[0],
-                })
-            map_1.add_data(data=filtered_df, name="uap_sightings")
-            # Find the latitude and longitude columns in the dataframe
-            lat_col, lon_col = find_lat_lon_columns(filtered_df)
-            if lat_col is not None and lon_col is not None:
-                # Point the sighting layers at the detected coordinate columns
-                for layer in uap_layers:
-                    if 'config' in layer and 'columns' in layer['config']:
-                        layer_columns = layer['config']['columns']
-                        if 'lat' in layer_columns:
-                            layer_columns['lat'] = lat_col
-                        if 'lng' in layer_columns:
-                            layer_columns['lng'] = lon_col
-            # The sighting layers are appended to the base config whether or
-            # not coordinate columns were found
-            vis_state['layers'].extend(uap_layers)
-            map_1.config = base_config
-            keplergl_static(map_1, center_map=True)
-            st.session_state['map_generated'] = True
-        except Exception as e:
-            st.error(f"An error occurred while filtering the data or building the map: {e}")
-else:
-    st.warning("Please upload a file to get started.")

+import json
+import streamlit as st
+#import geopandas as gpd
+from keplergl import keplergl
+import pandas as pd
+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from uap_analyzer import UAPParser, UAPAnalyzer, UAPVisualizer
+# import ChartGen
+# from ChartGen import ChartGPT
+from Levenshtein import distance
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import confusion_matrix
+from stqdm import stqdm
+stqdm.pandas()
+import streamlit.components.v1 as components
+from dateutil import parser
+from sentence_transformers import SentenceTransformer
+import torch
+import squarify
+import matplotlib.colors as mcolors
+import textwrap
+import datamapplot
+from streamlit_extras.stateful_button import button as stateful_button
+from streamlit_keplergl import keplergl_static
+from keplergl import KeplerGl
+st.set_option('deprecation.showPyplotGlobalUse', False)
+from pandas.api.types import (
+    is_categorical_dtype,
+    is_datetime64_any_dtype,
+    is_numeric_dtype,
+    is_object_dtype,
+)
+st.title('Interactive Map')
+# Initialize session state.
+# Each key is seeded only when it is absent, so values already stored in the
+# session are kept.
+_SESSION_DEFAULTS = {
+    'analyzers': [],
+    'col_names': [],
+    'clusters': {},
+    'new_data': pd.DataFrame(),
+    'dataset': pd.DataFrame(),
+    'data_processed': False,
+    'stage': 0,
+    'filtered_data': None,
+    'gemini_answer': None,
+    'parsed_responses': None,
+    'map_generated': False,
+    # The guard for this flag used to test the misspelled key 'date_loaded',
+    # which nothing in this script stores, so 'data_loaded' was overwritten
+    # with False every time this code ran.
+    'data_loaded': False,
+    'datasets': [],
+}
+for _key, _default in _SESSION_DEFAULTS.items():
+    if _key not in st.session_state:
+        st.session_state[_key] = _default
+# sf_zip_geo_gdf = gpd.read_file("sf_zip_geo.geojson")
+# sf_zip_geo_gdf.label = "SF Zip Geo"
+# sf_zip_geo_gdf.id = "sf-zip-geo"
+# st.session_state.datasets.append(sf_zip_geo_gdf)
+def plot_treemap(df, column, top_n=32):
+    """
+    Draw a treemap of how often each value of one column occurs.
+    The `top_n` most frequent values get a tile each; all remaining values are
+    pooled into one 'Other' tile. Every tile is labelled with its value and
+    its share of the rows.
+    Args:
+        df (pd.DataFrame): Data to summarise; it is not modified
+        column (str): Column whose values are counted
+        top_n (int): Number of most frequent values that keep their own tile
+    Returns:
+        matplotlib.figure.Figure: 20x12 inch figure with a transparent background
+    """
+    fig, ax = plt.subplots(figsize=(20, 12))
+    fig.patch.set_alpha(0)
+    ax.patch.set_alpha(0)
+    # Get the top N labels
+    top_labels = df[column].value_counts().iloc[:top_n].index
+    # Replace all values not in the top N with 'Other'. The result stays in a
+    # local Series rather than a new '<column>_revised' column, so the frame
+    # passed in by the caller is left untouched.
+    revised = pd.Series(np.where(df[column].isin(top_labels), df[column], 'Other'))
+    # Get the value counts including the 'Other' category
+    counts = revised.value_counts()
+    if counts.empty:
+        # No rows to summarise: return the blank figure
+        ax.axis('off')
+        return fig
+    sizes = counts.values
+    # Get a gradient of colors
+    colors = plt.cm.Oranges(np.linspace(0.3, 0.9, len(sizes)))[::-1]
+    # Prepare labels with the % of each category
+    percents = sizes / sizes.sum()
+    labels = [f'{label}\n {percent:.1%}' for label, percent in zip(counts.index, percents)]
+    # Plot the treemap on the axes created above
+    squarify.plot(sizes=sizes, label=labels, alpha=0.7, pad=True, color=colors, ax=ax, text_kwargs={'fontsize': 10})
+    # Iterate over text elements and rectangles (patches) in the axes for color adjustment
+    for text, rect in zip(ax.texts, ax.patches):
+        r, g, b, _ = mcolors.to_rgba(rect.get_facecolor())
+        brightness = np.average([r, g, b])
+        # White text on dark tiles, black text on light tiles
+        text.set_color('white' if brightness < 0.5 else 'black')
+        # Adjust font size based on rectangle's area and wrap long text
+        text.set_fontsize(np.sqrt(rect.get_width() * rect.get_height()) * 0.8)
+        text.set_text(textwrap.fill(text.get_text(), width=20))
+    ax.axis('off')
+    ax.invert_yaxis()
+    return fig
+def plot_hist(df, column, bins=10, kde=True):
+    """
+    Draw an orange histogram of one column on a transparent background.
+    Args:
+        df (pd.DataFrame): Data to plot
+        column (str): Column to bin
+        bins (int): Number of bins; an integer below 1 is raised to 1, any
+            other kind of value is handed to seaborn unchanged
+        kde (bool): Whether to overlay a kernel density estimate
+    Returns:
+        matplotlib.figure.Figure: 12x6 inch figure
+    """
+    # A bin count of 0 or less cannot be drawn; callers that derive the count
+    # from the number of unique values can produce 0 for tiny selections.
+    if isinstance(bins, (int, np.integer)):
+        bins = max(int(bins), 1)
+    fig, ax = plt.subplots(figsize=(12, 6))
+    # `kde` is forwarded (it used to be ignored) and the plot is bound to `ax`
+    sns.histplot(data=df, x=column, kde=kde, bins=bins, color='orange', ax=ax)
+    # set the ticks and frame in orange
+    for side in ('bottom', 'top', 'right', 'left'):
+        ax.spines[side].set_color('orange')
+    ax.xaxis.label.set_color('orange')
+    ax.yaxis.label.set_color('orange')
+    ax.tick_params(axis='x', colors='orange')
+    ax.tick_params(axis='y', colors='orange')
+    ax.title.set_color('orange')
+    # Set transparent background
+    fig.patch.set_alpha(0)
+    ax.patch.set_alpha(0)
+    return fig
+def plot_line(df, x_column, y_columns, figsize=(12, 10), color='orange', title=None, rolling_mean_value=2):
+    """
+    Plot one line per column against a shared x column, optionally smoothed.
+    Args:
+        df (pd.DataFrame): Data to plot; it is not modified
+        x_column (str): Column used for sorting and for the x axis
+        y_columns (list[str] | str): Column(s) drawn as separate lines
+        figsize (tuple): Figure size in inches
+        color (str): Color of the title, axis labels, ticks, frame and legend
+        title (str | None): Title; defaults to '<y columns> over <x_column>'
+        rolling_mean_value (int): Each y column is replaced by its rolling mean
+            over len(df) // rolling_mean_value rows (at least 1 row); a falsy
+            value disables smoothing
+    Returns:
+        matplotlib.figure.Figure: Figure with a transparent background
+    """
+    # Accept a single column name as well as a list of names
+    if isinstance(y_columns, str):
+        y_columns = [y_columns]
+    y_columns = list(y_columns)
+    # Sort by the x column; sort_values returns a new frame, so the
+    # assignment below does not touch the caller's data
+    df = df.sort_values(by=x_column)
+    # Calculate rolling mean for each y_column
+    if rolling_mean_value:
+        # Frames shorter than rolling_mean_value would give a window of 0 rows
+        window = max(len(df) // rolling_mean_value, 1)
+        df[y_columns] = df[y_columns].rolling(window).mean()
+    # Create the plot
+    fig, ax = plt.subplots(figsize=figsize)
+    line_colors = plt.cm.Oranges(np.linspace(0.2, 1, len(y_columns)))
+    # Plot each y_column as a separate line with a different color
+    for line_color, y_column in zip(line_colors, y_columns):
+        df.plot(x=x_column, y=y_column, ax=ax, color=line_color, label=y_column, linewidth=.5)
+    # Rotate x-axis labels in place, leaving the tick label text to the axis
+    plt.setp(ax.get_xticklabels(), rotation=30, ha='right')
+    # Set title, labels, and legend
+    ax.set_title(title or f'{", ".join(y_columns)} over {x_column}', color=color, fontweight='bold')
+    ax.set_xlabel(x_column, color=color)
+    ax.set_ylabel(', '.join(y_columns), color=color)
+    for side in ('bottom', 'top', 'right', 'left'):
+        ax.spines[side].set_color(color)
+    ax.tick_params(axis='both', colors=color)
+    ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor=color, edgecolor=color)
+    # Remove background
+    fig.patch.set_alpha(0)
+    ax.patch.set_alpha(0)
+    return fig
+def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None):
+    """
+    Draw a bar chart of one column against another on a transparent background.
+    Args:
+        df (pd.DataFrame): Data to plot
+        x_column (str): Column shown on the x axis
+        y_column (str): Column shown on the y axis
+        figsize (tuple): Figure size in inches
+        color (str): Color of the bars, title, axis labels, ticks and frame
+        title (str | None): Title; defaults to '<y_column> by <x_column>'
+    Returns:
+        matplotlib.figure.Figure: The bar chart
+    """
+    fig, ax = plt.subplots(figsize=figsize)
+    sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
+    ax.set_title(title if title else f'{y_column} by {x_column}', color=color, fontweight='bold')
+    ax.set_xlabel(x_column, color=color)
+    ax.set_ylabel(y_column, color=color)
+    ax.tick_params(axis='both', colors=color)
+    for side in ('bottom', 'top', 'right', 'left'):
+        ax.spines[side].set_color(color)
+    # Remove background
+    fig.patch.set_alpha(0)
+    ax.patch.set_alpha(0)
+    # The bars are drawn without a label, so only build a legend when some
+    # artist actually provides an entry for it
+    handles, _ = ax.get_legend_handles_labels()
+    if handles:
+        ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor=color, edgecolor=color)
+    return fig
+def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
+    """
+    Draw one seaborn bar series per entry of `x_columns`, in orange styling.
+    NOTE(review): every series plots the same `y_column`; the entries of
+    `x_columns` only set the bar width, the position offset and the legend
+    label. Confirm that this is the intended chart.
+    Args:
+        df (pd.DataFrame): Data to plot; its index provides the tick labels
+        x_columns (list[str]): One bar series is drawn per entry
+        y_column (str): Column providing the bar heights
+        figsize (tuple): Figure size in inches
+        colors (list | None): One color per series, or None for the default
+        title (str | None): Title; defaults to '<y_column> by <x columns>'
+    Returns:
+        matplotlib.figure.Figure: Figure with a transparent background
+    """
+    fig, ax = plt.subplots(figsize=figsize)
+    n_series = len(x_columns)
+    width = 0.8 / n_series  # the width of the bars
+    # Float positions: the per-series offsets are fractional, and adding them
+    # in place to the integer array from np.arange raises a casting error
+    base = np.arange(len(df), dtype=float)  # the label locations
+    for i, x_column in enumerate(x_columns):
+        sns.barplot(data=df, x=base + i * width, y=y_column, color=colors[i] if colors else None, ax=ax, width=width, label=x_column)
+    ax.set_title(title if title else f'{y_column} by {", ".join(x_columns)}', color='orange', fontweight='bold')
+    ax.set_xlabel('Groups', color='orange')
+    ax.set_ylabel(y_column, color='orange')
+    # Ticks sit half a group width to the right of the base positions
+    ax.set_xticks(base + width * n_series / 2)
+    ax.set_xticklabels(df.index)
+    ax.tick_params(axis='x', colors='orange')
+    ax.tick_params(axis='y', colors='orange')
+    # Remove background
+    fig.patch.set_alpha(0)
+    ax.patch.set_alpha(0)
+    for side in ('bottom', 'top', 'right', 'left'):
+        ax.spines[side].set_color('orange')
+    ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor='orange', edgecolor='orange')
+    return fig
+def generate_kepler_map(data):
+    """
+    Call `keplergl` with `data` and a height of 400 and return the result.
+    NOTE(review): `keplergl` is bound by `from keplergl import keplergl`;
+    confirm that this name is callable.
+    """
+    return keplergl(data, height=400)
+def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Adds a UI on top of a dataframe to let viewers filter columns
+    Args:
+        df (pd.DataFrame): Original dataframe
+    Returns:
+        pd.DataFrame: Filtered dataframe
+    """
+    title_font = "Arial"
+    body_font = "Arial"
+    title_size = 32
+    colors = ["red", "green", "blue"]
+    interpretation = False
+    extract_docx = False
+    title = "My Chart"
+    regex = ".*"
+    img_path = 'default_image.png'
+    #try:
+    #    modify = st.checkbox("Add filters on raw data")
+    #except:
+    #    try:
+    #        modify = st.checkbox("Add filters on processed data")
+    #    except:
+    #        try:
+    #            modify = st.checkbox("Add filters on parsed data")
+    #        except:
+    #            pass
+    #if not modify:
+    #    return df
+    df_ = df.copy()
+    # Try to convert datetimes into a standard format (datetime, no timezone)
+#modification_container = st.container()
+#with modification_container:
+    try:
+        to_filter_columns = st.multiselect("Filter dataframe on", df_.columns)
+    except:
+        try:
+            to_filter_columns = st.multiselect("Filter dataframe", df_.columns)
+        except:
+            try:
+                to_filter_columns = st.multiselect("Filter the dataframe on", df_.columns)
+            except:
+                pass
+    date_column = None
+    filtered_columns = []
+    for column in to_filter_columns:
+        left, right = st.columns((1, 20))
+        # Treat columns with < 200 unique values as categorical if not date or numeric
+        if is_categorical_dtype(df_[column]) or (df_[column].nunique() < 120 and not is_datetime64_any_dtype(df_[column]) and not is_numeric_dtype(df_[column])):
+            user_cat_input = right.multiselect(
+                f"Values for {column}",
+                df_[column].value_counts().index.tolist(),
+                default=list(df_[column].value_counts().index)
+            )
+            df_ = df_[df_[column].isin(user_cat_input)]
+            filtered_columns.append(column)
+            with st.status(f"Category Distribution: {column}", expanded=False) as stat:
+                st.pyplot(plot_treemap(df_, column))
+        elif is_numeric_dtype(df_[column]):
+            _min = float(df_[column].min())
+            _max = float(df_[column].max())
+            step = (_max - _min) / 100
+            user_num_input = right.slider(
+                f"Values for {column}",
+                min_value=_min,
+                max_value=_max,
+                value=(_min, _max),
+                step=step,
+            )
+            df_ = df_[df_[column].between(*user_num_input)]
+            filtered_columns.append(column)
+            # Chart_GPT = ChartGPT(df_, title_font, body_font, title_size,
+            #      colors, interpretation, extract_docx, img_path)
+            with st.status(f"Numerical Distribution: {column}", expanded=False) as stat_:
+                st.pyplot(plot_hist(df_, column, bins=int(round(len(df_[column].unique())-1)/2)))
+        elif is_object_dtype(df_[column]):
+            try:
+                df_[column] = pd.to_datetime(df_[column], infer_datetime_format=True, errors='coerce')
+            except Exception:
+                try:
+                    df_[column] = df_[column].apply(parser.parse)
+                except Exception:
+                    pass
+            if is_datetime64_any_dtype(df_[column]):
+                df_[column] = df_[column].dt.tz_localize(None)
+                min_date = df_[column].min().date()
+                max_date = df_[column].max().date()
+                user_date_input = right.date_input(
+                    f"Values for {column}",
+                    value=(min_date, max_date),
+                    min_value=min_date,
+                    max_value=max_date,
+                )
+                # if len(user_date_input) == 2:
+                #     start_date, end_date = user_date_input
+                #     df_ = df_.loc[df_[column].dt.date.between(start_date, end_date)]
+                if len(user_date_input) == 2:
+                    user_date_input = tuple(map(pd.to_datetime, user_date_input))
+                    start_date, end_date = user_date_input
+                    df_ = df_.loc[df_[column].between(start_date, end_date)]
+                date_column = column
+                df_[column] = df_[column].dt.strftime('%Y-%m-%d %H:%M:%S')
+                if date_column and filtered_columns:
+                    numeric_columns = [col for col in filtered_columns if is_numeric_dtype(df_[col])]
+                    if numeric_columns:
+                        fig = plot_line(df_, date_column, numeric_columns)
+                        #st.pyplot(fig)
+                    # now to deal with categorical columns
+                    categorical_columns = [col for col in filtered_columns if is_categorical_dtype(df_[col])]
+                    if categorical_columns:
+                        fig2 = plot_bar(df_, date_column, categorical_columns[0])
+                        #st.pyplot(fig2)
+                    with st.status(f"Date Distribution: {column}", expanded=False) as stat:
+                        try:
+                            st.pyplot(fig)
+                        except Exception as e:
+                            st.error(f"Error plotting line chart: {e}")
+                            pass
+                        try:
+                            st.pyplot(fig2)
+                        except Exception as e:
+                            st.error(f"Error plotting bar chart: {e}")
+        else:
+            user_text_input = right.text_input(
+                f"Substring or regex in {column}",
+            )
+            if user_text_input:
+                df_ = df_[df_[column].astype(str).str.contains(user_text_input)]
+    # write len of df after filtering with % of original
+    st.write(f"{len(df_)} rows ({len(df_) / len(df) * 100:.2f}%)")
+    return df_
+def find_lat_lon_columns(df):
+    """Return the labels of the latitude and longitude columns of ``df``.
+    Candidates are ranked so that a loose match cannot hijack the result:
+    an exact name ('lat', 'latitude' / 'lon', 'lng', 'long', 'longitude')
+    wins over a name that starts with 'lat' / 'lon' / 'lng', which in turn
+    wins over a name that merely contains one of those tokens (so a column
+    such as 'population' or 'colony' is only used when nothing better
+    exists). Matching is case-insensitive and non-string labels are ignored.
+    Args:
+        df: DataFrame whose column labels are inspected.
+    Returns:
+        ``(lat_col, lon_col)`` using the original labels, or ``(None, None)``
+        when either coordinate column cannot be found.
+    """
+    # Keep each original label next to its normalised form; labels that are
+    # not strings (e.g. the integer labels of a header-less frame) cannot be
+    # coordinate names and would break string matching.
+    named = [(col, col.strip().lower()) for col in df.columns if isinstance(col, str)]
+    def _best_match(exact_names, tokens):
+        """Return the best-ranked label for one coordinate, or None."""
+        rankings = (
+            lambda low: low in exact_names,
+            lambda low: low.startswith(tokens),
+            # Fallback keeps the original substring behaviour ('city_lat').
+            lambda low: any(token in low for token in tokens),
+        )
+        for is_match in rankings:
+            for col, low in named:
+                if is_match(low):
+                    return col
+        return None
+    lat_col = _best_match(('lat', 'latitude'), ('lat',))
+    lon_col = _best_match(('lon', 'lng', 'long', 'longitude'), ('lon', 'lng'))
+    if lat_col is None or lon_col is None:
+        return None, None
+    return lat_col, lon_col
+# Page body: build the Kepler.gl map from the bundled reference datasets and,
+# once a file has been uploaded, overlay the filtered sightings on top of it.
+my_dataset = st.file_uploader("Upload Parsed DataFrame", type=["csv", "xlsx"])
+map_1 = KeplerGl(height=800)
+# Reference datasets are added regardless of the upload; both CSVs are read
+# from the working directory.
+powerplant = pd.read_csv('global_power_plant_database.csv')
+secret_bases = pd.read_csv('secret_bases.csv')
+map_1.add_data(data=secret_bases, name="secret_bases")
+# NOTE(review): the dataset id says "nuclear" although the file is the global
+# power plant database -- presumably the .kgl configs refer to this id, so it
+# is left unchanged; confirm before renaming.
+map_1.add_data(data=powerplant, name='nuclear_powerplants')
+if my_dataset is not None:
+    try:
+        # Accept the file by extension as well as by MIME type: the MIME type
+        # is reported by the browser and is not always "text/csv" for a .csv
+        # file, which previously produced a spurious "Unsupported file type".
+        upload_name = my_dataset.name.lower()
+        if my_dataset.type == "text/csv" or upload_name.endswith('.csv'):
+            data = pd.read_csv(my_dataset)
+        elif (my_dataset.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+                or upload_name.endswith('.xlsx')):
+            data = pd.read_excel(my_dataset)
+        else:
+            # The uploader only offers csv/xlsx, so the message lists exactly those.
+            st.error("Unsupported file type. Please upload a CSV or Excel (.xlsx) file.")
+            st.stop()
+        # Bound to filtered_df rather than `parser`, so the module-level
+        # `parser` name that filter_dataframe() uses for `parser.parse` is
+        # not rebound to a DataFrame.
+        filtered_df = filter_dataframe(data)
+        st.session_state['parsed_responses'] = filtered_df
+        st.dataframe(filtered_df)
+        st.success(f"Successfully loaded and displayed data from {my_dataset.name}")
+        st.session_state['data_loaded'] = True
+        # Load both Kepler.gl configs; the JSON is decoded as UTF-8 explicitly
+        # so the result does not depend on the platform's default encoding.
+        with open('military_config.kgl', 'r', encoding='utf-8') as f:
+            base_config = json.load(f)
+        with open('uap_config.kgl', 'r', encoding='utf-8') as f:
+            uap_config = json.load(f)
+        base_vis_state = base_config['config']['visState']
+        uap_layers = uap_config['config']['visState']['layers']
+        # Register a filter on the first column whose name contains 'date'
+        # (case-sensitive, as before). Non-string labels are skipped instead
+        # of raising from the .str accessor.
+        date_columns = [col for col in filtered_df.columns if isinstance(col, str) and 'date' in col]
+        if date_columns:
+            base_vis_state['filters'].append({
+                "dataId": "uap_sightings",
+                "name": date_columns[0],
+            })
+            map_1.config = base_config
+        map_1.add_data(data=filtered_df, name="uap_sightings")
+        # Point the UAP layers at the detected coordinate columns, if any.
+        lat_col, lon_col = find_lat_lon_columns(filtered_df)
+        if lat_col and lon_col:
+            for layer in uap_layers:
+                if 'config' in layer and 'columns' in layer['config']:
+                    if 'lat' in layer['config']['columns']:
+                        layer['config']['columns']['lat'] = lat_col
+                    if 'lng' in layer['config']['columns']:
+                        layer['config']['columns']['lng'] = lon_col
+        # The UAP layers are appended to the base config whether or not the
+        # coordinate columns were found (both original branches did this).
+        base_vis_state['layers'].extend(uap_layers)
+        map_1.config = base_config
+        keplergl_static(map_1, center_map=True)
+        st.session_state['map_generated'] = True
+    except Exception as e:
+        # Top-level boundary of the page: surface any failure in the UI.
+        st.error(f"An error occurred while reading the file: {e}")
+else:
+    st.warning("Please upload a file to get started.")