Spaces:

klinic-hackupc
/

klinic

Sleeping

App Files Community

ACMCMC committed on May 5, 2024

Commit

1b1c01c

1 Parent(s): 3276764

WIP

Browse files

Files changed (6) hide show

MATLAB/main.m +1 -1
MATLAB/visualize_app.mlapp +0 -0
MATLAB/visualize_connectedNodes_continuous.m +1 -1
app.py +55 -48
llm_res.py +21 -21
utils.py +3 -2

MATLAB/main.m CHANGED Viewed

@@ -15,7 +15,7 @@ function display_elementsForKey(connectionsMap, key)
     end
 end
-data = readtable('MGREL.RRF', Delimiter='|', FileType='text', NumHeaderLines=0, VariableNamingRule='preserve');
 data = renamevars(data,"#CUI1","CUI1");
 data = data(1:1000,:);

     end
 end
+data = readtable('../MGREL.RRF', Delimiter='|', FileType='text', NumHeaderLines=0, VariableNamingRule='preserve');
 data = renamevars(data,"#CUI1","CUI1");
 data = data(1:1000,:);

MATLAB/visualize_app.mlapp CHANGED Viewed

Binary files a/MATLAB/visualize_app.mlapp and b/MATLAB/visualize_app.mlapp differ

MATLAB/visualize_connectedNodes_continuous.m CHANGED Viewed

@@ -1,6 +1,6 @@
 function visualize_connectedNodes_continuous()
     % Read the data and create the connections map
-    data = readtable('MGREL.RRF', 'Delimiter', '|', 'FileType', 'text', 'NumHeaderLines', 0, 'VariableNamingRule', 'preserve');
     data = renamevars(data, '#CUI1', 'CUI1');
     data = data(1:10000,:);

 function visualize_connectedNodes_continuous()
     % Read the data and create the connections map
+    data = readtable('../MGREL.RRF', 'Delimiter', '|', 'FileType', 'text', 'NumHeaderLines', 0, 'VariableNamingRule', 'preserve');
     data = renamevars(data, '#CUI1', 'CUI1');
     data = data(1:10000,:);

app.py CHANGED Viewed

@@ -105,15 +105,17 @@ with st.container():
             status.divider()
             # 7. Use an LLM to get a summary of the clinical trials, in plain text format.
             status.write("Getting a summary of the clinical trials...")
-            response, stats_dict = get_short_summary_out_of_json_files(json_of_clinical_trials)
             disease_overview = response
-            status.write(f'Response from LLM summarization: {response}')
             # 8. Use an LLM to extract numerical data from the clinical trials (e.g. number of patients, number of deaths, etc.). Get summary statistics out of that.
-            status.write("Getting summary statistics of the clinical trials...")
-            #response = tagging_insights_from_json(json_of_clinical_trials)
-            response = ""
-            print(f'Response from LLM tagging: {response}')
-            status.write(f'Response from LLM tagging: {response}')
             # 9. Show the results to the user: graph of the diseases chosen, summary of the clinical trials, summary statistics of the clinical trials, and list of the details of the clinical trials considered
             status.update(label="Done!", state="complete")
             status.balloons()
@@ -187,52 +189,57 @@ with st.container():
             with tabs[i]:
                 render_trial_details(trials[i])
-chosen_disease_name = st.selectbox(
-    "Choose a disease",
-    get_all_diseases_name(engine))
-st.write("You selected:", chosen_disease_name)
-chosen_disease_uri = get_uri_from_name(engine, chosen_disease_name)
-nodes = []
-edges = []
-nodes.append( Node(id=chosen_disease_uri,
-    label=chosen_disease_name,
-    size=25,
-    shape="circular")
-)
-similar_diseases = get_most_similar_diseases_from_uri(engine, chosen_disease_uri, threshold=0.6)
-print(similar_diseases)
-for uri, name, weight in similar_diseases:
-    nodes.append( Node(id=uri,
-        label=name,
         size=25,
         shape="circular")
     )
-    print(True if float(weight) > 0.7 else False)
-    edges.append( Edge(source=chosen_disease_uri,
-                   target=uri,
-                   color="red" if float(weight) > 0.7 else "blue",
-                   weight=float(weight)**10,
-                   type="CURVE_SMOOTH"
-                #    type="STRAIGHT"
-                   )
-            )
-config = Config(width=750,
-                height=950,
-                directed=False,
-                physics=True,
-                hierarchical=False,
-                collapsible=False,
-                # **kwargs
-                )
-return_value = agraph(nodes=nodes,
-                      edges=edges,
-                      config=config)

             status.divider()
             # 7. Use an LLM to get a summary of the clinical trials, in plain text format.
             status.write("Getting a summary of the clinical trials...")
+            response = get_short_summary_out_of_json_files(json_of_clinical_trials)
             disease_overview = response
+            try:
             # 8. Use an LLM to extract numerical data from the clinical trials (e.g. number of patients, number of deaths, etc.). Get summary statistics out of that.
+                status.write("Getting summary statistics of the clinical trials...")
+                response = tagging_insights_from_json(json_of_clinical_trials)
+                print(f'Response from LLM tagging: {response}')
+                status.write(f'Response from LLM tagging: {response}')
+            except Exception as e:
+                print(f'Error while extracting numerical data from the clinical trials: {e}')
+                status.warning(f'Error while extracting numerical data from the clinical trials. This information will not be shown.')
             # 9. Show the results to the user: graph of the diseases chosen, summary of the clinical trials, summary statistics of the clinical trials, and list of the details of the clinical trials considered
             status.update(label="Done!", state="complete")
             status.balloons()
             with tabs[i]:
                 render_trial_details(trials[i])
+show_graph_of_all_diseases = False
+if show_graph_of_all_diseases:
+    # If disease_names is not defined, define it
+    if "disease_names" not in st.session_state:
+        st.session_state.disease_names = get_all_diseases_name(engine)
+    chosen_disease_name = st.selectbox(
+        "Choose a disease",
+        st.session_state.disease_names,
+        )
+    st.write("You selected:", chosen_disease_name)
+    chosen_disease_uri = get_uri_from_name(engine, chosen_disease_name)
+    nodes = []
+    edges = []
+    nodes.append( Node(id=chosen_disease_uri,
+        label=chosen_disease_name,
         size=25,
         shape="circular")
     )
+    similar_diseases = get_most_similar_diseases_from_uri(engine, chosen_disease_uri, threshold=0.6)
+    print(similar_diseases)
+    for uri, name, weight in similar_diseases:
+        nodes.append( Node(id=uri,
+            label=name,
+            size=25,
+            shape="circular")
+        )
+        print(True if float(weight) > 0.7 else False)
+        edges.append( Edge(source=chosen_disease_uri,
+                    target=uri,
+                    color="red" if float(weight) > 0.7 else "blue",
+                    weight=float(weight)**10,
+                    type="CURVE_SMOOTH"
+                    #    type="STRAIGHT"
+                    )
+                )
+    config = Config(width=750,
+                    height=950,
+                    directed=False,
+                    physics=True,
+                    hierarchical=False,
+                    collapsible=False,
+                    # **kwargs
+                    )
+    return_value = agraph(nodes=nodes,
+                        edges=edges,
+                        config=config)

llm_res.py CHANGED Viewed

@@ -301,7 +301,7 @@ def tagging_insights_from_json(data_json):
     Extract the desired information from the following JSON data.
-    Only extract the properties mentioned in the 'Classification' function.
     JSON data:
     {input}
@@ -317,20 +317,20 @@ def tagging_insights_from_json(data_json):
         # status: list = Field(
         #     description="Extract the status of all the clinical trials"
         # )
-        keywords: list = Field(
-           description="Extract the most relevant keywords for each clinical trials"
-        )
         # interventions: list = Field(
         #     description="describe the interventions for each clinical trial using title, name and description"
         # )
-        primary_outcomes: list = Field(
-            description="get the timeframe of each clinical trial"
-        )
-        secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
-        eligibility: list = Field(
-           description="get the timeframe of each clinical trial"
-        )
-        healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
         minimum_age: list = Field(
            description="get the minimum age from each experiment"
         )
@@ -343,12 +343,12 @@ def tagging_insights_from_json(data_json):
             return {
                 # "project_title": self.project_title,
                 # "status": self.status,
-                "keywords": self.keywords,
                 # "interventions": self.interventions,
-                "primary_outcomes": self.primary_outcomes,
-                "secondary_outcomes": self.secondary_outcomes,
                 # "eligibility": self.eligibility,
-                "healthy_volunteers": self.healthy_volunteers,
                 "minimum_age": self.minimum_age,
                 "maximum_age": self.maximum_age,
                 "gender": self.gender
@@ -370,13 +370,13 @@ def tagging_insights_from_json(data_json):
     avg_min_age, avg_max_age, most_common_gender, common_keywords= analyze_data(result_dict)
-    stats_dict= {'Average Minimum age': avg_min_age,
-                 'Average Maximum age': avg_max_age,
-                 'Most common gender undergoing the trials': most_common_gender,
-                 'common keywords found in the trials': common_keywords}
     print(f"Result_tagging: {result_dict}")
-    return result_dict, stats_dict
 # clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])

     Extract the desired information from the following JSON data.
+    Only extract the properties mentioned in the 'Classification' function. Output a list of the extracted properties, starting with [ and ending with ].
     JSON data:
     {input}
         # status: list = Field(
         #     description="Extract the status of all the clinical trials"
         # )
+        #keywords: list = Field(
+        #   description="Extract the most relevant keywords for each clinical trials"
+        #)
         # interventions: list = Field(
         #     description="describe the interventions for each clinical trial using title, name and description"
         # )
+        #primary_outcomes: list = Field(
+        #    description="get the timeframe of each clinical trial"
+        #)
+        #secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
+        #eligibility: list = Field(
+        #   description="get the timeframe of each clinical trial"
+        #)
+        # healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
         minimum_age: list = Field(
            description="get the minimum age from each experiment"
         )
             return {
                 # "project_title": self.project_title,
                 # "status": self.status,
+                #"keywords": self.keywords,
                 # "interventions": self.interventions,
+                #"primary_outcomes": self.primary_outcomes,
+                #"secondary_outcomes": self.secondary_outcomes,
                 # "eligibility": self.eligibility,
+                # "healthy_volunteers": self.healthy_volunteers,
                 "minimum_age": self.minimum_age,
                 "maximum_age": self.maximum_age,
                 "gender": self.gender
     avg_min_age, avg_max_age, most_common_gender, common_keywords= analyze_data(result_dict)
+    #stats_dict= {'Average Minimum age': avg_min_age,
+    #             'Average Maximum age': avg_max_age,
+    #             'Most common gender undergoing the trials': most_common_gender,
+    #             'common keywords found in the trials': common_keywords}
     print(f"Result_tagging: {result_dict}")
+    return result_dict#, stats_dict
 # clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])

utils.py CHANGED Viewed

@@ -18,15 +18,16 @@ engine = create_engine(CONNECTION_STRING)
 def get_all_diseases_name(engine) -> List[List[str]]:
     with engine.connect() as conn:
         with conn.begin():
             sql = f"""
-                    SELECT * FROM Test.EntityEmbeddings
                     """
             result = conn.execute(text(sql))
             data = result.fetchall()
-    all_diseases = [row[1] for row in data if row[1] != "nan"]
     return all_diseases

 def get_all_diseases_name(engine) -> List[List[str]]:
+    print("Fetching all disease names...")
     with engine.connect() as conn:
         with conn.begin():
             sql = f"""
+                    SELECT label FROM Test.EntityEmbeddings
                     """
             result = conn.execute(text(sql))
             data = result.fetchall()
+    all_diseases = [row[0] for row in data if row[0] != "nan"]
     return all_diseases