Spaces:

taufeeque
/

codebook-features

Running

App Files Files Community

taufeeque committed on Aug 25, 2023

Commit

835c841

1 Parent(s): 811a8b4

Update UI

Browse files

Files changed (3) hide show

.streamlit/config.toml +8 -0
Code_Browser.py +13 -7
utils.py +31 -30

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,8 @@

+[global]
+disableWidgetStateDuplicationWarning = true
+[theme]
+base="dark"
+primaryColor="DeepSkyBlue"
+backgroundColor="#121821"
+secondaryBackgroundColor="#303540"

Code_Browser.py CHANGED Viewed

@@ -25,10 +25,12 @@ dirs = glob.glob(base_cache_dir + "models/*/")
 model_name_options = [d.split("/")[-2].split("_")[:-2] for d in dirs]
 model_name_options = ["_".join(m) for m in model_name_options]
 model_name_options = sorted(set(model_name_options))
 model_name = st.selectbox(
     "Model",
     model_name_options,
     key=webapp_utils.persist("model_name"),
 )
@@ -87,10 +89,11 @@ st.session_state["ccb"] = ccb
 st.session_state["cb_at"] = cb_at
 st.session_state["is_attn"] = is_attn
-st.markdown("## Metrics")
-# hide metrics by default
-if st.checkbox("Show Model Metrics"):
-    st.write(metrics)
 st.markdown("## Demo Codes")
 demo_file_path = codes_cache_path + "demo_codes.txt"
@@ -122,7 +125,7 @@ if st.checkbox("Show Demo Codes"):
     if len(demo_codes) == 0:
         st.markdown(
             f"""
-            <div style="font-size: 1.3rem; color: red;">
             No demo codes found in file {demo_file_path}
             </div>
             """,
@@ -214,7 +217,10 @@ if regex_pattern:
         ccb,
         model_name,
     )
-    st.markdown(f"Found :green[{re_token_matches}] matches")
     num_search_cols = 7 if is_attn else 6
     non_deploy_offset = 0
     if not DEPLOY_MODE:
@@ -296,7 +302,7 @@ if regex_pattern:
 st.markdown("## Code Token Activations")
-filter_codes = st.checkbox("Filter Codes", key="filter_codes")
 act_range, layer_code_acts = None, None
 if filter_codes:
     act_range = st.slider(

 model_name_options = [d.split("/")[-2].split("_")[:-2] for d in dirs]
 model_name_options = ["_".join(m) for m in model_name_options]
 model_name_options = sorted(set(model_name_options))
+def_model_idx = ["attn" in m for m in model_name_options].index(True)
 model_name = st.selectbox(
     "Model",
     model_name_options,
+    index=def_model_idx,
     key=webapp_utils.persist("model_name"),
 )
 st.session_state["cb_at"] = cb_at
 st.session_state["is_attn"] = is_attn
+if not DEPLOY_MODE:
+    st.markdown("## Metrics")
+    # hide metrics by default
+    if st.checkbox("Show Model Metrics"):
+        st.write(metrics)
 st.markdown("## Demo Codes")
 demo_file_path = codes_cache_path + "demo_codes.txt"
     if len(demo_codes) == 0:
         st.markdown(
             f"""
+            <div style="font-size: 1.0rem; color: red;">
             No demo codes found in file {demo_file_path}
             </div>
             """,
         ccb,
         model_name,
     )
+    st.markdown(
+        f"Found <span style='color:green;'>{re_token_matches}</span> matches",
+        unsafe_allow_html=True,
+    )
     num_search_cols = 7 if is_attn else 6
     non_deploy_offset = 0
     if not DEPLOY_MODE:
 st.markdown("## Code Token Activations")
+filter_codes = st.checkbox("Show filters", key="filter_codes")
 act_range, layer_code_acts = None, None
 if filter_codes:
     act_range = st.slider(

utils.py CHANGED Viewed

@@ -152,39 +152,42 @@ def features_to_tokens(cb_key, cb_acts, num_codes, code=None):
     return features_tokens
-def color_str(s: str, color: str, html: bool):
     """Color the string for html or terminal."""
     if html:
         return f"<span style='color:{color}'>{s}</span>"
     else:
         return colored(s, color)
-def color_tokens_red_automata(tokens, red_idx, html=False):
-    """Separate states with a dash and color red the tokens in red_idx."""
     ret_string = ""
-    itr_over_red_idx = 0
     tokens_enumerate = enumerate(tokens)
     if tokens[0] == "<|endoftext|>":
         next(tokens_enumerate)
-        if red_idx[0] == 0:
-            itr_over_red_idx += 1
     for i, c in tokens_enumerate:
         if i % 2 == 1:
             ret_string += "-"
-        if itr_over_red_idx < len(red_idx) and i == red_idx[itr_over_red_idx]:
-            ret_string += color_str(c, "red", html)
-            itr_over_red_idx += 1
         else:
             ret_string += c
     return ret_string
-def color_tokens_red(tokens, red_idx, n=3, html=False):
-    """Color red the tokens in red_idx."""
     ret_string = ""
     last_colored_token_idx = -1
-    for i in red_idx:
         c_str = tokens[i]
         if i <= last_colored_token_idx + 2 * n + 1:
             ret_string += "".join(tokens[last_colored_token_idx + 1 : i])
@@ -194,7 +197,7 @@ def color_tokens_red(tokens, red_idx, n=3, html=False):
             )
             ret_string += " ... "
             ret_string += "".join(tokens[i - n : i])
-        ret_string += color_str(c_str, "red", html)
         last_colored_token_idx = i
     ret_string += "".join(
         tokens[
@@ -207,15 +210,15 @@ def color_tokens_red(tokens, red_idx, n=3, html=False):
 def prepare_example_print(
     example_id,
     example_tokens,
-    tokens_to_color_red,
     html,
-    color_red_fn=color_tokens_red,
 ):
     """Format example to print."""
-    example_output = color_str(example_id, "green", html)
     example_output += (
         ": "
-        + color_red_fn(example_tokens, tokens_to_color_red, html=html)
         + ("<br>" if html else "\n")
     )
     return example_output
@@ -238,10 +241,8 @@ def tkn_print(
     print_output = [] if return_example_list else ""
     curr_ex = ll[0][0]
     total_examples = 0
-    tokens_to_color_red = []
-    color_red_fn = (
-        color_tokens_red_automata if separate_states else partial(color_tokens_red, n=n)
-    )
     for idx in indices:
         if total_examples > max_examples:
             break
@@ -251,31 +252,31 @@ def tkn_print(
             curr_ex_output = prepare_example_print(
                 curr_ex,
                 tokens[curr_ex],
-                tokens_to_color_red,
                 html,
-                color_red_fn,
             )
             total_examples += 1
             if return_example_list:
-                print_output.append((curr_ex_output, len(tokens_to_color_red)))
             else:
                 print_output += curr_ex_output
             curr_ex = i
-            tokens_to_color_red = []
-        tokens_to_color_red.append(j)
     curr_ex_output = prepare_example_print(
         curr_ex,
         tokens[curr_ex],
-        tokens_to_color_red,
         html,
-        color_red_fn,
     )
     if return_example_list:
-        print_output.append((curr_ex_output, len(tokens_to_color_red)))
     else:
         print_output += curr_ex_output
         asterisk_str = "********************************************"
-        print_output += color_str(asterisk_str, "green", html)
     total_examples += 1
     return print_output

     return features_tokens
+def color_str(s: str, html: bool, color: Optional[str] = None):
     """Color the string for html or terminal."""
     if html:
+        color = "DeepSkyBlue" if color is None else color
         return f"<span style='color:{color}'>{s}</span>"
     else:
+        color = "light_cyan" if color is None else color
         return colored(s, color)
+def color_tokens_automata(tokens, color_idx, html=False):
+    """Separate states with a dash and color red the tokens in color_idx."""
     ret_string = ""
+    itr_over_color_idx = 0
     tokens_enumerate = enumerate(tokens)
     if tokens[0] == "<|endoftext|>":
         next(tokens_enumerate)
+        if color_idx[0] == 0:
+            itr_over_color_idx += 1
     for i, c in tokens_enumerate:
         if i % 2 == 1:
             ret_string += "-"
+        if itr_over_color_idx < len(color_idx) and i == color_idx[itr_over_color_idx]:
+            ret_string += color_str(c, html)
+            itr_over_color_idx += 1
         else:
             ret_string += c
     return ret_string
+def color_tokens(tokens, color_idx, n=3, html=False):
+    """Color the tokens in color_idx."""
     ret_string = ""
     last_colored_token_idx = -1
+    for i in color_idx:
         c_str = tokens[i]
         if i <= last_colored_token_idx + 2 * n + 1:
             ret_string += "".join(tokens[last_colored_token_idx + 1 : i])
             )
             ret_string += " ... "
             ret_string += "".join(tokens[i - n : i])
+        ret_string += color_str(c_str, html)
         last_colored_token_idx = i
     ret_string += "".join(
         tokens[
 def prepare_example_print(
     example_id,
     example_tokens,
+    tokens_to_color,
     html,
+    color_fn=color_tokens,
 ):
     """Format example to print."""
+    example_output = color_str(example_id, html, "green")
     example_output += (
         ": "
+        + color_fn(example_tokens, tokens_to_color, html=html)
         + ("<br>" if html else "\n")
     )
     return example_output
     print_output = [] if return_example_list else ""
     curr_ex = ll[0][0]
     total_examples = 0
+    tokens_to_color = []
+    color_fn = color_tokens_automata if separate_states else partial(color_tokens, n=n)
     for idx in indices:
         if total_examples > max_examples:
             break
             curr_ex_output = prepare_example_print(
                 curr_ex,
                 tokens[curr_ex],
+                tokens_to_color,
                 html,
+                color_fn,
             )
             total_examples += 1
             if return_example_list:
+                print_output.append((curr_ex_output, len(tokens_to_color)))
             else:
                 print_output += curr_ex_output
             curr_ex = i
+            tokens_to_color = []
+        tokens_to_color.append(j)
     curr_ex_output = prepare_example_print(
         curr_ex,
         tokens[curr_ex],
+        tokens_to_color,
         html,
+        color_fn,
     )
     if return_example_list:
+        print_output.append((curr_ex_output, len(tokens_to_color)))
     else:
         print_output += curr_ex_output
         asterisk_str = "********************************************"
+        print_output += color_str(asterisk_str, html, "green")
     total_examples += 1
     return print_output