Spaces:
Running
Running
Update UI
Browse files
- .streamlit/config.toml +8 -0
- Code_Browser.py +13 -7
- utils.py +31 -30
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[global]
|
| 2 |
+
disableWidgetStateDuplicationWarning = true
|
| 3 |
+
|
| 4 |
+
[theme]
|
| 5 |
+
base="dark"
|
| 6 |
+
primaryColor="DeepSkyBlue"
|
| 7 |
+
backgroundColor="#121821"
|
| 8 |
+
secondaryBackgroundColor="#303540"
|
Code_Browser.py
CHANGED
|
@@ -25,10 +25,12 @@ dirs = glob.glob(base_cache_dir + "models/*/")
|
|
| 25 |
model_name_options = [d.split("/")[-2].split("_")[:-2] for d in dirs]
|
| 26 |
model_name_options = ["_".join(m) for m in model_name_options]
|
| 27 |
model_name_options = sorted(set(model_name_options))
|
|
|
|
| 28 |
|
| 29 |
model_name = st.selectbox(
|
| 30 |
"Model",
|
| 31 |
model_name_options,
|
|
|
|
| 32 |
key=webapp_utils.persist("model_name"),
|
| 33 |
)
|
| 34 |
|
|
@@ -87,10 +89,11 @@ st.session_state["ccb"] = ccb
|
|
| 87 |
st.session_state["cb_at"] = cb_at
|
| 88 |
st.session_state["is_attn"] = is_attn
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
st.
|
|
|
|
| 94 |
|
| 95 |
st.markdown("## Demo Codes")
|
| 96 |
demo_file_path = codes_cache_path + "demo_codes.txt"
|
|
@@ -122,7 +125,7 @@ if st.checkbox("Show Demo Codes"):
|
|
| 122 |
if len(demo_codes) == 0:
|
| 123 |
st.markdown(
|
| 124 |
f"""
|
| 125 |
-
<div style="font-size: 1.
|
| 126 |
No demo codes found in file {demo_file_path}
|
| 127 |
</div>
|
| 128 |
""",
|
|
@@ -214,7 +217,10 @@ if regex_pattern:
|
|
| 214 |
ccb,
|
| 215 |
model_name,
|
| 216 |
)
|
| 217 |
-
st.markdown(
|
|
|
|
|
|
|
|
|
|
| 218 |
num_search_cols = 7 if is_attn else 6
|
| 219 |
non_deploy_offset = 0
|
| 220 |
if not DEPLOY_MODE:
|
|
@@ -296,7 +302,7 @@ if regex_pattern:
|
|
| 296 |
|
| 297 |
st.markdown("## Code Token Activations")
|
| 298 |
|
| 299 |
-
filter_codes = st.checkbox("
|
| 300 |
act_range, layer_code_acts = None, None
|
| 301 |
if filter_codes:
|
| 302 |
act_range = st.slider(
|
|
|
|
| 25 |
# Derive the selectable model names from the cached model directories: take
# each directory's name, drop its last two "_"-separated fields, and keep the
# unique results in sorted order.
model_name_options = sorted(
    {"_".join(d.split("/")[-2].split("_")[:-2]) for d in dirs}
)
# Preselect the first model whose name contains "attn". Fall back to the first
# option when no name matches, because list.index(True) would raise ValueError
# and crash the page.
def_model_idx = next(
    (i for i, m in enumerate(model_name_options) if "attn" in m),
    0,
)

model_name = st.selectbox(
    "Model",
    model_name_options,
    index=def_model_idx,
    key=webapp_utils.persist("model_name"),
)
|
| 36 |
|
|
|
|
| 89 |
st.session_state["cb_at"] = cb_at
|
| 90 |
st.session_state["is_attn"] = is_attn
|
| 91 |
|
| 92 |
+
if not DEPLOY_MODE:
|
| 93 |
+
st.markdown("## Metrics")
|
| 94 |
+
# hide metrics by default
|
| 95 |
+
if st.checkbox("Show Model Metrics"):
|
| 96 |
+
st.write(metrics)
|
| 97 |
|
| 98 |
st.markdown("## Demo Codes")
|
| 99 |
demo_file_path = codes_cache_path + "demo_codes.txt"
|
|
|
|
| 125 |
if len(demo_codes) == 0:
|
| 126 |
st.markdown(
|
| 127 |
f"""
|
| 128 |
+
<div style="font-size: 1.0rem; color: red;">
|
| 129 |
No demo codes found in file {demo_file_path}
|
| 130 |
</div>
|
| 131 |
""",
|
|
|
|
| 217 |
ccb,
|
| 218 |
model_name,
|
| 219 |
)
|
| 220 |
+
st.markdown(
|
| 221 |
+
f"Found <span style='color:green;'>{re_token_matches}</span> matches",
|
| 222 |
+
unsafe_allow_html=True,
|
| 223 |
+
)
|
| 224 |
num_search_cols = 7 if is_attn else 6
|
| 225 |
non_deploy_offset = 0
|
| 226 |
if not DEPLOY_MODE:
|
|
|
|
| 302 |
|
| 303 |
st.markdown("## Code Token Activations")
|
| 304 |
|
| 305 |
+
filter_codes = st.checkbox("Show filters", key="filter_codes")
|
| 306 |
act_range, layer_code_acts = None, None
|
| 307 |
if filter_codes:
|
| 308 |
act_range = st.slider(
|
utils.py
CHANGED
|
@@ -152,39 +152,42 @@ def features_to_tokens(cb_key, cb_acts, num_codes, code=None):
|
|
| 152 |
return features_tokens
|
| 153 |
|
| 154 |
|
| 155 |
-
def color_str(s: str,
|
| 156 |
"""Color the string for html or terminal."""
|
|
|
|
| 157 |
if html:
|
|
|
|
| 158 |
return f"<span style='color:{color}'>{s}</span>"
|
| 159 |
else:
|
|
|
|
| 160 |
return colored(s, color)
|
| 161 |
|
| 162 |
|
| 163 |
-
def
|
| 164 |
-
"""Separate states with a dash and color red the tokens in
|
| 165 |
ret_string = ""
|
| 166 |
-
|
| 167 |
tokens_enumerate = enumerate(tokens)
|
| 168 |
if tokens[0] == "<|endoftext|>":
|
| 169 |
next(tokens_enumerate)
|
| 170 |
-
if
|
| 171 |
-
|
| 172 |
for i, c in tokens_enumerate:
|
| 173 |
if i % 2 == 1:
|
| 174 |
ret_string += "-"
|
| 175 |
-
if
|
| 176 |
-
ret_string += color_str(c,
|
| 177 |
-
|
| 178 |
else:
|
| 179 |
ret_string += c
|
| 180 |
return ret_string
|
| 181 |
|
| 182 |
|
| 183 |
-
def
|
| 184 |
-
"""Color
|
| 185 |
ret_string = ""
|
| 186 |
last_colored_token_idx = -1
|
| 187 |
-
for i in
|
| 188 |
c_str = tokens[i]
|
| 189 |
if i <= last_colored_token_idx + 2 * n + 1:
|
| 190 |
ret_string += "".join(tokens[last_colored_token_idx + 1 : i])
|
|
@@ -194,7 +197,7 @@ def color_tokens_red(tokens, red_idx, n=3, html=False):
|
|
| 194 |
)
|
| 195 |
ret_string += " ... "
|
| 196 |
ret_string += "".join(tokens[i - n : i])
|
| 197 |
-
ret_string += color_str(c_str,
|
| 198 |
last_colored_token_idx = i
|
| 199 |
ret_string += "".join(
|
| 200 |
tokens[
|
|
@@ -207,15 +210,15 @@ def color_tokens_red(tokens, red_idx, n=3, html=False):
|
|
| 207 |
def prepare_example_print(
|
| 208 |
example_id,
|
| 209 |
example_tokens,
|
| 210 |
-
|
| 211 |
html,
|
| 212 |
-
|
| 213 |
):
|
| 214 |
"""Format example to print."""
|
| 215 |
-
example_output = color_str(example_id, "green"
|
| 216 |
example_output += (
|
| 217 |
": "
|
| 218 |
-
+
|
| 219 |
+ ("<br>" if html else "\n")
|
| 220 |
)
|
| 221 |
return example_output
|
|
@@ -238,10 +241,8 @@ def tkn_print(
|
|
| 238 |
print_output = [] if return_example_list else ""
|
| 239 |
curr_ex = ll[0][0]
|
| 240 |
total_examples = 0
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
color_tokens_red_automata if separate_states else partial(color_tokens_red, n=n)
|
| 244 |
-
)
|
| 245 |
for idx in indices:
|
| 246 |
if total_examples > max_examples:
|
| 247 |
break
|
|
@@ -251,31 +252,31 @@ def tkn_print(
|
|
| 251 |
curr_ex_output = prepare_example_print(
|
| 252 |
curr_ex,
|
| 253 |
tokens[curr_ex],
|
| 254 |
-
|
| 255 |
html,
|
| 256 |
-
|
| 257 |
)
|
| 258 |
total_examples += 1
|
| 259 |
if return_example_list:
|
| 260 |
-
print_output.append((curr_ex_output, len(
|
| 261 |
else:
|
| 262 |
print_output += curr_ex_output
|
| 263 |
curr_ex = i
|
| 264 |
-
|
| 265 |
-
|
| 266 |
curr_ex_output = prepare_example_print(
|
| 267 |
curr_ex,
|
| 268 |
tokens[curr_ex],
|
| 269 |
-
|
| 270 |
html,
|
| 271 |
-
|
| 272 |
)
|
| 273 |
if return_example_list:
|
| 274 |
-
print_output.append((curr_ex_output, len(
|
| 275 |
else:
|
| 276 |
print_output += curr_ex_output
|
| 277 |
asterisk_str = "********************************************"
|
| 278 |
-
print_output += color_str(asterisk_str, "green"
|
| 279 |
total_examples += 1
|
| 280 |
|
| 281 |
return print_output
|
|
|
|
| 152 |
return features_tokens
|
| 153 |
|
| 154 |
|
| 155 |
+
def color_str(s: str, html: bool, color: Optional[str] = None):
    """Wrap ``s`` in color markup for HTML or terminal output.

    Args:
        s: text to color.
        html: if True, return an inline-styled ``<span>``; otherwise pass the
            text to ``colored``.
        color: color name to use. When None, "DeepSkyBlue" is used for HTML
            and "light_cyan" for the terminal.

    Returns:
        The colored string.
    """
    if not html:
        return colored(s, "light_cyan" if color is None else color)
    span_color = "DeepSkyBlue" if color is None else color
    return f"<span style='color:{span_color}'>{s}</span>"
|
| 164 |
|
| 165 |
|
| 166 |
+
def color_tokens_automata(tokens, color_idx, html=False):
    """Join tokens, dash-separating states and coloring the selected tokens.

    A "-" is inserted before every token at an odd position. Tokens whose
    position appears in ``color_idx`` are wrapped with ``color_str`` using its
    default color. A leading "<|endoftext|>" token is dropped from the output.

    Args:
        tokens: sequence of token strings.
        color_idx: positions in ``tokens`` to color. They are consumed with a
            single forward cursor, so they are expected in increasing order.
        html: forwarded to ``color_str`` to choose HTML or terminal coloring.

    Returns:
        The formatted string; empty when ``tokens`` is empty.
    """
    if len(tokens) == 0:
        return ""
    pieces = []
    cursor = 0
    num_to_color = len(color_idx)
    tokens_enumerate = enumerate(tokens)
    if tokens[0] == "<|endoftext|>":
        next(tokens_enumerate)
        # The skipped token cannot be colored, so step past its index. Check
        # the length first: color_idx may be empty.
        if num_to_color > 0 and color_idx[0] == 0:
            cursor += 1
    for i, c in tokens_enumerate:
        if i % 2 == 1:
            pieces.append("-")
        if cursor < num_to_color and i == color_idx[cursor]:
            pieces.append(color_str(c, html))
            cursor += 1
        else:
            pieces.append(c)
    return "".join(pieces)
|
| 184 |
|
| 185 |
|
| 186 |
+
def color_tokens(tokens, color_idx, n=3, html=False):
|
| 187 |
+
"""Color the tokens in color_idx."""
|
| 188 |
ret_string = ""
|
| 189 |
last_colored_token_idx = -1
|
| 190 |
+
for i in color_idx:
|
| 191 |
c_str = tokens[i]
|
| 192 |
if i <= last_colored_token_idx + 2 * n + 1:
|
| 193 |
ret_string += "".join(tokens[last_colored_token_idx + 1 : i])
|
|
|
|
| 197 |
)
|
| 198 |
ret_string += " ... "
|
| 199 |
ret_string += "".join(tokens[i - n : i])
|
| 200 |
+
ret_string += color_str(c_str, html)
|
| 201 |
last_colored_token_idx = i
|
| 202 |
ret_string += "".join(
|
| 203 |
tokens[
|
|
|
|
| 210 |
def prepare_example_print(
    example_id,
    example_tokens,
    tokens_to_color,
    html,
    color_fn=color_tokens,
):
    """Build the display line for a single example.

    The result is the example id colored green, followed by ": ", the tokens
    rendered by ``color_fn`` with ``tokens_to_color`` highlighted, and a line
    break ("<br>" for HTML, "\\n" otherwise).
    """
    line_break = "<br>" if html else "\n"
    header = color_str(example_id, html, "green")
    body = color_fn(example_tokens, tokens_to_color, html=html)
    return header + ": " + body + line_break
|
|
|
|
| 241 |
print_output = [] if return_example_list else ""
|
| 242 |
curr_ex = ll[0][0]
|
| 243 |
total_examples = 0
|
| 244 |
+
tokens_to_color = []
|
| 245 |
+
color_fn = color_tokens_automata if separate_states else partial(color_tokens, n=n)
|
|
|
|
|
|
|
| 246 |
for idx in indices:
|
| 247 |
if total_examples > max_examples:
|
| 248 |
break
|
|
|
|
| 252 |
curr_ex_output = prepare_example_print(
|
| 253 |
curr_ex,
|
| 254 |
tokens[curr_ex],
|
| 255 |
+
tokens_to_color,
|
| 256 |
html,
|
| 257 |
+
color_fn,
|
| 258 |
)
|
| 259 |
total_examples += 1
|
| 260 |
if return_example_list:
|
| 261 |
+
print_output.append((curr_ex_output, len(tokens_to_color)))
|
| 262 |
else:
|
| 263 |
print_output += curr_ex_output
|
| 264 |
curr_ex = i
|
| 265 |
+
tokens_to_color = []
|
| 266 |
+
tokens_to_color.append(j)
|
| 267 |
curr_ex_output = prepare_example_print(
|
| 268 |
curr_ex,
|
| 269 |
tokens[curr_ex],
|
| 270 |
+
tokens_to_color,
|
| 271 |
html,
|
| 272 |
+
color_fn,
|
| 273 |
)
|
| 274 |
if return_example_list:
|
| 275 |
+
print_output.append((curr_ex_output, len(tokens_to_color)))
|
| 276 |
else:
|
| 277 |
print_output += curr_ex_output
|
| 278 |
asterisk_str = "********************************************"
|
| 279 |
+
print_output += color_str(asterisk_str, html, "green")
|
| 280 |
total_examples += 1
|
| 281 |
|
| 282 |
return print_output
|