Implement running locally with local GGUF files

Also added architecture-related metadata defaults.

Files changed:
- README.md +1 -1
- _hf_gguf.py +76 -1
- app.py +124 -20
- requirements.txt +2 -2

README.md (CHANGED)

````diff
@@ -4,7 +4,7 @@ emoji: 🏢
 colorFrom: blue
 colorTo: purple
 sdk: gradio
-sdk_version: 5.
+sdk_version: 5.25.2
 python_version: 3.11
 app_file: app.py
 pinned: false
````

_hf_gguf.py (CHANGED)

````diff
@@ -71,6 +71,8 @@ class GGUFValueType(IntEnum):
 
 
 standard_metadata = {
+    "adapter.type": (GGUFValueType.STRING, "lora"),
+    "adapter.lora.alpha": (GGUFValueType.FLOAT32, 16),
     "general.type": (GGUFValueType.STRING, "model"),
     "general.architecture": (GGUFValueType.STRING, "llama"),
     "general.quantization_version": (GGUFValueType.UINT32, 2),
@@ -141,6 +143,79 @@ standard_metadata = {
 }
 
 
+standard_metadata_architecture = {
+    "{arch}.vocab_size": (GGUFValueType.UINT32, 0),
+    "{arch}.context_length": (GGUFValueType.UINT32, 0),
+    "{arch}.embedding_length": (GGUFValueType.UINT32, 0),
+    "{arch}.features_length": (GGUFValueType.UINT32, 0),
+    "{arch}.block_count": (GGUFValueType.UINT32, 0),
+    "{arch}.leading_dense_block_count": (GGUFValueType.UINT32, 0),
+    "{arch}.feed_forward_length": (GGUFValueType.UINT32, 0),
+    "{arch}.expert_feed_forward_length": (GGUFValueType.UINT32, 0),
+    "{arch}.expert_shared_feed_forward_length": (GGUFValueType.UINT32, 0),
+    "{arch}.use_parallel_residual": (GGUFValueType.BOOL, False),
+    "{arch}.tensor_data_layout": (GGUFValueType.STRING, "Meta AI original pth"),
+    "{arch}.expert_count": (GGUFValueType.UINT32, 0),
+    "{arch}.expert_used_count": (GGUFValueType.UINT32, 0),
+    "{arch}.expert_shared_count": (GGUFValueType.UINT32, 0),
+    "{arch}.expert_weights_scale": (GGUFValueType.FLOAT32, 1),
+    "{arch}.expert_weights_norm": (GGUFValueType.BOOL, False),
+    "{arch}.expert_gating_func": (GGUFValueType.UINT32, 1),
+    "{arch}.pooling_type": (GGUFValueType.UINT32, 0),
+    "{arch}.logit_scale": (GGUFValueType.FLOAT32, 1),
+    "{arch}.decoder_start_token_id": (GGUFValueType.UINT32, 0),
+    "{arch}.attn_logit_softcapping": (GGUFValueType.FLOAT32, 0),
+    "{arch}.final_logit_softcapping": (GGUFValueType.FLOAT32, 0),
+    "{arch}.swin_norm": (GGUFValueType.BOOL, False),
+    "{arch}.rescale_every_n_layers": (GGUFValueType.UINT32, 0),
+    "{arch}.time_mix_extra_dim": (GGUFValueType.UINT32, 0),
+    "{arch}.time_decay_extra_dim": (GGUFValueType.UINT32, 0),
+    "{arch}.residual_scale": (GGUFValueType.FLOAT32, 1),
+    "{arch}.embedding_scale": (GGUFValueType.FLOAT32, 1),
+    "{arch}.token_shift_count": (GGUFValueType.UINT32, 0),
+    "{arch}.interleave_moe_layer_step": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.head_count": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.head_count_kv": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.max_alibi_bias": (GGUFValueType.FLOAT32, 0),
+    "{arch}.attention.clamp_kqv": (GGUFValueType.FLOAT32, 0),
+    "{arch}.attention.key_length": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.value_length": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.layer_norm_epsilon": (GGUFValueType.FLOAT32, 0),
+    "{arch}.attention.layer_norm_rms_epsilon": (GGUFValueType.FLOAT32, 0),
+    "{arch}.attention.group_norm_epsilon": (GGUFValueType.FLOAT32, 0),
+    "{arch}.attention.group_norm_groups": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.causal": (GGUFValueType.BOOL, False),
+    "{arch}.attention.q_lora_rank": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.kv_lora_rank": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.decay_lora_rank": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.iclr_lora_rank": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.value_residual_mix_lora_rank": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.gate_lora_rank": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.relative_buckets_count": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.sliding_window": (GGUFValueType.UINT32, 0),
+    "{arch}.attention.scale": (GGUFValueType.FLOAT32, 1),
+    "{arch}.rope.dimension_count": (GGUFValueType.UINT32, 0),
+    "{arch}.rope.dimension_sections": (GGUFValueType.UINT32, []),
+    "{arch}.rope.freq_base": (GGUFValueType.FLOAT32, 0),
+    "{arch}.rope.scaling.type": (GGUFValueType.STRING, "none"),
+    "{arch}.rope.scaling.factor": (GGUFValueType.FLOAT32, 1),
+    "{arch}.rope.scaling.attn_factor": (GGUFValueType.FLOAT32, 1),
+    "{arch}.rope.scaling.original_context_length": (GGUFValueType.UINT32, 0),
+    "{arch}.rope.scaling.finetuned": (GGUFValueType.BOOL, False),
+    "{arch}.rope.scaling.yarn_log_multiplier": (GGUFValueType.FLOAT32, 1),
+    "{arch}.ssm.conv_kernel": (GGUFValueType.UINT32, 0),
+    "{arch}.ssm.inner_size": (GGUFValueType.UINT32, 0),
+    "{arch}.ssm.state_size": (GGUFValueType.UINT32, 0),
+    "{arch}.ssm.time_step_rank": (GGUFValueType.UINT32, 0),
+    "{arch}.ssm.dt_b_c_rms": (GGUFValueType.BOOL, False),
+    "{arch}.wkv.head_size": (GGUFValueType.UINT32, 0),
+    "{arch}.posnet.embedding_length": (GGUFValueType.UINT32, 0),
+    "{arch}.posnet.block_count": (GGUFValueType.UINT32, 0),
+    "{arch}.convnext.embedding_length": (GGUFValueType.UINT32, 0),
+    "{arch}.convnext.block_count": (GGUFValueType.UINT32, 0),
+}
+
+
 deprecated_metadata = {
     "tokenizer.ggml.prefix_token_id",
     "tokenizer.ggml.suffix_token_id",
@@ -374,7 +449,7 @@ class HuggingGGUFstream:
         if (alignment := self.metadata.get('general.alignment')) is not None:
            self.alignment = alignment.value
 
-        self.metaend = self.fp.loc
+        self.metaend = self.fp.loc if hasattr(self.fp, 'loc') else self.fp.tell()
         self.offset = self.metaend % self.alignment
 
     def adjust_padding(
````
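
The new `standard_metadata_architecture` table holds per-architecture defaults as key templates: the `{arch}` placeholder is substituted with the model's `general.architecture` value before any lookup, so one table serves every architecture. A minimal sketch of the expansion, with the table abridged and value types reduced to strings for illustration:

```python
# Abridged stand-in for the table above; the real one maps to (GGUFValueType, default).
standard_metadata_architecture = {
    "{arch}.vocab_size": ("UINT32", 0),
    "{arch}.context_length": ("UINT32", 0),
}

arch = "llama"  # in the app this comes from the file's general.architecture key

# Expanding the templates yields the concrete keys offered as editing defaults:
concrete = {k.format(arch = arch): v for k, v in standard_metadata_architecture.items()}
print(sorted(concrete))  # ['llama.context_length', 'llama.vocab_size']
```

The `metaend` change in the last hunk is what lets the stream reader work on plain local files as well: fsspec's buffered remote files track their read position in a `.loc` attribute, whereas ordinary local file objects only expose `tell()`, hence the `hasattr` fallback.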

app.py (CHANGED)

````diff
@@ -1,6 +1,8 @@
 import gradio as gr
 import json
+import os
 import posixpath
+import sys
 from fastapi import HTTPException, Path, Query, Request
 from fastapi.responses import StreamingResponse
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
@@ -8,8 +10,15 @@ from huggingface_hub import HfApi, HfFileSystem, auth_check
 from typing import Annotated, Any, NamedTuple
 from urllib.parse import urlencode
 
-from _hf_explorer import FileExplorer
-from _hf_gguf import standard_metadata, deprecated_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
+local_folder = None
+if len(sys.argv) == 2:
+    from fsspec.implementations.local import LocalFileSystem
+    from gradio import FileExplorer
+
+    local_folder = posixpath.normpath(os.path.abspath(os.path.expanduser(sys.argv[1])).replace('\\', '/'))
+else:
+    from _hf_explorer import FileExplorer
+from _hf_gguf import standard_metadata, standard_metadata_architecture, deprecated_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
 
 
 hfapi = HfApi()
````
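
When a folder is passed on the command line it is expanded and normalized into a forward-slash absolute path before being used as the explorer's `root_dir` and as the prefix for download paths. A quick illustration of the normalization chain, with hypothetical inputs:

```python
import os
import posixpath

# Hypothetical inputs; the output depends on platform and current directory.
for arg in ('~/models', './ggufs/../models'):
    path = posixpath.normpath(os.path.abspath(os.path.expanduser(arg)).replace('\\', '/'))
    print(arg, '->', path)  # e.g. '~/models' -> '/home/user/models' on Linux
```

The `.replace('\\', '/')` step matters on Windows, where `os.path.abspath` produces backslash separators that the posixpath-based handling downstream would otherwise trip over.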

````diff
@@ -78,21 +87,27 @@ with gr.Blocks(
             search_type = "model",
             sumbit_on_select = True,
             scale = 2,
+            visible = not local_folder,
         )
 
         hf_branch = gr.Dropdown(
             None,
             label = "Branch",
             scale = 1,
+            visible = not local_folder,
         )
 
         gr.LoginButton(
             "Sign in to access gated/private repos",
             scale = 1,
+            visible = not local_folder,
         )
 
         hf_file = FileExplorer(
-
+            "**/*.gguf",
+            file_count = "single",
+            root_dir = local_folder,
+            visible = bool(local_folder),
         )
 
         with gr.Row():
@@ -203,6 +218,8 @@
             column_widths = ["35%", "15%", "50%"],
             wrap = True,
             interactive = False,
+            show_search = "filter",
+            show_copy_button = True,
             visible = False,
         )
 
````

````diff
@@ -212,6 +229,46 @@
 
 An advanced GGUF editor, reading GGUF files directly from Hugging Face repositories and applying changes to your own copies.
 
+<details>
+<summary>Running the editor locally</summary>
+
+* Clone the space
+```bash
+git clone https://huggingface.co/spaces/CISCai/gguf-editor
+cd gguf-editor
+```
+* Create a virtual environment
+```bash
+python3 -m venv .venv
+```
+* Install dependencies
+```
+.venv/bin/pip install -r requirements.txt
+```
+* Gradio requires a HF_TOKEN for the LoginButton/OAuth, so log in to Hugging Face
+```
+.venv/bin/huggingface-cli login
+```
+* Start the server
+```
+.venv/bin/python app.py
+```
+* Browse to http://localhost:7860
+
+</details>
+
+<details>
+<summary>Editing local GGUF files</summary>
+
+* Set up the application locally (see above)
+* Start the server with additional parameter
+```
+.venv/bin/python app.py "path/to/gguf/folder"
+```
+* Browse to http://localhost:7860
+
+</details>
+
 Below you will find a collection of example use-cases to show you how to perform a few common GGUF editing operations:
 """,
 )
````

````diff
@@ -678,7 +735,7 @@ Any framework based on `llama-cpp-python` will let you select which chat templat
         hf_file: FileExplorer(
             "**/*.gguf",
             file_count = "single",
-            root_dir = repo,
+            root_dir = local_folder or repo,
             branch = branch,
             token = oauth_token.token if oauth_token else False,
             visible = True,
````

````diff
@@ -815,9 +872,12 @@
     if not repo_file:
         return
 
-    fs = HfFileSystem(
-        token = oauth_token.token if oauth_token else None,
-    )
+    if local_folder:
+        fs = LocalFileSystem()
+    else:
+        fs = HfFileSystem(
+            token = oauth_token.token if oauth_token else None,
+        )
 
     try:
         progress(0, desc = 'Loading file...')
````
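
`LocalFileSystem` and `HfFileSystem` both implement fsspec's `AbstractFileSystem` interface, so everything downstream (`fs.open`, `fs.ls`) stays filesystem-agnostic and only the construction differs. A minimal sketch of the pattern; the `open_gguf` helper is illustrative, not part of the app:

```python
from fsspec.implementations.local import LocalFileSystem
from huggingface_hub import HfFileSystem

def open_gguf(path: str, local: bool, token: str | None = None):
    # Identical interface either way; only the constructor differs.
    fs = LocalFileSystem() if local else HfFileSystem(token = token)
    return fs.open(path, 'rb')

# open_gguf('user/repo/model.gguf', local = False)    # streams from the Hub
# open_gguf('/data/models/model.gguf', local = True)  # reads from disk
```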

````diff
@@ -829,11 +889,18 @@
             cache_type = "readahead",
         ) as fp:
             progress(0, desc = 'Reading header...')
+
+            if local_folder:
+                fp.details = fs.ls(
+                    repo_file,
+                    detail = True,
+                )[0]
+
             gguf = HuggingGGUFstream(fp)
             num_metadata = gguf.header['metadata'].value
             metadata = gguf.read_metadata()
 
-            meta.var['repo_file'] = repo_file
+            meta.var['repo_file'] = repo_file[len(local_folder) + 1:] if local_folder else repo_file
             meta.var['branch'] = branch
 
             for k, v in progress.tqdm(metadata, desc = 'Reading metadata...', total = num_metadata, unit = f' of {num_metadata} metadata keys...'):
````
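
The `fp.details` assignment papers over a difference between the two filesystems: `HfFileSystem` file objects carry an fsspec info dict (including the file size) already attached, while plain local file objects do not, so in local mode the entry returned by `fs.ls(..., detail = True)` is attached by hand before `HuggingGGUFstream` consumes it. Roughly, with a hypothetical path:

```python
from fsspec.implementations.local import LocalFileSystem

fs = LocalFileSystem()
path = '/data/models/model.gguf'  # hypothetical

with fs.open(path, 'rb') as fp:
    # For a file path, ls(detail=True) returns a single info dict with the
    # standard fsspec keys such as 'name', 'size' and 'type'.
    fp.details = fs.ls(path, detail = True)[0]
    print(fp.details['size'])
```

The `meta.var['repo_file']` slice in the same hunk strips the `local_folder` prefix plus the separating `/`, so the UI and the download route see a path relative to the served folder.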

````diff
@@ -861,10 +928,15 @@
         )
         return
 
+    model_architecture = meta.key.get('general.architecture', (None, None))[1]
+    standard_metakeys = set(standard_metadata.keys())
+    if model_architecture:
+        standard_metakeys |= set(k.format(arch = model_architecture) for k in standard_metadata_architecture.keys())
+
     yield {
         meta_state: meta,
         meta_keys: gr.Dropdown(
-            sorted(meta.key.keys() | standard_metadata.keys()),
+            sorted(meta.key.keys() | standard_metakeys),
             value = '',
             visible = True,
         ),
````
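
The dropdown is populated from `meta.key.keys() | standard_metakeys`, which works because dict views are set-like and support set operators directly. Tiny illustration:

```python
file_keys = {'general.architecture': 0, 'llama.context_length': 1}  # stand-in for meta.key
standard_metakeys = {'general.name', 'llama.context_length', 'llama.vocab_size'}

# dict.keys() returns a set-like view, so `|` unions it with a plain set.
print(sorted(file_keys.keys() | standard_metakeys))
# ['general.architecture', 'general.name', 'llama.context_length', 'llama.vocab_size']
```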

````diff
@@ -892,8 +964,12 @@
     meta: MetadataState,
     key: str | None,
 ):
+    default_metadata = None
+    if model_architecture := meta.key.get('general.architecture', (None, None))[1]:
+        default_metadata = standard_metadata_architecture.get(key.replace(model_architecture + '.', '{arch}.'))
+
     typ = None
-    if (val := meta.key.get(key, standard_metadata.get(key))) is not None:
+    if (val := meta.key.get(key, standard_metadata.get(key, default_metadata))) is not None:
         typ = GGUFValueType(val[0]).name
     elif key:
         if key.startswith('tokenizer.chat_template.'):
@@ -944,7 +1020,11 @@
     val = None
     tokens = meta.key.get('tokenizer.ggml.tokens', (-1, []))[1]
 
-    if (data := meta.key.get(key, standard_metadata.get(key))) is not None:
+    default_metadata = None
+    if model_architecture := meta.key.get('general.architecture', (None, None))[1]:
+        default_metadata = standard_metadata_architecture.get(key.replace(model_architecture + '.', '{arch}.'))
+
+    if (data := meta.key.get(key, standard_metadata.get(key, default_metadata))) is not None:
         typ = data[0]
         val = data[1]
     elif not key:
````
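
Default lookups for concrete keys run in the reverse direction of the expansion shown earlier: the architecture prefix is swapped back to the literal `{arch}` placeholder, and that template is looked up. A small standalone illustration with an abridged table:

```python
# Abridged stand-in; the real table lives in _hf_gguf.py.
standard_metadata_architecture = {
    "{arch}.rope.freq_base": ("FLOAT32", 0),
}

key = "llama.rope.freq_base"
model_architecture = "llama"

template = key.replace(model_architecture + '.', '{arch}.')
print(template)                                      # {arch}.rope.freq_base
print(standard_metadata_architecture.get(template))  # ('FLOAT32', 0)
```

Note that `str.replace` substitutes every occurrence of the prefix, not only a leading one; in practice metadata keys contain the architecture name only as their first component, so this is harmless.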

````diff
@@ -1048,9 +1128,17 @@
     for k, v in meta.key.items():
         m.append([*human_readable_metadata(meta, k, v[0], v[1])])
 
-
-
-
+    query_params = {
+        'branch': meta.var['branch'],
+        'session': request.session_hash,
+        'state': str(meta_state._id),
+    }
+
+    if not query_params['branch'] or query_params['branch'] == 'main':
+        del query_params['branch']
+
+    link = request.request.url_for('download', repo_file = meta.var['repo_file']).include_query_params(**query_params)
+    link = 'https' + str(link)[4:] if link.hostname not in ('localhost', '127.0.0.1', '::1') and link.scheme == 'http' else str(link)
 
     permalink = None
     if meta.rem or meta.add:
````
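
The download link is built with Starlette's `url_for`/`include_query_params` on the underlying request, and the scheme upgrade is plain string surgery: presumably the app sees plain `http` behind the Space's proxy while public links should be `https`, and localhost keeps its scheme. A sketch of the rewrite on a hypothetical URL:

```python
link = "http://ciscai-gguf-editor.hf.space/download/model.gguf?session=abc"  # hypothetical
hostname = "ciscai-gguf-editor.hf.space"

if hostname not in ('localhost', '127.0.0.1', '::1') and link.startswith('http://'):
    link = 'https' + link[4:]  # swap only the scheme, keep everything else verbatim

print(link)  # https://ciscai-gguf-editor.hf.space/download/model.gguf?session=abc
```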

````diff
@@ -1063,9 +1151,13 @@
             safe = '[]{}:"\',',
         )
 
-        if len(permalink) > 8192:
+        if len(permalink) > 8192 or local_folder:
             permalink = None
 
+    standard_metakeys = set(standard_metadata.keys())
+    if model_architecture := meta.key.get('general.architecture', (None, None))[1]:
+        standard_metakeys |= set(k.format(arch = model_architecture) for k in standard_metadata_architecture.keys())
+
     return {
         meta_state: meta,
         meta_changes: gr.HighlightedText(
@@ -1076,7 +1168,7 @@
             m,
         ),
         meta_keys: gr.Dropdown(
-            sorted(meta.key.keys() | standard_metadata.keys()),
+            sorted(meta.key.keys() | standard_metakeys),
             value = '',
         ),
         meta_permalink: gr.Markdown(
````

````diff
@@ -1409,9 +1501,12 @@ def stream_repo_file(
     rem_meta: list[str] | None,
     token: str | None = None,
 ):
-    fs = HfFileSystem(
-        token = token,
-    )
+    if local_folder:
+        fs = LocalFileSystem()
+    else:
+        fs = HfFileSystem(
+            token = token,
+        )
 
     with fs.open(
         repo_file,
@@ -1426,6 +1521,12 @@
         if not add_meta:
             add_meta = []
 
+        if local_folder:
+            fp.details = fs.ls(
+                repo_file,
+                detail = True,
+            )[0]
+
         gguf = HuggingGGUFstream(fp)
         for _ in gguf.read_metadata():
             pass
````

````diff
@@ -1489,12 +1590,15 @@ if __name__ == "__main__":
     ):
         token = request.session.get('oauth_info', {}).get('access_token')
 
-        if posixpath.normpath(repo_file) != repo_file or '\\' in repo_file or repo_file.startswith('../') or repo_file.startswith('/') or repo_file.count('/') < 2:
+        if posixpath.normpath(repo_file) != repo_file or '\\' in repo_file or ':' in repo_file or repo_file.startswith('../') or repo_file.startswith('/') or (repo_file.count('/') < 2 and not local_folder):
             raise HTTPException(
                 status_code = 404,
                 detail = 'Invalid repository',
             )
 
+        if local_folder:
+            repo_file = os.path.join(local_folder, repo_file)
+
         if session and state is not None and session in request.app.state_holder and state in request.app.state_holder[session]:
             meta: MetadataState = request.app.state_holder[session][state]
 
````
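
The hardened path check rejects anything that could escape the served tree: backslashes, drive-letter colons, absolute or parent-relative paths, and, in Hub mode, paths without a `user/repo/` prefix; only after that is the request joined onto `local_folder`. A standalone sketch of the predicate (hypothetical helper):

```python
import posixpath

def valid_repo_file(repo_file: str, local_folder: str | None) -> bool:
    # Mirrors the guard above; True means the path is safe to serve.
    return not (
        posixpath.normpath(repo_file) != repo_file
        or '\\' in repo_file
        or ':' in repo_file
        or repo_file.startswith('../')
        or repo_file.startswith('/')
        or (repo_file.count('/') < 2 and not local_folder)
    )

print(valid_repo_file('user/repo/model.gguf', None))  # True
print(valid_repo_file('../etc/passwd', None))         # False (parent-relative)
print(valid_repo_file('C:/models/model.gguf', None))  # False (drive colon)
print(valid_repo_file('model.gguf', '/data/ggufs'))   # True in local mode
```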

requirements.txt (CHANGED)

````diff
@@ -1,4 +1,4 @@
-
-huggingface_hub==0.
+gradio[oauth]==5.25.2
+huggingface_hub==0.30.2
 # gradio_huggingfacehub_search==0.0.8
 https://huggingface.co/spaces/CISCai/chat-template-editor/resolve/main/gradio_huggingfacehub_search-0.0.8-py3-none-any.whl
````