Commit 141e879
Parent(s): a854397
add custom handler

Files changed:
- README.md +145 -72
- optimize_model.ipynb +46 -4
README.md
CHANGED
@@ -7,20 +7,21 @@ tags:
library_name: generic
---

- # Optimized and Quantized [
-
- This repository implements a `custom`
- Below is also describe how we converted & optimized the model, based on the [Accelerate
-
- To use deploy this model a an Inference Endpoint you have to select `Custom` as task to use the `pipeline.py` file. -> _double check if it is selected_

### Expected request payload

```json
{
-
}
```
@@ -38,9 +39,8 @@ ENDPOINT_URL = ""
HF_TOKEN = ""

- def predict(
-
- payload = {"inputs": document_string}
response = r.post(
    ENDPOINT_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"}, json=payload
)
@@ -48,65 +48,114 @@ def predict(document_string:str=None):

prediction = predict(
-
)
```

expected output

```python
- {
-
- -0.013600599952042103,
- ...
}
```

-

Steps:
-
-
-

Helpful links:
- * [Accelerate
* [Create Custom Handler Endpoints](https://link-to-docs)

## Setup & Installation

```python
%%writefile requirements.txt
- optimum[onnxruntime]==1.
mkl-include
mkl
```

- install requirements

```python
!pip install -r requirements.txt
```

## 1. Convert model to ONNX

```python
- from optimum.onnxruntime import
from transformers import AutoTokenizer
from pathlib import Path

- model_id="
onnx_path = Path(".")

# load vanilla transformers and convert to onnx
- model =
tokenizer = AutoTokenizer.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
@@ -122,55 +171,48 @@ tokenizer.save_pretrained(onnx_path)

from optimum.onnxruntime import ORTOptimizer, ORTQuantizer
from optimum.onnxruntime.configuration import OptimizationConfig, AutoQuantizationConfig

- #
- optimizer = ORTOptimizer.from_pretrained(
optimization_config = OptimizationConfig(optimization_level=99) # enable all optimizations

- #
- optimizer.
-
-     onnx_optimized_model_output_path=onnx_path / "model-optimized.onnx",
-     optimization_config=optimization_config,
- )


# create ORTQuantizer and define quantization configuration
- dynamic_quantizer = ORTQuantizer.from_pretrained(
dqconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)

# apply the quantization configuration to the model
- model_quantized_path = dynamic_quantizer.
-
-     onnx_quantized_model_output_path=onnx_path / "model-quantized.onnx",
    quantization_config=dqconfig,
)

-
```

## 3. Create Custom Handler for Inference Endpoints

```python
- %%writefile
from typing import Dict, List, Any
- from optimum.onnxruntime import
- from transformers import AutoTokenizer
- import torch.nn.functional as F
- import torch
-
- # copied from the model card
- def mean_pooling(model_output, attention_mask):
-     token_embeddings = model_output[0] #First element of model_output contains all token embeddings
-     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-     return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


- class
    def __init__(self, path=""):
        # load the optimized model
-       self.model =
        self.tokenizer = AutoTokenizer.from_pretrained(path)

    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
        """

@@ -178,42 +220,73 @@ class PreTrainedPipeline():
        data (:obj:):
            includes the input data and the parameters for the inference.
        Return:
-           A :obj:`list`:. The list contains the
        """
        inputs = data.get("inputs", data)
-
-       # tokenize the input
-       encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, return_tensors='pt')
        # run the model
-
-       #
-
-       # Normalize embeddings
-       sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
-       # postprocess the prediction
-       return {"embeddings": sentence_embeddings.tolist()}
```

-

```python
- from

# init handler
- my_handler =

# prepare sample payload
-

-
- %timeit my_handler(request)
```

-

```
-
```

library_name: generic
---

# Optimized and Quantized [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2) with a custom handler.py

This repository implements a `custom` handler for `question-answering` for 🤗 Inference Endpoints for accelerated inference using [🤗 Optimum](https://huggingface.co/docs/optimum/index). The code for the customized handler is in the [handler.py](https://huggingface.co/philschmid/roberta-base-squad2-optimized/blob/main/handler.py).

Below we also describe how we converted & optimized the model, based on the [Accelerate Transformers with Hugging Face Optimum](https://huggingface.co/blog/optimum-inference) blog post. You can also check out the [notebook](https://huggingface.co/philschmid/roberta-base-squad2-optimized/blob/main/optimize_model.ipynb).

### Expected request payload

```json
{
  "inputs": {
    "question": "As what is Philipp working?",
    "context": "Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value."
  }
}
```

HF_TOKEN = ""


def predict(question: str = None, context: str = None):
    payload = {"inputs": {"question": question, "context": context}}
    response = r.post(
        ENDPOINT_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"}, json=payload
    )


prediction = predict(
    question="As what is Philipp working?",
    context="Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science."
)
```

Expected output

```python
{
  'score': 0.4749588668346405,
  'start': 88,
  'end': 102,
  'answer': 'Technical Lead'
}
```


# Convert & Optimize model with Optimum

Steps:
1. [Convert model to ONNX](#1-convert-model-to-onnx)
2. [Optimize & quantize model with Optimum](#2-optimize--quantize-model-with-optimum)
3. [Create Custom Handler for Inference Endpoints](#3-create-custom-handler-for-inference-endpoints)
4. [Test Custom Handler Locally](#4-test-custom-handler-locally)
5. [Push to repository and create Inference Endpoint](#5-push-to-repository-and-create-inference-endpoint)

Helpful links:
* [Accelerate Transformers with Hugging Face Optimum](https://huggingface.co/blog/optimum-inference)
* [Optimizing Transformers for GPUs with Optimum](https://www.philschmid.de/optimizing-transformers-with-optimum-gpu)
* [Optimum Documentation](https://huggingface.co/docs/optimum/onnxruntime/modeling_ort)
* [Create Custom Handler Endpoints](https://link-to-docs)

## Setup & Installation

```python
%%writefile requirements.txt
optimum[onnxruntime]==1.4.0
mkl-include
mkl
```

```python
!pip install -r requirements.txt
```

## 0. Baseline Performance


```python
from transformers import pipeline

qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
```

Okay, let's test the performance (latency) with a sequence length of 128.


```python
context = "Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value."
question = "As what is Philipp working?"

payload = {"inputs": {"question": question, "context": context}}
```


```python
from time import perf_counter
import numpy as np

def measure_latency(pipe, payload):
    latencies = []
    # warm up
    for _ in range(10):
        _ = pipe(question=payload["inputs"]["question"], context=payload["inputs"]["context"])
    # Timed run
    for _ in range(50):
        start_time = perf_counter()
        _ = pipe(question=payload["inputs"]["question"], context=payload["inputs"]["context"])
        latency = perf_counter() - start_time
        latencies.append(latency)
    # Compute run statistics
    time_avg_ms = 1000 * np.mean(latencies)
    time_std_ms = 1000 * np.std(latencies)
    return f"Average latency (ms) - {time_avg_ms:.2f} +\- {time_std_ms:.2f}"

print(f"Vanilla model {measure_latency(qa, payload)}")
# Vanilla model Average latency (ms) - 64.15 +\- 2.44
```


## 1. Convert model to ONNX


```python
from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer
from pathlib import Path


model_id = "deepset/roberta-base-squad2"
onnx_path = Path(".")

# load vanilla transformers and convert to onnx
model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
from optimum.onnxruntime import ORTOptimizer, ORTQuantizer
from optimum.onnxruntime.configuration import OptimizationConfig, AutoQuantizationConfig

# Create the optimizer
optimizer = ORTOptimizer.from_pretrained(model)

# Define the optimization strategy by creating the appropriate configuration
optimization_config = OptimizationConfig(optimization_level=99) # enable all optimizations

# Optimize the model
optimizer.optimize(save_dir=onnx_path, optimization_config=optimization_config)
```
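
Before quantizing, the optimized graph can be loaded back and queried as a quick sanity check. This is a minimal sketch, assuming the optimizer wrote the default `model_optimized.onnx` file into `onnx_path` and reusing the `question` and `context` variables from the baseline section:

```python
from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline

# load the optimized ONNX graph and run a quick prediction
opt_model = ORTModelForQuestionAnswering.from_pretrained(onnx_path, file_name="model_optimized.onnx")
opt_tokenizer = AutoTokenizer.from_pretrained(onnx_path)
opt_qa = pipeline("question-answering", model=opt_model, tokenizer=opt_tokenizer)

print(opt_qa(question=question, context=context))
```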


```python
# create ORTQuantizer and define quantization configuration
dynamic_quantizer = ORTQuantizer.from_pretrained(onnx_path, file_name="model_optimized.onnx")
dqconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)

# apply the quantization configuration to the model
model_quantized_path = dynamic_quantizer.quantize(
    save_dir=onnx_path,
    quantization_config=dqconfig,
)
```
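
Dynamic quantization stores the weights in int8, so the easiest way to see its effect is to compare the size of the exported ONNX files. A small sketch, assuming the file names used above and listed in this repository (`model_optimized.onnx` and `model_optimized_quantized.onnx`):

```python
import os

# compare the on-disk size of the optimized and the quantized ONNX graphs
size_opt = os.path.getsize(onnx_path / "model_optimized.onnx") / (1024 * 1024)
size_quant = os.path.getsize(onnx_path / "model_optimized_quantized.onnx") / (1024 * 1024)

print(f"Optimized model size: {size_opt:.2f} MB")
print(f"Quantized model size: {size_quant:.2f} MB")
```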

## 3. Create Custom Handler for Inference Endpoints


```python
%%writefile handler.py
from typing import Dict, List, Any
from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline


class EndpointHandler():
    def __init__(self, path=""):
        # load the optimized model
        self.model = ORTModelForQuestionAnswering.from_pretrained(path, file_name="model_optimized_quantized.onnx")
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # create pipeline
        self.pipeline = pipeline("question-answering", model=self.model, tokenizer=self.tokenizer)

    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
        """
        data (:obj:):
            includes the input data and the parameters for the inference.
        Return:
            A :obj:`list`: The list contains the answer and scores of the inference inputs
        """
        inputs = data.get("inputs", data)
        # run the model
        prediction = self.pipeline(**inputs)
        # return prediction
        return prediction
```

## 4. Test Custom Handler Locally


```python
from handler import EndpointHandler

# init handler
my_handler = EndpointHandler(path=".")

# prepare sample payload
context = "Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value."
question = "As what is Philipp working?"

payload = {"inputs": {"question": question, "context": context}}

# test the handler
my_handler(payload)
```


```python
from time import perf_counter
import numpy as np

def measure_latency(handler, payload):
    latencies = []
    # warm up
    for _ in range(10):
        _ = handler(payload)
    # Timed run
    for _ in range(50):
        start_time = perf_counter()
        _ = handler(payload)
        latency = perf_counter() - start_time
        latencies.append(latency)
    # Compute run statistics
    time_avg_ms = 1000 * np.mean(latencies)
    time_std_ms = 1000 * np.std(latencies)
    return f"Average latency (ms) - {time_avg_ms:.2f} +\- {time_std_ms:.2f}"

print(f"Optimized & Quantized model {measure_latency(my_handler, payload)}")
# Optimized & Quantized model Average latency (ms) - 29.90 +\- 0.53
```

For comparison: `Vanilla model Average latency (ms) - 64.15 +\- 2.44`

## 5. Push to repository and create Inference Endpoint


```python
# add all our new files
!git add *
# commit our files
!git commit -m "add custom handler"
# push the files to the hub
!git push
```
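
After the push, an Inference Endpoint created from this repository will use `handler.py` to serve the quantized model. A minimal sketch for querying the deployed endpoint, mirroring the request example at the top of this README and assuming you fill in your own `ENDPOINT_URL` and `HF_TOKEN` placeholders:

```python
import requests as r

ENDPOINT_URL = ""  # URL of the deployed Inference Endpoint
HF_TOKEN = ""      # token with access to the endpoint

payload = {
    "inputs": {
        "question": "As what is Philipp working?",
        "context": "Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science.",
    }
}

# send the request and print the answer
response = r.post(
    ENDPOINT_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"}, json=payload
)
print(response.json())
```
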
optimize_model.ipynb
CHANGED

@@ -84,9 +84,20 @@
},
{
"cell_type": "code",
- "execution_count":
"metadata": {},
- "outputs": [
"source": [
"context=\"Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value.\" \n",
"question=\"As what is Philipp working?\" \n",

@@ -395,9 +406,33 @@
},
{
"cell_type": "code",
- "execution_count":
"metadata": {},
- "outputs": [
"source": [
"# add all our new files\n",
"!git add * \n",

@@ -406,6 +441,13 @@
"# push the files to the hub\n",
"!git push"
]
}
],
"metadata": {

},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'{\"inputs\": {\"question\": \"As what is Philipp working?\", \"context\": \"Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value.\"}}'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"context=\"Hello, my name is Philipp and I live in Nuremberg, Germany. Currently I am working as a Technical Lead at Hugging Face to democratize artificial intelligence through open source and open science. In the past I designed and implemented cloud-native machine learning architectures for fin-tech and insurance companies. I found my passion for cloud concepts and machine learning 5 years ago. Since then I never stopped learning. Currently, I am focusing myself in the area NLP and how to leverage models like BERT, Roberta, T5, ViT, and GPT2 to generate business value.\" \n",
"question=\"As what is Philipp working?\" \n",
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[main a854397] add custom handler\n",
" 14 files changed, 151227 insertions(+)\n",
" create mode 100644 README.md\n",
" create mode 100644 config.json\n",
" create mode 100644 handler.py\n",
" create mode 100644 merges.txt\n",
" create mode 100644 model.onnx\n",
" create mode 100644 model_optimized.onnx\n",
" create mode 100644 model_optimized_quantized.onnx\n",
" create mode 100644 optimize_model.ipynb\n",
" create mode 100644 ort_config.json\n",
" create mode 100644 requirements.txt\n",
" create mode 100644 special_tokens_map.json\n",
" create mode 100644 tokenizer.json\n",
" create mode 100644 tokenizer_config.json\n",
" create mode 100644 vocab.json\n",
"Username for 'https://huggingface.co': ^C\n"
]
}
],
"source": [
"# add all our new files\n",
"!git add * \n",

"# push the files to the hub\n",
"!git push"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {