Commit
·
4da517f
1
Parent(s):
326a7a8
Update README.md
Browse files
README.md
CHANGED
|
@@ -270,10 +270,11 @@ def measure_latency(handler,payload):
|
|
| 270 |
return f"Average latency (ms) - {time_avg_ms:.2f} +\- {time_std_ms:.2f}"
|
| 271 |
|
| 272 |
print(f"Optimized & Quantized model {measure_latency(my_handler,payload)}")
|
| 273 |
-
#
|
| 274 |
|
| 275 |
```
|
| 276 |
|
|
|
|
| 277 |
`Vanilla model Average latency (ms) - 64.15 +\- 2.44`
|
| 278 |
|
| 279 |
## 5. Push to repository and create Inference Endpoint
|
|
|
|
| 270 |
return f"Average latency (ms) - {time_avg_ms:.2f} +\- {time_std_ms:.2f}"
|
| 271 |
|
| 272 |
print(f"Optimized & Quantized model {measure_latency(my_handler,payload)}")
|
| 273 |
+
#
|
| 274 |
|
| 275 |
```
|
| 276 |
|
| 277 |
+
`Optimized & Quantized model Average latency (ms) - 29.90 +\- 0.53`
|
| 278 |
`Vanilla model Average latency (ms) - 64.15 +\- 2.44`
|
| 279 |
|
| 280 |
## 5. Push to repository and create Inference Endpoint
|