### Quickstart Example
First, follow the instructions elsewhere in this repo to start a vLLM server hosting the LoRA and/or aLoRA adapters. Once the server is running, it can be queried via the OpenAI API.
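As a rough sketch only (the base model name and adapter path below are assumptions, and the serving instructions elsewhere in this repo are authoritative), such a server might be started with vLLM's LoRA support, registering the adapter under the model name `uncertainty` used in the example that follows:

```bash
# Sketch, not an authoritative command: substitute the correct base model and
# the actual adapter directory for the uncertainty intrinsic on your machine.
vllm serve ibm-granite/granite-3.3-8b-instruct \
  --enable-lora \
  --lora-modules uncertainty=<path-to-uncertainty-adapter>
```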

An end-to-end example of calling this intrinsic follows.

```python
import openai
import granite_common

QUESTION = "What is IBM?"
RESPONSE = ...  # this should be generated by the base model corresponding to the chosen adapter

# Chat completion request containing the question and the response whose
# certainty is to be scored. The model name must match the adapter name
# registered with the vLLM server.
request = {
    "messages": [
        {
            "content": QUESTION,
            "role": "user"
        },
        {
            "content": RESPONSE,
            "role": "assistant"
        }
    ],
    "model": "uncertainty",
    "temperature": 0.0
}
openai_base_url = ...  # base URL of the running vLLM server
openai_api_key = ...
io_yaml_file = "./rag_intrinsics_lib/uncertainty/.../io.yaml"

# The rewriter reshapes a plain chat request into the form the intrinsic
# expects; the result processor decodes the model's raw output.
rewriter = granite_common.IntrinsicsRewriter(config_file=io_yaml_file)
result_processor = granite_common.IntrinsicsResultProcessor(config_file=io_yaml_file)

rewritten_request = rewriter.transform(request)

# Send the rewritten request to the vLLM server via the OpenAI client.
client = openai.OpenAI(base_url=openai_base_url, api_key=openai_api_key)
chat_completion = client.chat.completions.create(**rewritten_request.model_dump())

transformed_completion = result_processor.transform(chat_completion)

print(transformed_completion.model_dump_json(indent=2))
```
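
The processed result is printed as JSON. As a purely illustrative follow-up (the exact shape of the processed completion is an assumption here, not a documented guarantee), the score can then be read from the usual OpenAI chat-completion fields:

```python
# Illustrative sketch: assumes the processed completion keeps the OpenAI
# chat-completion layout, with the certainty score in the message content.
certainty = transformed_completion.choices[0].message.content
print(f"Certainty score: {certainty}")
```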