Low-CPU Granite 4.0?

#2
Files changed (1)
  1. README.md +63 -0
README.md ADDED
@@ -0,0 +1,63 @@
+ ---
+ license: bsl-1.0
+ datasets:
+ - JDhruv14/Bhagavad-Gita_Dataset
+ metrics:
+ - character
+ base_model:
+ - ibm-granite/granite-docling-258M
+ new_version: ibm-granite/granite-docling-258M
+ pipeline_tag: summarization
+ library_name: fastai
+ tags:
+ - art
+ ---
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ device = "cuda"  # set to "cpu" if no GPU is available
+ model_path = "ibm-granite/granite-4.0-micro"
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ # drop device_map if running on CPU
+ model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
+ model.eval()
+
+ tools = [
+     {
+         "type": "function",
+         "function": {
+             "name": "get_current_weather",
+             "description": "Get the current weather for a specified city.",
+             "parameters": {
+                 "type": "object",
+                 "properties": {
+                     "city": {
+                         "type": "string",
+                         "description": "Name of the city"
+                     }
+                 },
+                 "required": ["city"]
+             }
+         }
+     }
+ ]
+
+ # change input text as desired
+ chat = [
+     {"role": "user", "content": "What's the weather like in Boston right now?"},
+ ]
+ chat = tokenizer.apply_chat_template(chat,
+                                      tokenize=False,
+                                      tools=tools,
+                                      add_generation_prompt=True)
+ # tokenize the text
+ input_tokens = tokenizer(chat, return_tensors="pt").to(device)
+ # generate output tokens
+ output = model.generate(**input_tokens,
+                         max_new_tokens=100)
+ # decode output tokens into text
+ output = tokenizer.batch_decode(output)
+ # print output
+ print(output[0])
+ ```
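Since the title asks about running Granite 4.0 with a small CPU footprint, here is a minimal CPU-only sketch of the snippet above. It relies on the stock `transformers` loading options `low_cpu_mem_usage=True` and `torch_dtype=torch.bfloat16`; the model id `ibm-granite/granite-4.0-micro` is carried over from the README, and the rest is illustrative rather than part of this PR.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "ibm-granite/granite-4.0-micro"
tokenizer = AutoTokenizer.from_pretrained(model_path)

# CPU-only load: low_cpu_mem_usage streams weights into the model
# instead of holding a second full copy in host memory during loading,
# and bfloat16 halves the footprint of fp32 weights (assumes the
# CPU/PyTorch build supports bf16; fall back to float32 if not).
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)
model.eval()

chat = [{"role": "user", "content": "What's the weather like in Boston right now?"}]
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
input_tokens = tokenizer(prompt, return_tensors="pt")  # tensors stay on CPU

with torch.no_grad():  # no autograd state needed for generation
    output = model.generate(**input_tokens, max_new_tokens=100)
print(tokenizer.batch_decode(output)[0])
```

If memory is still tight, a quantized build (for example a community GGUF file run through llama.cpp) is the usual next step, though that is outside the scope of this snippet.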