laolida-w committed on
Commit b28fb05 · verified
1 Parent(s): b7aaab4

Upload 5 files

Files changed (5)
  1. README.md +1 -2
  2. app.py +224 -0
  3. eval_dataset.py +283 -0
  4. requirements.txt +17 -0
  5. temp.py +7 -0
README.md CHANGED
@@ -8,5 +8,4 @@ sdk_version: 5.49.1
  app_file: app.py
  pinned: false
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,224 @@
+ import sys
+ from eval_dataset import SingleRegionCaptionDataset
+ from segment_anything import sam_model_registry, SamPredictor
+ import gradio as gr
+ import numpy as np
+ import cv2
+ import base64
+ import torch
+ from PIL import Image
+ import io
+ import argparse
+ from fastapi import FastAPI
+ from fastapi.staticfiles import StaticFiles
+ from transformers import AutoModel, AutoProcessor, GenerationConfig
+ from transformers import SamModel, SamProcessor
+ try:
+     from spaces import GPU
+ except ImportError:
+     print("Spaces not installed, using dummy GPU decorator")
+     def GPU(*args, **kwargs):
+         def decorator(fn):
+             return fn
+         return decorator
+
+ # Load SAM model
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ sam_model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device)
+ sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")
+ print("sam ready")
+ model_path = "HaochenWang/GAR-1B"
+ # Initialize the captioning model and processor
+ model = AutoModel.from_pretrained(
+     model_path,
+     trust_remote_code=True,
+     torch_dtype=torch.bfloat16,
+     device_map="cuda:0",
+ ).eval()
+
+ processor = AutoProcessor.from_pretrained(
+     model_path,
+     trust_remote_code=True,
+ )
+
+ @GPU(duration=75)
+ def image_to_sam_embedding(base64_image):
+     try:
+         # Decode base64 string to bytes
+         image_bytes = base64.b64decode(base64_image)
+
+         # Convert bytes to PIL Image
+         image = Image.open(io.BytesIO(image_bytes))
+
+         # Process image with SAM processor
+         inputs = sam_processor(image, return_tensors="pt").to(device)
+
+         # Get image embedding
+         with torch.no_grad():
+             image_embedding = sam_model.get_image_embeddings(inputs["pixel_values"])
+
+         # Convert to CPU and numpy
+         image_embedding = image_embedding.cpu().numpy()
+
+         # Encode the embedding as base64
+         embedding_bytes = image_embedding.tobytes()
+         embedding_base64 = base64.b64encode(embedding_bytes).decode('utf-8')
+
+         return embedding_base64
+     except Exception as e:
+         print(f"Error processing image: {str(e)}")
+         raise gr.Error(f"Failed to process image: {str(e)}")
+
+ @GPU(duration=75)
+ def describe(image_base64: str, mask_base64: str, query: str):
+     # Convert base64 to PIL Image
+     image_bytes = base64.b64decode(image_base64.split(',')[1] if ',' in image_base64 else image_base64)
+     img = Image.open(io.BytesIO(image_bytes))
+     mask_bytes = base64.b64decode(mask_base64.split(',')[1] if ',' in mask_base64 else mask_base64)
+     mask = Image.open(io.BytesIO(mask_bytes))
+     mask = np.array(mask.convert('L'))
+
+     prompt_number = model.config.prompt_numbers
+     prompt_tokens = [f"<Prompt{i_p}>" for i_p in range(prompt_number)] + ["<NO_Prompt>"]
+
+     # The mask is given as a numpy array and the image as a PIL image
+     dataset = SingleRegionCaptionDataset(
+         image=img,
+         mask=mask,
+         processor=processor,
+         prompt_number=prompt_number,
+         visual_prompt_tokens=prompt_tokens,
+         data_dtype=torch.bfloat16,
+     )
+
+     data_sample = dataset[0]
+
+     # Generate the caption
+     with torch.no_grad():
+         generate_ids = model.generate(
+             **data_sample,
+             generation_config=GenerationConfig(
+                 max_new_tokens=1024,
+                 # do_sample=False,
+                 eos_token_id=processor.tokenizer.eos_token_id,
+                 pad_token_id=processor.tokenizer.pad_token_id,
+             ),
+             return_dict=True,
+         )
+
+     output_caption = processor.tokenizer.decode(generate_ids.sequences[0], skip_special_tokens=True).strip()
+
+     # Stream the caption back one character at a time
+     text = ""
+     for char in output_caption:
+         text += char
+         yield text
+
+ @GPU(duration=75)
+ def describe_without_streaming(image_base64: str, mask_base64: str, query: str):
+     # Convert base64 to PIL Image
+     image_bytes = base64.b64decode(image_base64.split(',')[1] if ',' in image_base64 else image_base64)
+     img = Image.open(io.BytesIO(image_bytes))
+     mask_bytes = base64.b64decode(mask_base64.split(',')[1] if ',' in mask_base64 else mask_base64)
+     mask = Image.open(io.BytesIO(mask_bytes))
+     mask = np.array(mask.convert('L'))
+     prompt_number = model.config.prompt_numbers
+     prompt_tokens = [f"<Prompt{i_p}>" for i_p in range(prompt_number)] + ["<NO_Prompt>"]
+
+     # The mask is given as a numpy array and the image as a PIL image
+     dataset = SingleRegionCaptionDataset(
+         image=img,
+         mask=mask,
+         processor=processor,
+         prompt_number=prompt_number,
+         visual_prompt_tokens=prompt_tokens,
+         data_dtype=torch.bfloat16,
+     )
+
+     data_sample = dataset[0]
+
+     # Generate the caption
+     with torch.no_grad():
+         generate_ids = model.generate(
+             **data_sample,
+             generation_config=GenerationConfig(
+                 max_new_tokens=1024,
+                 # do_sample=False,
+                 eos_token_id=processor.tokenizer.eos_token_id,
+                 pad_token_id=processor.tokenizer.pad_token_id,
+             ),
+             return_dict=True,
+         )
+
+     output_caption = processor.tokenizer.decode(generate_ids.sequences[0], skip_special_tokens=True).strip()
+
+     return output_caption
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Describe Anything gradio demo")
+     parser.add_argument("--server_addr", "--host", type=str, default=None, help="The server address to listen on.")
+     parser.add_argument("--server_port", "--port", type=int, default=None, help="The port to listen on.")
+     parser.add_argument("--model-path", type=str, default="HaochenWang/GAR-1B", help="Path to the model checkpoint")
+     parser.add_argument("--prompt-mode", type=str, default="full+focal_crop", help="Prompt mode")
+     parser.add_argument("--conv-mode", type=str, default="v1", help="Conversation mode")
+     parser.add_argument("--temperature", type=float, default=0.2, help="Sampling temperature")
+     parser.add_argument("--top_p", type=float, default=0.5, help="Top-p for sampling")
+
+     args = parser.parse_args()
+
+     # Create Gradio interface
+     with gr.Blocks() as demo:
+         gr.Interface(
+             fn=image_to_sam_embedding,
+             inputs=gr.Textbox(label="Image Base64"),
+             outputs=gr.Textbox(label="Embedding Base64"),
+             title="Image Embedding Generator",
+             api_name="image_to_sam_embedding"
+         )
+         gr.Interface(
+             fn=describe,
+             inputs=[
+                 gr.Textbox(label="Image Base64"),
+                 gr.Text(label="Mask Base64"),
+                 gr.Text(label="Prompt")
+             ],
+             outputs=[
+                 gr.Text(label="Description")
+             ],
+             title="Mask Description Generator",
+             api_name="describe"
+         )
+         gr.Interface(
+             fn=describe_without_streaming,
+             inputs=[
+                 gr.Textbox(label="Image Base64"),
+                 gr.Text(label="Mask Base64"),
+                 gr.Text(label="Prompt")
+             ],
+             outputs=[
+                 gr.Text(label="Description")
+             ],
+             title="Mask Description Generator (Non-Streaming)",
+             api_name="describe_without_streaming"
+         )
+
+     # Keep launch() from blocking so the underlying FastAPI app can be patched after startup
+     demo._block_thread = demo.block_thread
+     demo.block_thread = lambda: None
+     demo.launch(
+         share=True,
+         server_name=args.server_addr,
+         server_port=args.server_port,
+         ssr_mode=False,
+     )
+
+     # Replace the default root route with the built front-end served from dist/
+     for route in demo.app.routes:
+         if route.path == "/":
+             demo.app.routes.remove(route)
+     demo.app.mount("/", StaticFiles(directory="dist", html=True), name="demo")
+
+     demo._block_thread()
+
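The three gr.Interface blocks above are exposed as named API endpoints rather than as an interactive UI. As a minimal client-side sketch (not part of this commit), the non-streaming endpoint could be called with gradio_client roughly as follows; the URL and file names are placeholders, and the prompt argument is accepted but not used by the handler:

import base64
from gradio_client import Client

def to_b64(path: str) -> str:
    # Read a local file and return its raw base64 encoding (no data-URL prefix required).
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

client = Client("http://localhost:7860")          # placeholder address of the running Space
caption = client.predict(
    to_b64("example.jpg"),                        # image, base64-encoded
    to_b64("example_mask.png"),                   # binary region mask, same size as the image
    "Describe this masked region in detail.",     # prompt (currently ignored by the handler)
    api_name="/describe_without_streaming",
)
print(caption)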
eval_dataset.py ADDED
@@ -0,0 +1,283 @@
+ # --------------------------------------------------------
+ # Copyright (2025) Bytedance Ltd. and/or its affiliates
+ # Licensed under the Apache License, Version 2.0 (the "License")
+ # Grasp Any Region Project
+ # Written by Haochen Wang
+ # --------------------------------------------------------
+
+ import os
+ import re
+ from copy import deepcopy
+
+ import numpy as np
+ import torch
+ from torch.utils.data import Dataset
+ from PIL import Image
+
+
+ class SingleRegionCaptionDataset(Dataset):
+     os.environ["TOKENIZERS_PARALLELISM"] = "true"
+
+     def __init__(
+         self,
+         image,
+         mask,
+         processor,
+         prompt_token="<Prompt1>",
+         prompt_number=5,
+         visual_prompt_tokens=[
+             "<Prompt0>",
+             "<Prompt1>",
+             "<Prompt2>",
+             "<Prompt3>",
+             "<Prompt4>",
+             "<NO_Prompt>",
+         ],
+         data_dtype=torch.bfloat16,
+         **kwargs,
+     ):
+         self.processor = processor
+         self.prompt_token = prompt_token
+
+         self.prompt_number = prompt_number
+         self.special_tokens = visual_prompt_tokens
+         # Map each visual prompt token to its id relative to the start of the added vocabulary
+         self.visual_prompt_ids = {
+             token: self.processor.tokenizer.convert_tokens_to_ids(token) - 128256
+             for token in self.special_tokens
+         }
+
+         self.image = image
+         self.mask = mask
+         self.data_dtype = data_dtype
+
+     def __len__(self):
+         # The dataset wraps a single image/mask pair
+         return 1
+
+     def _parse_annotations(self):
+         image = self.image
+         mask = self.mask  # binary mask
+
+         mask_np = mask.astype(np.uint8)
+
+         # -1 is a temporary "unassigned" sentinel; cast to uint8 once every pixel holds a prompt id
+         filled_matrix = np.full((image.height, image.width), -1, dtype=np.int16)
+         prompt_token = self.prompt_token
+         prompt_id = self.visual_prompt_ids.get(
+             prompt_token, self.visual_prompt_ids["<NO_Prompt>"]
+         )
+         assert prompt_id < 16, f"prompt_id should be less than 16, got {prompt_id}"
+         fill_area = (filled_matrix == -1) & mask_np.astype(bool)
+         filled_matrix[fill_area] = prompt_id
+
+         filled_matrix[filled_matrix == -1] = self.visual_prompt_ids["<NO_Prompt>"]
+
+         bboxes = {}
+
+         prompt_idx = int(re.match(r"<Prompt(\d+)>", prompt_token).group(1))
+         non_zero_coords = np.argwhere(mask_np)
+         y_min, x_min = non_zero_coords.min(axis=0)
+         y_max, x_max = non_zero_coords.max(axis=0)
+         bbox = (
+             x_min / image.width,
+             y_min / image.height,
+             x_max / image.width,
+             y_max / image.height,
+         )
+         bboxes[
+             str(
+                 self.processor.tokenizer.convert_tokens_to_ids(
+                     f"<|reserved_special_token_{prompt_idx + 2}|>"
+                 )
+             )
+         ] = bbox
+
+         data_dict = {
+             "image": image,
+             "visual_prompt": Image.fromarray(filled_matrix.astype(np.uint8)),
+             "bboxes": bboxes,
+         }
+         return data_dict
+
+     def __getitem__(self, index):
+         data_dict = deepcopy(self._parse_annotations())
+         image = data_dict["image"]
+         visual_prompt = data_dict["visual_prompt"]
+
+         prompt_idx = int(re.match(r"<Prompt(\d+)>", self.prompt_token).group(1))
+
+         # <|reserved_special_token_{idx}|> actually starts from 2
+         qs = f"There are some objects I am curious about: {self.prompt_token};\n{self.prompt_token}: <|reserved_special_token_{prompt_idx + 2}|>Describe this masked region in detail."
+         qs = qs.replace(
+             f"<|reserved_special_token_{prompt_idx + 2}|>",
+             f"<|reserved_special_token_{prompt_idx + 2}|>" * 256,
+         )
+
+         user_content = [{"type": "image", "image": image}, {"type": "text", "text": qs}]
+
+         messages = [
+             {"role": "user", "content": user_content},
+         ]
+
+         # Prepare input for model
+         raw_prompt = self.processor.apply_chat_template(
+             messages,
+             add_generation_prompt=True,
+             tokenize=False,
+         )
+
+         model_inputs = self.processor(text=[raw_prompt], images=[image], visual_prompts=[visual_prompt], return_tensors="pt")
+
+         pixel_values = model_inputs["pixel_values"]
+         mask_values = model_inputs["mask_values"]
+         input_ids = model_inputs["input_ids"].squeeze(0)
+         attention_mask = model_inputs["attention_mask"].squeeze(0)
+         aspect_ratio = model_inputs["aspect_ratio"]
+
+         ret = dict(
+             input_ids=input_ids.cuda().unsqueeze(0),
+             attention_mask=attention_mask.cuda().to(self.data_dtype).unsqueeze(0),
+             pixel_values=pixel_values.cuda().to(self.data_dtype).flatten(0, 1),
+             global_mask_values=mask_values.cuda().to(self.data_dtype).squeeze(),
+             bboxes=[data_dict["bboxes"]],
+             aspect_ratios=aspect_ratio.unsqueeze(0).cuda(),
+         )
+         return ret
+
+
+ class MultiRegionDataset(Dataset):
+     os.environ["TOKENIZERS_PARALLELISM"] = "true"
+
+     def __init__(
+         self,
+         image,
+         masks,
+         question_str,
+         processor,
+         prompt_token="<Prompt1>",
+         prompt_number=5,
+         visual_prompt_tokens=[
+             "<Prompt0>",
+             "<Prompt1>",
+             "<Prompt2>",
+             "<Prompt3>",
+             "<Prompt4>",
+             "<NO_Prompt>",
+         ],
+         data_dtype=torch.bfloat16,
+         **kwargs,
+     ):
+         self.processor = processor
+         self.prompt_token = prompt_token
+
+         self.prompt_number = prompt_number
+         self.special_tokens = visual_prompt_tokens
+         self.visual_prompt_ids = {
+             token: self.processor.tokenizer.convert_tokens_to_ids(token) - 128256
+             for token in self.special_tokens
+         }
+
+         self.image = image
+         self.masks = masks
+         self.question_str = question_str
+         self.data_dtype = data_dtype
+
+     def __len__(self):
+         # The dataset wraps a single image with its list of region masks
+         return 1
+
+     def _parse_annotations(self):
+         image = self.image
+         masks = self.masks  # binary masks
+
+         masks_np = [np.array(mask).astype(np.uint8) for mask in masks]
+
+         # Resize any mask that does not match the image resolution
+         for mask_id, mask in enumerate(masks):
+             if image.width != masks_np[mask_id].shape[1] or image.height != masks_np[mask_id].shape[0]:
+                 mask = mask.resize(image.size, Image.NEAREST)
+                 masks[mask_id] = mask
+                 masks_np[mask_id] = np.array(mask).astype(np.uint8)
+
+         prompt_matches = set(re.findall(r'<Prompt\d+>', self.question_str))
+         assert len(prompt_matches) == len(masks)
+
+         objects_desc = "There are some objects I am curious about: "
+         sub_image_desc = ""
+         for matched_prompt in prompt_matches:
+             objects_desc += f"{matched_prompt}; "
+
+             prompt_idx = int(re.match(r'<Prompt(\d+)>', matched_prompt).group(1))
+             sub_image_desc += f"{matched_prompt}: <|reserved_special_token_{prompt_idx + 2}|>\n"
+             sub_image_desc = sub_image_desc.replace(f"<|reserved_special_token_{prompt_idx + 2}|>", f"<|reserved_special_token_{prompt_idx + 2}|>" * 256)
+
+         prompt = objects_desc + "\n" + sub_image_desc + "\n" + self.question_str
+
+         # -1 is a temporary "unassigned" sentinel; cast to uint8 once every pixel holds a prompt id
+         filled_matrix = np.full((image.height, image.width), -1, dtype=np.int16)
+         bboxes = {}
+         for matched_prompt in prompt_matches:
+             prompt_idx = int(re.match(r'<Prompt(\d+)>', matched_prompt).group(1))
+             mask = masks_np[prompt_idx]
+             prompt_token = matched_prompt
+             prompt_id = self.visual_prompt_ids.get(prompt_token, self.visual_prompt_ids["<NO_Prompt>"])
+             assert prompt_id < self.prompt_number + 1, f"prompt_id should be less than {self.prompt_number + 1}, got {prompt_id}"
+             fill_area = (filled_matrix == -1) & mask.astype(bool)
+             filled_matrix[fill_area] = prompt_id
+
+             non_zero_coords = np.argwhere(masks_np[prompt_idx])
+             y_min, x_min = non_zero_coords.min(axis=0)
+             y_max, x_max = non_zero_coords.max(axis=0)
+             bbox = (x_min / image.width, y_min / image.height, x_max / image.width, y_max / image.height)
+             bboxes[str(self.processor.tokenizer.convert_tokens_to_ids(f"<|reserved_special_token_{prompt_idx + 2}|>"))] = bbox
+
+         filled_matrix[filled_matrix == -1] = self.visual_prompt_ids["<NO_Prompt>"]
+         # convert masks to PIL.Image
+         masks = [Image.fromarray((masks_np[i] * 255).astype(np.uint8)) for i in range(len(masks))]
+
+         data_dict = {
+             'image': image,
+             'visual_prompt': Image.fromarray(filled_matrix.astype(np.uint8)),
+             'bboxes': bboxes,
+             'prompt': prompt,
+         }
+         return data_dict
+
+     def __getitem__(self, index):
+         data_dict = self._parse_annotations()
+         image = data_dict["image"]
+         visual_prompt = data_dict["visual_prompt"]
+         qs = data_dict["prompt"]
+
+         user_content = [
+             {"type": "image", "image": image},
+             {"type": "text", "text": qs}
+         ]
+
+         messages = [
+             {"role": "user", "content": user_content},
+         ]
+
+         # Prepare input for model
+         raw_prompt = self.processor.apply_chat_template(
+             messages,
+             add_generation_prompt=True,
+             tokenize=False,
+         )
+
+         model_inputs = self.processor(text=[raw_prompt], images=[image], visual_prompts=[visual_prompt], return_tensors="pt")
+
+         pixel_values = model_inputs["pixel_values"]
+         mask_values = model_inputs["mask_values"]
+         input_ids = model_inputs["input_ids"].squeeze(0)
+         attention_mask = model_inputs["attention_mask"].squeeze(0)
+         aspect_ratio = model_inputs["aspect_ratio"]
+
+         ret = dict(
+             input_ids=input_ids.cuda().unsqueeze(0),
+             attention_mask=attention_mask.cuda().to(self.data_dtype).unsqueeze(0),
+             pixel_values=pixel_values.cuda().to(self.data_dtype).flatten(0, 1),
+             global_mask_values=mask_values.cuda().to(self.data_dtype).squeeze(),
+             bboxes=[data_dict["bboxes"]],
+             aspect_ratios=aspect_ratio.unsqueeze(0).cuda(),
+         )
+         return ret
requirements.txt ADDED
@@ -0,0 +1,17 @@
+ sentencepiece
+ accelerate>=0.28.0
+ pydantic>=2.10.1
+ numpy>=1.23.5,<2.0.0
+ pillow>=9.4.0
+ gradio>=5.5.0
+ requests
+ httpx
+ uvicorn
+ fastapi
+ protobuf
+ opencv-python
+ openai>=1.55.0
+ spaces==0.30.4
+ git+https://github.com/facebookresearch/segment-anything.git
+ torch
+ torchvision
temp.py ADDED
@@ -0,0 +1,7 @@
+ import gradio as gr
+
+ def greet(name):
+     return "Hello " + name + "!!"
+
+ demo = gr.Interface(fn=greet, inputs="text", outputs="text")
+ demo.launch()