merve
/

smol-vision

@@ -1691,34 +1691,47 @@
     {
       "cell_type": "code",
       "source": [
-        "import torch\n",
-        "from PIL import Image\n",
-        "from typing import List, Dict\n",
-        "\n",
-        "\n",
-        "model.eval()\n",
-        "\n",
-        "images = [\"/content/pizza.jpg\", \"/content/spaghetti.JPG\"]\n",
-        "\n",
-        "pil_images = [Image.open(p).convert(\"RGB\") for p in images]\n",
-        "inputs = image_processor(images=pil_images, return_tensors=\"pt\").to(device)\n",
-        "\n",
-        "with torch.no_grad():\n",
-        "  logits = model(inputs[\"pixel_values\"])\n",
-        "\n",
-        "# take top 2 classes\n",
-        "probs = logits.softmax(dim=-1)\n",
-        "scores, indices = probs.topk(2, dim=-1)\n",
-        "\n",
-        "results = []\n",
-        "for path, idxs, scs in zip(images, indices, scores):\n",
-        "    preds = [\n",
-        "        {\"label_id\": int(i.item()),\n",
-        "          \"label\": id2label.get(int(i.item()), f\"class_{int(i)}\"),\n",
-        "          \"score\": float(s.item())}\n",
-        "        for i, s in zip(idxs, scs)\n",
-        "    ]\n",
-        "    results.append({\"image\": path, \"topk\": preds})\n"
       ],
       "metadata": {
         "id": "RGZntYQEaVbA"

     {
       "cell_type": "code",
       "source": [
+        "# Rebuild the classifier, restore the saved checkpoint and run top-2 inference on two sample images.\n",
+        "# Uses backbone, hidden_size, num_classes, device, image_processor and id2label from the\n",
+        "# kernel namespace; none of them is defined in this cell.\n",
+        "import torch\n",
+        "from PIL import Image\n",
+        "from typing import List, Dict\n",
+        "\n",
+        "# --- Load checkpoint ---\n",
+        "ckpt_path = \"./checkpoints_dinov3_class/best_acc_0.9025.pt\"\n",
+        "\n",
+        "model = DinoV3Linear(backbone, hidden_size, num_classes, freeze_backbone=True).to(device)\n",
+        "# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.\n",
+        "checkpoint = torch.load(ckpt_path, map_location=device)\n",
+        "model.load_state_dict(checkpoint[\"model_state_dict\"])\n",
+        "model.eval()\n",
+        "\n",
+        "# --- Prepare images ---\n",
+        "images = [\"/content/pizza.jpg\", \"/content/spaghetti.JPG\"]\n",
+        "\n",
+        "pil_images = [Image.open(p).convert(\"RGB\") for p in images]\n",
+        "inputs = image_processor(images=pil_images, return_tensors=\"pt\").to(device)\n",
+        "\n",
+        "# --- Inference ---\n",
+        "with torch.no_grad():\n",
+        "    logits = model(inputs[\"pixel_values\"])\n",
+        "\n",
+        "# take top 2 classes\n",
+        "probs = logits.softmax(dim=-1)\n",
+        "scores, indices = probs.topk(2, dim=-1)\n",
+        "\n",
+        "# --- Format results ---\n",
+        "# One entry per image: its path plus the top-k predictions.\n",
+        "results = []\n",
+        "for path, idxs, scs in zip(images, indices, scores):\n",
+        "    preds = [\n",
+        "        {\n",
+        "            \"label_id\": int(i.item()),\n",
+        "            # fall back to a synthetic class_<id> name when id2label has no entry\n",
+        "            \"label\": id2label.get(int(i.item()), f\"class_{int(i)}\"),\n",
+        "            \"score\": float(s.item())\n",
+        "        }\n",
+        "        for i, s in zip(idxs, scs)\n",
+        "    ]\n",
+        "    results.append({\"image\": path, \"topk\": preds})\n",
+        "\n",
+        "print(results)\n"
       ],
       "metadata": {
         "id": "RGZntYQEaVbA"