Darsh1234Tayal commited on
Commit
fb9b581
·
verified ·
1 Parent(s): 73987d7

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/requirements-checkpoint.txt ADDED
File without changes
.ipynb_checkpoints/secrets-checkpoint.env ADDED
File without changes
.ipynb_checkpoints/youtube-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
.ipynb_checkpoints/youtube-checkpoint.py ADDED
File without changes
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Yt Transcipt Project
3
- emoji: 💻
4
- colorFrom: indigo
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.49.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: yt_transcript_project
3
+ app_file: youtube.py
 
 
4
  sdk: gradio
5
+ sdk_version: 5.23.1
 
 
6
  ---
 
 
chroma_db/3771a9ee-1dca-46f8-a50d-2d47c8099db5/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cee0d7bf70893a0e49d41e1305cece975b8923f0e6d4d052fc812cec909e6f19
3
+ size 6424000
chroma_db/3771a9ee-1dca-46f8-a50d-2d47c8099db5/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd3d807657781f5b32fa855c67963966ed05400edb8e3a6a85821ff87b4b2eb9
3
+ size 100
chroma_db/3771a9ee-1dca-46f8-a50d-2d47c8099db5/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f646e7ff63de9c57ec7f82ba0c07bc06e951bd14fa1511b99931ad4bea4f4f9f
3
+ size 113967
chroma_db/3771a9ee-1dca-46f8-a50d-2d47c8099db5/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19d430a4c4e9fb6f8c7a427418dca1bdb2dec293c6a2d296d5023c60fab84ed1
3
+ size 8000
chroma_db/3771a9ee-1dca-46f8-a50d-2d47c8099db5/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d84195a42f04486d3c42a6f80506351ff5f998347c2ced71ef82eea6170f09a9
3
+ size 17044
chroma_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41b4db74ce3b458cf36cc18b018a5df9814dd125d897716a47df3d56734f19d5
3
+ size 8577024
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ youtube-transcript-api
2
+ bytez
secrets.env ADDED
@@ -0,0 +1 @@
 
 
1
+ BYTEZ_API_KEY = "69729741cfd0da8dc190e5c7fa746401"
youtube.ipynb ADDED
@@ -0,0 +1,808 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "cdbd4c42-21fc-4796-8161-8b80e7b310c7",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
11
+ "from langchain_chroma import Chroma\n",
12
+ "from langchain_huggingface import HuggingFaceEmbeddings\n",
13
+ "from bytez import Bytez\n",
14
+ "from youtube_transcript_api import YouTubeTranscriptApi\n",
15
+ "import gradio as gr\n",
16
+ "from dotenv import load_dotenv\n",
17
+ "import os\n",
18
+ "from urllib.parse import urlparse, parse_qs"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "id": "d1943b98-7f49-43fb-81cc-cae13781b4b1",
25
+ "metadata": {},
26
+ "outputs": [
27
+ {
28
+ "data": {
29
+ "text/plain": [
30
+ "True"
31
+ ]
32
+ },
33
+ "execution_count": 2,
34
+ "metadata": {},
35
+ "output_type": "execute_result"
36
+ }
37
+ ],
38
+ "source": [
39
+ "load_dotenv(\"secrets.env\")"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 3,
45
+ "id": "e8ad246a-8136-4916-a086-eb28ad34a958",
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "api_key = os.getenv(\"BYTEZ_API_KEY\")\n",
50
+ "sdk = Bytez(api_key)"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": 4,
56
+ "id": "6d9a1500-148a-4ff3-a1ec-81e1faaa3bfc",
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "#toy function\n",
61
+ "def video_id_extractor(link):\n",
62
+ " if \"watch?v=\" in link:\n",
63
+ " return link[32:43]\n",
64
+ " else:\n",
65
+ " return link[17:28]"
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": 5,
71
+ "id": "313a5521-41b6-4453-a26b-b2142f7992c0",
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "#production ready function\n",
76
+ "def video_id_extractor(link):\n",
77
+ " parsed_url = urlparse(link)\n",
78
+ " \n",
79
+ " if \"youtube.com\" in parsed_url.netloc:\n",
80
+ " return parse_qs(parsed_url.query).get(\"v\", [None])[0]\n",
81
+ " \n",
82
+ " elif \"youtu.be\" in parsed_url.netloc:\n",
83
+ " return parsed_url.path.lstrip(\"/\")\n",
84
+ " \n",
85
+ " return None"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 6,
91
+ "id": "ef1b022a-b9b9-481d-bbc8-df9cae7137b1",
92
+ "metadata": {},
93
+ "outputs": [],
94
+ "source": [
95
+ "def generate_transcript(video_id):\n",
96
+ " trans = YouTubeTranscriptApi()\n",
97
+ " try:\n",
98
+ " transcript_raw = trans.fetch(video_id = video_id)\n",
99
+ " except Exception:\n",
100
+ " return None\n",
101
+ " transcript = \"\"\n",
102
+ " for i in transcript_raw.snippets:\n",
103
+ " transcript += f\" {i.text}\"\n",
104
+ " return transcript"
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "code",
109
+ "execution_count": 7,
110
+ "id": "052cfe69-ff3e-40f5-b574-00f1df69446d",
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "def create_and_save_vs(trans):\n",
115
+ " try:\n",
116
+ " splitter = RecursiveCharacterTextSplitter(chunk_size = 100, chunk_overlap = 50)\n",
117
+ " docs = splitter.split_text(trans)\n",
118
+ " embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')\n",
119
+ " vector_store_db = Chroma.from_texts(docs, embeddings, persist_directory='chroma_db')\n",
120
+ " except Exception:\n",
121
+ " return None\n",
122
+ " return vector_store_db"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": 19,
128
+ "id": "b6e02c40-008d-46af-8728-96f8f359cc75",
129
+ "metadata": {},
130
+ "outputs": [],
131
+ "source": [
132
+ "def generate_summary(trans):\n",
133
+ " try: \n",
134
+ " model = sdk.model(\"openai/gpt-4o\")\n",
135
+ " if len(trans.split(\" \")) > 90000:\n",
136
+ " trans = trans.split(\" \")[0:85000]\n",
137
+ " trans = \" \".join(trans)\n",
138
+ " except Exception:\n",
139
+ " return None\n",
140
+ " Inp = [{\"role\": \"system\", \"content\": \"You are a youtube transcipt sammurizer. Sammurize the transcript under 100 words\"}, {\"role\":\"user\", \"content\":trans}]\n",
141
+ " trails = 4\n",
142
+ " failed = True\n",
143
+ " time_to_sleep = 3\n",
144
+ " while failed and trails > 0:\n",
145
+ " res = model.run(Inp)\n",
146
+ " if type(res) == list and len(res) == 3:\n",
147
+ " failed = False\n",
148
+ " trails -= 1\n",
149
+ " return res[0][\"content\"]\n",
150
+ " else:\n",
151
+ " time.sleep(time_to_sleep)\n",
152
+ " time_to_sleep = time_to_sleep **2\n",
153
+ " trails -= 1\n",
154
+ " return None\n",
155
+ " "
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "code",
160
+ "execution_count": 10,
161
+ "id": "07a21fd9-7c21-4349-9b0b-eccc84a6c243",
162
+ "metadata": {},
163
+ "outputs": [],
164
+ "source": [
165
+ "def setter(link):\n",
166
+ " yield gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), \"\", \"\"\n",
167
+ " video_id = video_id_extractor(link)\n",
168
+ " if not video_id:\n",
169
+ " yield gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), \"\", \"\"\n",
170
+ " transcript = generate_transcript(video_id)\n",
171
+ " if not transcript:\n",
172
+ " yield gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), \"\", \"\"\n",
173
+ " vectorstore = create_and_save_vs(transcript)\n",
174
+ " if not vectorstore:\n",
175
+ " yield gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), \"\", \"\"\n",
176
+ " summary = generate_summary(transcript)\n",
177
+ " if not summary:\n",
178
+ " yield gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), \"\", \"\"\n",
179
+ " yield gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), summary, vectorstore"
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": 11,
185
+ "id": "6acf3a97-a4a6-4e56-aa77-578dfdb0a4e8",
186
+ "metadata": {},
187
+ "outputs": [],
188
+ "source": [
189
+ "def execute(vec, query):\n",
190
+ " try:\n",
191
+ " res = vec.similarity_search(query, k=3)\n",
192
+ " result = \"\"\n",
193
+ " for i in res:\n",
194
+ " result += f\"\\n{i.page_content}\"\n",
195
+ " model = sdk.model(\"openai/gpt-4o\")\n",
196
+ " inp = [{\"role\": \"system\", \"content\": \"You are a helpful assistant - you will be asked a query and provided with a context. You have to answer that query based on the provided context - do not make things up. Do not reveal the whole context, answer as like you already knew the context\"}, {\"role\":\"user\", \"content\":f\"query: {query} | context: {result}\"}]\n",
197
+ " res = model.run(inp)\n",
198
+ " return res[0]['content'], gr.update(visible=True), gr.update(visible=False)\n",
199
+ " except Exception:\n",
200
+ " return \"\", gr.update(visible=False), gr.update(visible=True)"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 21,
206
+ "id": "0c0ad086-1de1-4a88-92cf-ad442ef9cb0f",
207
+ "metadata": {},
208
+ "outputs": [
209
+ {
210
+ "name": "stdout",
211
+ "output_type": "stream",
212
+ "text": [
213
+ "* Running on local URL: http://127.0.0.1:7865\n",
214
+ "\n",
215
+ "To create a public link, set `share=True` in `launch()`.\n"
216
+ ]
217
+ },
218
+ {
219
+ "data": {
220
+ "text/html": [
221
+ "<div><iframe src=\"http://127.0.0.1:7865/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
222
+ ],
223
+ "text/plain": [
224
+ "<IPython.core.display.HTML object>"
225
+ ]
226
+ },
227
+ "metadata": {},
228
+ "output_type": "display_data"
229
+ },
230
+ {
231
+ "data": {
232
+ "text/plain": []
233
+ },
234
+ "execution_count": 21,
235
+ "metadata": {},
236
+ "output_type": "execute_result"
237
+ }
238
+ ],
239
+ "source": [
240
+ "with gr.Blocks(\n",
241
+ " theme=gr.themes.Soft(\n",
242
+ " primary_hue=\"blue\",\n",
243
+ " secondary_hue=\"indigo\",\n",
244
+ " ),\n",
245
+ " css=\"\"\"\n",
246
+ " /* Global Styles */\n",
247
+ " .gradio-container {\n",
248
+ " font-family: 'Inter', 'Segoe UI', sans-serif !important;\n",
249
+ " max-width: 1200px !important;\n",
250
+ " margin: 0 auto !important;\n",
251
+ " }\n",
252
+ " \n",
253
+ " /* Header Branding */\n",
254
+ " .header-brand {\n",
255
+ " background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);\n",
256
+ " padding: 2rem;\n",
257
+ " border-radius: 16px;\n",
258
+ " margin-bottom: 2rem;\n",
259
+ " box-shadow: 0 10px 40px rgba(102, 126, 234, 0.3);\n",
260
+ " animation: fadeInDown 0.8s ease-out;\n",
261
+ " }\n",
262
+ " \n",
263
+ " .header-brand h1 {\n",
264
+ " color: white;\n",
265
+ " font-size: 2.5rem;\n",
266
+ " font-weight: 700;\n",
267
+ " margin: 0;\n",
268
+ " text-shadow: 2px 2px 4px rgba(0,0,0,0.2);\n",
269
+ " }\n",
270
+ " \n",
271
+ " .header-brand p {\n",
272
+ " color: rgba(255,255,255,0.95);\n",
273
+ " font-size: 1.1rem;\n",
274
+ " margin: 0.5rem 0 0 0;\n",
275
+ " }\n",
276
+ " \n",
277
+ " /* Footer Branding */\n",
278
+ " .footer-brand {\n",
279
+ " background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);\n",
280
+ " padding: 1.5rem;\n",
281
+ " border-radius: 12px;\n",
282
+ " margin-top: 2rem;\n",
283
+ " text-align: center;\n",
284
+ " box-shadow: 0 -5px 20px rgba(102, 126, 234, 0.2);\n",
285
+ " }\n",
286
+ " \n",
287
+ " .footer-brand p {\n",
288
+ " color: white;\n",
289
+ " margin: 0.3rem 0;\n",
290
+ " font-size: 0.95rem;\n",
291
+ " }\n",
292
+ " \n",
293
+ " .footer-brand a {\n",
294
+ " color: #ffd700;\n",
295
+ " text-decoration: none;\n",
296
+ " font-weight: 600;\n",
297
+ " transition: all 0.3s ease;\n",
298
+ " }\n",
299
+ " \n",
300
+ " .footer-brand a:hover {\n",
301
+ " color: #fff;\n",
302
+ " text-shadow: 0 0 10px rgba(255,255,255,0.5);\n",
303
+ " }\n",
304
+ " \n",
305
+ " /* Main Title Animation */\n",
306
+ " .main-title {\n",
307
+ " background: linear-gradient(90deg, #667eea, #764ba2, #667eea);\n",
308
+ " background-size: 200% auto;\n",
309
+ " color: white;\n",
310
+ " padding: 1.5rem;\n",
311
+ " border-radius: 12px;\n",
312
+ " text-align: center;\n",
313
+ " font-size: 1.8rem;\n",
314
+ " font-weight: 600;\n",
315
+ " margin-bottom: 2rem;\n",
316
+ " box-shadow: 0 8px 32px rgba(102, 126, 234, 0.4);\n",
317
+ " animation: gradientShift 3s ease infinite, fadeIn 1s ease-out;\n",
318
+ " }\n",
319
+ " \n",
320
+ " /* Button Styles */\n",
321
+ " .gr-button {\n",
322
+ " background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;\n",
323
+ " border: none !important;\n",
324
+ " color: white !important;\n",
325
+ " font-weight: 600 !important;\n",
326
+ " padding: 12px 32px !important;\n",
327
+ " border-radius: 8px !important;\n",
328
+ " transition: all 0.3s ease !important;\n",
329
+ " box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;\n",
330
+ " text-transform: uppercase;\n",
331
+ " letter-spacing: 0.5px;\n",
332
+ " }\n",
333
+ " \n",
334
+ " .gr-button:hover {\n",
335
+ " transform: translateY(-2px) !important;\n",
336
+ " box-shadow: 0 6px 25px rgba(102, 126, 234, 0.6) !important;\n",
337
+ " }\n",
338
+ " \n",
339
+ " .gr-button:active {\n",
340
+ " transform: translateY(0px) !important;\n",
341
+ " }\n",
342
+ " \n",
343
+ " /* Input Fields */\n",
344
+ " .gr-textbox, .gr-text-input {\n",
345
+ " border-radius: 8px !important;\n",
346
+ " border: 2px solid #e0e7ff !important;\n",
347
+ " transition: all 0.3s ease !important;\n",
348
+ " }\n",
349
+ " \n",
350
+ " .gr-textbox:focus, .gr-text-input:focus {\n",
351
+ " border-color: #667eea !important;\n",
352
+ " box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;\n",
353
+ " }\n",
354
+ " \n",
355
+ " /* Loading Animation */\n",
356
+ " .loading-container {\n",
357
+ " text-align: center;\n",
358
+ " padding: 3rem;\n",
359
+ " }\n",
360
+ " \n",
361
+ " .loading-text {\n",
362
+ " font-size: 1.5rem;\n",
363
+ " color: #667eea;\n",
364
+ " animation: pulse 1.5s ease-in-out infinite;\n",
365
+ " }\n",
366
+ " \n",
367
+ " /* Error Messages */\n",
368
+ " .error-message {\n",
369
+ " background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);\n",
370
+ " color: white;\n",
371
+ " padding: 1.5rem;\n",
372
+ " border-radius: 12px;\n",
373
+ " text-align: center;\n",
374
+ " font-size: 1.3rem;\n",
375
+ " font-weight: 600;\n",
376
+ " box-shadow: 0 8px 32px rgba(245, 87, 108, 0.3);\n",
377
+ " animation: shake 0.5s ease-in-out;\n",
378
+ " }\n",
379
+ " \n",
380
+ " /* Success/Summary Box */\n",
381
+ " .summary-box {\n",
382
+ " background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);\n",
383
+ " padding: 1.5rem;\n",
384
+ " border-radius: 12px;\n",
385
+ " margin-bottom: 1.5rem;\n",
386
+ " box-shadow: 0 8px 24px rgba(168, 237, 234, 0.3);\n",
387
+ " animation: fadeInUp 0.6s ease-out;\n",
388
+ " }\n",
389
+ " \n",
390
+ " /* Chat Section */\n",
391
+ " .chat-section {\n",
392
+ " animation: fadeInUp 0.8s ease-out;\n",
393
+ " }\n",
394
+ " \n",
395
+ " /* Animations */\n",
396
+ " @keyframes fadeIn {\n",
397
+ " from {\n",
398
+ " opacity: 0;\n",
399
+ " }\n",
400
+ " to {\n",
401
+ " opacity: 1;\n",
402
+ " }\n",
403
+ " }\n",
404
+ " \n",
405
+ " @keyframes fadeInDown {\n",
406
+ " from {\n",
407
+ " opacity: 0;\n",
408
+ " transform: translateY(-30px);\n",
409
+ " }\n",
410
+ " to {\n",
411
+ " opacity: 1;\n",
412
+ " transform: translateY(0);\n",
413
+ " }\n",
414
+ " }\n",
415
+ " \n",
416
+ " @keyframes fadeInUp {\n",
417
+ " from {\n",
418
+ " opacity: 0;\n",
419
+ " transform: translateY(30px);\n",
420
+ " }\n",
421
+ " to {\n",
422
+ " opacity: 1;\n",
423
+ " transform: translateY(0);\n",
424
+ " }\n",
425
+ " }\n",
426
+ " \n",
427
+ " @keyframes pulse {\n",
428
+ " 0%, 100% {\n",
429
+ " opacity: 1;\n",
430
+ " }\n",
431
+ " 50% {\n",
432
+ " opacity: 0.5;\n",
433
+ " }\n",
434
+ " }\n",
435
+ " \n",
436
+ " @keyframes shake {\n",
437
+ " 0%, 100% { transform: translateX(0); }\n",
438
+ " 25% { transform: translateX(-10px); }\n",
439
+ " 75% { transform: translateX(10px); }\n",
440
+ " }\n",
441
+ " \n",
442
+ " @keyframes gradientShift {\n",
443
+ " 0% {\n",
444
+ " background-position: 0% 50%;\n",
445
+ " }\n",
446
+ " 50% {\n",
447
+ " background-position: 100% 50%;\n",
448
+ " }\n",
449
+ " 100% {\n",
450
+ " background-position: 0% 50%;\n",
451
+ " }\n",
452
+ " }\n",
453
+ " \n",
454
+ " /* Responsive Design */\n",
455
+ " @media (max-width: 768px) {\n",
456
+ " .header-brand h1 {\n",
457
+ " font-size: 1.8rem;\n",
458
+ " }\n",
459
+ " .main-title {\n",
460
+ " font-size: 1.3rem;\n",
461
+ " }\n",
462
+ " }\n",
463
+ " \"\"\"\n",
464
+ ") as ui:\n",
465
+ " # Header Branding\n",
466
+ " gr.HTML(\"\"\"\n",
467
+ " <div class=\"header-brand\">\n",
468
+ " <h1>🎓 AI YouTube Study Assistant</h1>\n",
469
+ " <p>Transform lengthy videos into concise knowledge</p>\n",
470
+ " </div>\n",
471
+ " \"\"\")\n",
472
+ " \n",
473
+ " vs = gr.State()\n",
474
+ " gr.HTML('<div class=\"main-title\">📹 Why watch long YouTube videos when you could study from AI?</div>')\n",
475
+ " \n",
476
+ " with gr.Row(visible=True) as first_page:\n",
477
+ " youtube_link = gr.Textbox(\n",
478
+ " label=\"Enter the youtube link here: \", \n",
479
+ " lines=2,\n",
480
+ " placeholder=\"https://www.youtube.com/watch?v=...\"\n",
481
+ " )\n",
482
+ " submit_button = gr.Button(\"SUBMIT!\")\n",
483
+ " \n",
484
+ " with gr.Row(visible=False) as chat_page:\n",
485
+ " with gr.Column():\n",
486
+ " summary = gr.Markdown(elem_classes=\"summary-box\")\n",
487
+ " gr.Markdown(\"### 💬 Now ask any question about the video:\")\n",
488
+ " ques = gr.Textbox(\n",
489
+ " label=\"Enter the question here: \", \n",
490
+ " lines=2,\n",
491
+ " placeholder=\"What is the main topic of this video?\"\n",
492
+ " )\n",
493
+ " submit_answer = gr.Button(\"SUBMIT!\")\n",
494
+ " answer = gr.TextArea(label=\"ANSWER\")\n",
495
+ " \n",
496
+ " with gr.Row(visible=False) as wrong_link_page:\n",
497
+ " gr.HTML('<div class=\"error-message\">❌ Sorry, your link wasn\\'t correct. Please try again!</div>')\n",
498
+ " \n",
499
+ " with gr.Row(visible=False) as cc_not_enabled:\n",
500
+ " gr.HTML('<div class=\"error-message\">⚠️ The link you provided was either not valid or subtitles weren\\'t enabled in that video</div>')\n",
501
+ " \n",
502
+ " with gr.Row(visible=False) as loading_page:\n",
503
+ " gr.HTML('<div class=\"loading-container\"><div class=\"loading-text\">⏳ Loading... Please Wait</div></div>')\n",
504
+ " \n",
505
+ " with gr.Row(visible=False) as normal_error:\n",
506
+ " gr.HTML('<div class=\"error-message\">😔 SORRY, SOME ERROR OCCURRED. PLEASE TRY AGAIN LATER</div>')\n",
507
+ " \n",
508
+ " # Footer Branding\n",
509
+ " gr.HTML(\"\"\"\n",
510
+ " <div class=\"footer-brand\">\n",
511
+ " <p><strong>Developed by Darsh Tayal</strong></p>\n",
512
+ " <p>📧 <a href=\"mailto:[email protected]\">[email protected]</a></p>\n",
513
+ " <p style=\"margin-top: 1rem; font-size: 0.85rem; opacity: 0.9;\">© 2024 All Rights Reserved</p>\n",
514
+ " </div>\n",
515
+ " \"\"\")\n",
516
+ " \n",
517
+ " submit_button.click(setter, inputs=[youtube_link], outputs=[first_page, loading_page, chat_page, wrong_link_page, cc_not_enabled, normal_error, summary, vs])\n",
518
+ " submit_answer.click(execute, inputs=[vs, ques], outputs=[answer, chat_page, normal_error])\n",
519
+ "\n",
520
+ "ui.launch(inbrowser=True)"
521
+ ]
522
+ },
523
+ {
524
+ "cell_type": "code",
525
+ "execution_count": null,
526
+ "id": "b897923e-e4ce-4ed3-9d3c-f9307d4e8cc1",
527
+ "metadata": {},
528
+ "outputs": [],
529
+ "source": [
530
+ "with gr.Blocks() as ui:\n",
531
+ " vs = gr.State()\n",
532
+ " gr.Markdown(\"## Why watch long youtube videos when you could study from AI?\")\n",
533
+ " with gr.Row(visible=True) as first_page:\n",
534
+ " youtube_link = gr.Textbox(label=\"Enter the youtube link here: \", lines=2)\n",
535
+ " submit_button = gr.Button(\"SUBMIT!\")\n",
536
+ " with gr.Row(visible = False) as chat_page:\n",
537
+ " summary = gr.Markdown()\n",
538
+ " gr.Markdown(\"Now ask any any question about the video: \")\n",
539
+ " ques = gr.Textbox(label=\"Enter the question here: \", lines=2)\n",
540
+ " submit_answer = gr.Button(\"SUBMIT!\")\n",
541
+ " answer = gr.TextArea(label = \"ANSWER\")\n",
542
+ " with gr.Row(visible=False) as wrong_link_page:\n",
543
+ " gr.Markdown(\"## Sorry your link wasn't correct, please try again\")\n",
544
+ " with gr.Row(visible=False) as cc_not_enabled:\n",
545
+ " gr.Markdown(\"## The link you provided was either not valid or subtitles wasn't enabled in that video\")\n",
546
+ " with gr.Row(visible=False) as loading_page:\n",
547
+ " gr.Markdown(\"## Loading PLZ Wait\")\n",
548
+ " with gr.Row(visible=False) as normal_error:\n",
549
+ " gr.Markdown(\"## SORRY SOME ERROR OUCCURED, PLEASE TRY AGAIN LATER\")\n",
550
+ " \n",
551
+ " \n",
552
+ " submit_button.click(setter, inputs=[youtube_link], outputs=[first_page, loading_page, chat_page, wrong_link_page, cc_not_enabled, normal_error, summary, vs])\n",
553
+ " submit_answer.click(execute, inputs=[vs, ques], outputs=[answer, chat_page, normal_error])\n",
554
+ "ui.launch(inbrowser=True)"
555
+ ]
556
+ },
557
+ {
558
+ "cell_type": "code",
559
+ "execution_count": 23,
560
+ "id": "a95d8740-281c-41ad-8842-3e989e4169af",
561
+ "metadata": {},
562
+ "outputs": [
563
+ {
564
+ "name": "stdout",
565
+ "output_type": "stream",
566
+ "text": [
567
+ "Requirement already satisfied: ddgs in c:\\users\\admin\\.conda\\envs\\llms\\lib\\site-packages (9.0.2)\n",
568
+ "Requirement already satisfied: click>=8.1.8 in c:\\users\\admin\\.conda\\envs\\llms\\lib\\site-packages (from ddgs) (8.1.8)\n",
569
+ "Requirement already satisfied: primp>=0.15.0 in c:\\users\\admin\\.conda\\envs\\llms\\lib\\site-packages (from ddgs) (0.15.0)\n",
570
+ "Requirement already satisfied: lxml>=5.3.0 in c:\\users\\admin\\.conda\\envs\\llms\\lib\\site-packages (from ddgs) (6.0.0)\n",
571
+ "Requirement already satisfied: colorama in c:\\users\\admin\\.conda\\envs\\llms\\lib\\site-packages (from click>=8.1.8->ddgs) (0.4.6)\n"
572
+ ]
573
+ },
574
+ {
575
+ "name": "stderr",
576
+ "output_type": "stream",
577
+ "text": [
578
+ "C:\\Users\\ADMIN\\.conda\\envs\\llms\\Lib\\site-packages\\IPython\\utils\\_process_win32.py:138: ResourceWarning: unclosed file <_io.BufferedWriter name=7>\n",
579
+ " res = process_handler(cmd, _system_body)\n",
580
+ "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n",
581
+ "C:\\Users\\ADMIN\\.conda\\envs\\llms\\Lib\\site-packages\\IPython\\utils\\_process_win32.py:138: ResourceWarning: unclosed file <_io.BufferedReader name=8>\n",
582
+ " res = process_handler(cmd, _system_body)\n",
583
+ "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n",
584
+ "C:\\Users\\ADMIN\\.conda\\envs\\llms\\Lib\\site-packages\\IPython\\utils\\_process_win32.py:138: ResourceWarning: unclosed file <_io.BufferedReader name=9>\n",
585
+ " res = process_handler(cmd, _system_body)\n",
586
+ "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n"
587
+ ]
588
+ }
589
+ ],
590
+ "source": [
591
+ "!pip install ddgs"
592
+ ]
593
+ },
594
+ {
595
+ "cell_type": "code",
596
+ "execution_count": 36,
597
+ "id": "2acc892f-0757-4e52-8b13-2bca684c5517",
598
+ "metadata": {},
599
+ "outputs": [
600
+ {
601
+ "name": "stderr",
602
+ "output_type": "stream",
603
+ "text": [
604
+ "C:\\Users\\ADMIN\\.conda\\envs\\llms\\Lib\\site-packages\\langchain_community\\utilities\\duckduckgo_search.py:63: RuntimeWarning: This package (`duckduckgo_search`) has been renamed to `ddgs`! Use `pip install ddgs` instead.\n",
605
+ " with DDGS() as ddgs:\n"
606
+ ]
607
+ }
608
+ ],
609
+ "source": [
610
+ "from langchain_community.tools import DuckDuckGoSearchResults\n",
611
+ "\n",
612
+ "search = DuckDuckGoSearchResults(output_format=\"list\")\n",
613
+ "\n",
614
+ "# Force DuckDuckGo to return YouTube video links for your topic\n",
615
+ "query = 'site:youtube.com \"AI Agents\"'\n",
616
+ "results = search.invoke(query)\n",
617
+ "\n",
618
+ "for r in results:\n",
619
+ " print(r[\"title\"])\n",
620
+ " print(r[\"link\"])\n",
621
+ " print()"
622
+ ]
623
+ },
624
+ {
625
+ "cell_type": "code",
626
+ "execution_count": 40,
627
+ "id": "77630e0b-994a-4b6e-8a09-13d85a54b848",
628
+ "metadata": {},
629
+ "outputs": [],
630
+ "source": [
631
+ "os.environ[\"SERPER_API_KEY\"] = \"d0114d0a5875a6055b3e0ff87fb60b2fbfea8460\"\n",
632
+ "\n",
633
+ "from langchain_community.utilities import GoogleSerperAPIWrapper\n",
634
+ "\n",
635
+ "search = GoogleSerperAPIWrapper()\n",
636
+ "\n",
637
+ "res = search.run(\"site:youtube.com 'AI Agents'\")\n"
638
+ ]
639
+ },
640
+ {
641
+ "cell_type": "code",
642
+ "execution_count": 41,
643
+ "id": "7c4776bd-89ae-4f1d-92d9-d9d40aa2c814",
644
+ "metadata": {},
645
+ "outputs": [
646
+ {
647
+ "data": {
648
+ "text/plain": [
649
+ "\"In this video we'll follow a simple one two three learning path by building on concepts you already understand like chatbt and then moving on to AI workflows. I'll show you how to build your first agent from scratch on the N8N platform in three relatively simple steps. I'm going to then show you how to create an AI agent workflow which does not require any code. Build a Python AI Agent in 10 Minutes. 28K views · 2 days ago #Python ... Build an AI Agent From Scratch in Python - Tutorial for Beginners. Let's explore the five main types of AI agents to understand what they can and cannot do. Build a full functioning app with Lindy: · Some of the very best AI tools are adding AI agents on top of their platforms. · Create presentations ... This is your quick start guide to build a fully functional and deployed AI agent today with zero lines of code. Designing AI Decision Agents with DMN, Machine Learning & Analytics · From Idea to $650M Exit: Lessons in Building AI Startups · NVIDIA CEO Jensen ... An AI agent is a digital worker that can understand instructions and take actions in order to complete tasks. Join us as we build a robust AI agent architecture capable of solving complex, real-world problems. This video demonstrates building an ...\""
650
+ ]
651
+ },
652
+ "execution_count": 41,
653
+ "metadata": {},
654
+ "output_type": "execute_result"
655
+ }
656
+ ],
657
+ "source": [
658
+ "res"
659
+ ]
660
+ },
661
+ {
662
+ "cell_type": "code",
663
+ "execution_count": 43,
664
+ "id": "19fcaa99-8e08-492d-b2ea-083043784f88",
665
+ "metadata": {
666
+ "scrolled": true
667
+ },
668
+ "outputs": [
669
+ {
670
+ "data": {
671
+ "text/plain": [
672
+ "{'searchParameters': {'q': \"site:youtube.com 'AI Agents'\",\n",
673
+ " 'gl': 'us',\n",
674
+ " 'hl': 'en',\n",
675
+ " 'type': 'search',\n",
676
+ " 'num': 10,\n",
677
+ " 'engine': 'google'},\n",
678
+ " 'organic': [{'title': 'AI Agents, Clearly Explained',\n",
679
+ " 'link': 'https://www.youtube.com/watch?v=FwOTs4UxQS4',\n",
680
+ " 'snippet': \"In this video we'll follow a simple one two three learning path by building on concepts you already understand like chatbt and then moving on to AI workflows.\",\n",
681
+ " 'date': '6 months ago',\n",
682
+ " 'position': 1},\n",
683
+ " {'title': 'AI Agents Fundamentals In 21 Minutes',\n",
684
+ " 'link': 'https://www.youtube.com/watch?v=qU3fmidNbJE',\n",
685
+ " 'snippet': \"I'm going to then show you how to create an AI agent workflow which does not require any code.\",\n",
686
+ " 'date': '8 months ago',\n",
687
+ " 'position': 2},\n",
688
+ " {'title': \"How to Build & Sell AI Agents: Ultimate Beginner's Guide\",\n",
689
+ " 'link': 'https://www.youtube.com/watch?v=w0H1-b044KY',\n",
690
+ " 'snippet': 'An AI agent is a digital worker that can understand instructions and take actions in order to complete tasks.',\n",
691
+ " 'date': '7 months ago',\n",
692
+ " 'position': 3},\n",
693
+ " {'title': \"The AI Agent Tutorial That Should've Been Your First (no code)\",\n",
694
+ " 'link': 'https://www.youtube.com/watch?v=GchXMRwuWxE',\n",
695
+ " 'snippet': \"I'll show you how to build your first agent from scratch on the N8N platform in three relatively simple steps.\",\n",
696
+ " 'date': '2 months ago',\n",
697
+ " 'position': 4},\n",
698
+ " {'title': '5 Types of AI Agents: Autonomous Functions & Real-World ...',\n",
699
+ " 'link': 'https://www.youtube.com/watch?v=fXizBc03D7E',\n",
700
+ " 'snippet': \"Let's explore the five main types of AI agents to understand what they can and cannot do.\",\n",
701
+ " 'date': '6 months ago',\n",
702
+ " 'position': 5},\n",
703
+ " {'title': 'Zero To Your First AI Agent In 26 Minutes (no code)',\n",
704
+ " 'link': 'https://www.youtube.com/watch?v=DV0Ln7HRyJQ',\n",
705
+ " 'snippet': 'This is your quick start guide to build a fully functional and deployed AI agent today with zero lines of code.',\n",
706
+ " 'date': '2 months ago',\n",
707
+ " 'position': 6},\n",
708
+ " {'title': 'AI Agents EXPLAINED in 14 minutes and TOOLS for building ...',\n",
709
+ " 'link': 'https://www.youtube.com/watch?v=1gm__VUG2m8',\n",
710
+ " 'snippet': \"In this video I'll show you how regular AI tools turn into agents and fully automate content from video to publishing.\",\n",
711
+ " 'date': '2 months ago',\n",
712
+ " 'position': 7},\n",
713
+ " {'title': 'The Chaos of AI Agents',\n",
714
+ " 'link': 'https://www.youtube.com/watch?v=2YYjPs8t8MI',\n",
715
+ " 'snippet': \"Watch these AI Agents mess around in a virtual environment. I use google's gemini, anthropic's claude code, and NOT codex.\",\n",
716
+ " 'date': '3 months ago',\n",
717
+ " 'position': 8},\n",
718
+ " {'title': 'I built this AI Agent in 2 hours (and got paid $1200)',\n",
719
+ " 'link': 'https://www.youtube.com/watch?v=8C6iCpJ9HPo',\n",
720
+ " 'snippet': 'Join 2,000+ Members building AI automation businesses https://www.skool.com/... Join my FREE Skool community for all the resources to set ...',\n",
721
+ " 'date': '2 months ago',\n",
722
+ " 'position': 9},\n",
723
+ " {'title': 'How to Build Reliable AI Agents in 2025',\n",
724
+ " 'link': 'https://www.youtube.com/watch?v=T1Lowy1mnEg',\n",
725
+ " 'snippet': 'Introduction to AI Agents 0:56 Understanding AI Agents from First Principles 7:56 Building Block One: Intelligence Layer 9:08 Building Block ...',\n",
726
+ " 'date': '3 months ago',\n",
727
+ " 'position': 10}],\n",
728
+ " 'credits': 1}"
729
+ ]
730
+ },
731
+ "execution_count": 43,
732
+ "metadata": {},
733
+ "output_type": "execute_result"
734
+ }
735
+ ],
736
+ "source": [
737
+ "import os\n",
738
+ "from langchain_community.utilities import GoogleSerperAPIWrapper\n",
739
+ "\n",
740
+ "os.environ[\"SERPER_API_KEY\"] = \"d0114d0a5875a6055b3e0ff87fb60b2fbfea8460\"\n",
741
+ "\n",
742
+ "search = GoogleSerperAPIWrapper(type=\"search\") \n",
743
+ "query = \"site:youtube.com 'AI Agents'\"\n",
744
+ "\n",
745
+ "results = search.results(query)\n",
746
+ "\n",
747
+ "results\n"
748
+ ]
749
+ },
750
+ {
751
+ "cell_type": "code",
752
+ "execution_count": 45,
753
+ "id": "c5109d09-3d2f-43c2-937e-d917acfa02cf",
754
+ "metadata": {},
755
+ "outputs": [
756
+ {
757
+ "name": "stdout",
758
+ "output_type": "stream",
759
+ "text": [
760
+ "https://www.youtube.com/watch?v=FwOTs4UxQS4\n",
761
+ "https://www.youtube.com/watch?v=qU3fmidNbJE\n",
762
+ "https://www.youtube.com/watch?v=w0H1-b044KY\n",
763
+ "https://www.youtube.com/watch?v=GchXMRwuWxE\n",
764
+ "https://www.youtube.com/watch?v=fXizBc03D7E\n",
765
+ "https://www.youtube.com/watch?v=DV0Ln7HRyJQ\n",
766
+ "https://www.youtube.com/watch?v=1gm__VUG2m8\n",
767
+ "https://www.youtube.com/watch?v=2YYjPs8t8MI\n",
768
+ "https://www.youtube.com/watch?v=8C6iCpJ9HPo\n",
769
+ "https://www.youtube.com/watch?v=T1Lowy1mnEg\n"
770
+ ]
771
+ }
772
+ ],
773
+ "source": [
774
+ "for i in results['organic']:\n",
775
+ " print(i['link'])"
776
+ ]
777
+ },
778
+ {
779
+ "cell_type": "code",
780
+ "execution_count": null,
781
+ "id": "03cfee02-dc22-49dc-b3f8-b3281172850c",
782
+ "metadata": {},
783
+ "outputs": [],
784
+ "source": []
785
+ }
786
+ ],
787
+ "metadata": {
788
+ "kernelspec": {
789
+ "display_name": "Python 3 (ipykernel)",
790
+ "language": "python",
791
+ "name": "python3"
792
+ },
793
+ "language_info": {
794
+ "codemirror_mode": {
795
+ "name": "ipython",
796
+ "version": 3
797
+ },
798
+ "file_extension": ".py",
799
+ "mimetype": "text/x-python",
800
+ "name": "python",
801
+ "nbconvert_exporter": "python",
802
+ "pygments_lexer": "ipython3",
803
+ "version": "3.11.11"
804
+ }
805
+ },
806
+ "nbformat": 4,
807
+ "nbformat_minor": 5
808
+ }
youtube.py ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
2
+ from langchain_chroma import Chroma
3
+ from langchain_huggingface import HuggingFaceEmbeddings
4
+ from bytez import Bytez
5
+ from youtube_transcript_api import YouTubeTranscriptApi
6
+ import gradio as gr
7
+ from dotenv import load_dotenv
8
+ import os
9
+ from urllib.parse import urlparse, parse_qs
10
+
11
# Load variables from the local "secrets.env" file into the process
# environment before anything reads them.
load_dotenv("secrets.env")

# The Bytez client is created once at import time and shared by every
# function below that talks to the hosted model.
api_key = os.environ.get("BYTEZ_API_KEY")
sdk = Bytez(api_key)
15
+
16
# Toy function: cuts the ID out at fixed character offsets.
# NOTE: a second function with the same name is defined right after this
# one, so this version is replaced as soon as the module finishes loading.
def video_id_extractor(link):
    """Return the 11 characters found where a video ID sits in a standard link.

    Links containing ``watch?v=`` are sliced from offset 32, every other
    link from offset 17. No validation is performed, so a link that does
    not follow those exact layouts yields an arbitrary (possibly shorter)
    substring.
    """
    start = 32 if "watch?v=" in link else 17
    return link[start:start + 11]
22
+
23
# Production-ready function
def video_id_extractor(link):
    """Extract the video ID from a YouTube link.

    Supported shapes:
      * ``youtube.com/watch?v=<id>`` (any sub-domain, e.g. ``www.``/``m.``)
      * ``youtube.com/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``, ``/v/<id>``
      * ``youtu.be/<id>``

    Surrounding whitespace is ignored and a missing scheme is tolerated,
    because the link comes from a free-text (multi-line) input box.

    Args:
        link: The text entered by the user.

    Returns:
        The video ID as a string, or ``None`` when the text is empty, is not
        a string, cannot be parsed, or is not a recognised YouTube link.
    """
    if not link or not isinstance(link, str):
        return None

    candidate = link.strip()
    if not candidate:
        return None
    # Without a scheme urlparse puts the host into ``path`` and leaves
    # ``netloc`` empty, so add one for pasted links like "youtu.be/<id>".
    if "://" not in candidate:
        candidate = f"https://{candidate}"

    try:
        parsed_url = urlparse(candidate)
    except ValueError:
        # Raised by urllib for malformed hosts (e.g. an unbalanced "[").
        return None

    # ``hostname`` is lower-cased and excludes any port or credentials.
    host = parsed_url.hostname or ""
    path_parts = [part for part in parsed_url.path.split("/") if part]

    # Compare whole host labels; a plain substring test would also accept
    # unrelated domains such as "notyoutube.com".
    if host == "youtube.com" or host.endswith(".youtube.com"):
        video_id = parse_qs(parsed_url.query).get("v", [None])[0]
        if video_id:
            return video_id
        if len(path_parts) >= 2 and path_parts[0] in ("shorts", "embed", "live", "v"):
            return path_parts[1]
        return None

    if host == "youtu.be" or host.endswith(".youtu.be"):
        # Only the first path segment is the ID.
        return path_parts[0] if path_parts else None

    return None
34
+
35
def generate_transcript(video_id):
    """Fetch the transcript of *video_id* and flatten it into one string.

    Returns ``None`` when the fetch raises for any reason. Otherwise returns
    the text of every snippet, each one preceded by a single space (so a
    non-empty result starts with a space and an empty transcript gives "").
    """
    api = YouTubeTranscriptApi()
    try:
        fetched = api.fetch(video_id=video_id)
    except Exception:
        return None
    return "".join(f" {snippet.text}" for snippet in fetched.snippets)
45
+
46
def create_and_save_vs(trans):
    """Split a transcript into chunks, embed them and store them in Chroma.

    The store is persisted under the ``chroma_db`` directory. Each call
    writes into its own freshly named collection: reusing one collection
    for every call would keep adding chunks to the same persisted data, so
    a similarity search for one video could return text from videos that
    were processed earlier.

    Args:
        trans: The full transcript text.

    Returns:
        The Chroma vector store holding this transcript's chunks, or
        ``None`` if splitting, embedding or storing raised an exception.
    """
    import uuid  # local import: ``uuid`` is not imported at the top of this file

    try:
        splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=50)
        docs = splitter.split_text(trans)
        embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
        vector_store_db = Chroma.from_texts(
            docs,
            embeddings,
            # Unique per transcript so stored videos never mix.
            collection_name=f"transcript_{uuid.uuid4().hex}",
            persist_directory='chroma_db',
        )
    except Exception:
        # Best-effort: the caller shows a generic error page on None.
        return None
    return vector_store_db
55
+
56
def generate_summary(trans):
    """Ask the hosted model for a short summary of a transcript.

    Transcripts longer than 90000 space-separated words are cut down to
    their first 85000 words before being sent. The request is attempted up
    to four times; between attempts the function waits 3, 6 and then 12
    seconds. There is no wait after the last attempt.

    A response counts as successful when it is a 3-element list whose first
    element can be indexed with ``"content"``.
    NOTE(review): this success test is carried over unchanged from the
    previous implementation - confirm it against the Bytez SDK's documented
    return value.

    Args:
        trans: The transcript text.

    Returns:
        The summary text, or ``None`` if the input could not be prepared or
        every attempt failed.
    """
    import time  # local import: ``time`` is not imported at the top of this file

    try:
        model = sdk.model("openai/gpt-4o")
        words = trans.split(" ")
        if len(words) > 90000:
            trans = " ".join(words[:85000])
    except Exception:
        return None

    messages = [
        {"role": "system", "content": "You are a youtube transcipt sammurizer. Sammurize the transcript under 100 words"},
        {"role": "user", "content": trans},
    ]

    attempts = 4
    delay = 3
    for attempt in range(attempts):
        try:
            res = model.run(messages)
        except Exception:
            # A failed request is treated like any other bad response so the
            # function keeps its "return None on failure" contract.
            res = None

        if isinstance(res, list) and len(res) == 3:
            try:
                return res[0]["content"]
            except (TypeError, KeyError, IndexError):
                pass  # malformed payload: fall through and retry

        # Back off before the next attempt, but never after the last one.
        if attempt < attempts - 1:
            time.sleep(delay)
            delay *= 2
    return None
79
+
80
def setter(link):
    """Process a submitted link and drive which page of the UI is visible.

    This is a generator used as a Gradio event handler. Every yielded value
    is an 8-tuple matching the handler's outputs, in this order: visibility
    updates for the first page, loading page, chat page, wrong-link page,
    captions-not-available page and generic-error page, then the summary
    text and the vector store.

    The loading page is shown first. Processing then stops at the first
    stage that fails, leaving the matching error page visible; previously
    execution continued after an error and always ended on the chat page.

    Args:
        link: The text entered in the link box.

    Yields:
        The 8-tuple described above.
    """
    pages = ("first", "loading", "chat", "wrong_link", "no_captions", "error")

    def show(page, summary_text="", store=""):
        # Exactly one page is visible at a time; all others are hidden.
        updates = tuple(gr.update(visible=(name == page)) for name in pages)
        return (*updates, summary_text, store)

    yield show("loading")

    video_id = video_id_extractor(link)
    if not video_id:
        yield show("wrong_link")
        return

    transcript = generate_transcript(video_id)
    if not transcript:
        yield show("no_captions")
        return

    vectorstore = create_and_save_vs(transcript)
    if not vectorstore:
        yield show("error")
        return

    summary = generate_summary(transcript)
    if not summary:
        yield show("error")
        return

    yield show("chat", summary, vectorstore)
95
+
96
def execute(vec, query):
    """Answer *query* from the three transcript chunks most similar to it.

    Args:
        vec: The vector store to search.
        query: The user's question.

    Returns:
        A 3-tuple ``(answer, chat_page_update, error_page_update)``. On
        success the answer text is returned with the chat page visible and
        the error page hidden. If anything raises, the answer is "" and the
        visibility flags are swapped.
    """
    try:
        matches = vec.similarity_search(query, k=3)
        # Each chunk goes on its own line, with a leading newline before the first.
        context = "".join(f"\n{doc.page_content}" for doc in matches)
        model = sdk.model("openai/gpt-4o")
        messages = [
            {"role": "system", "content": "You are a helpful assistant - you will be asked a query and provided with a context. You have to answer that query based on the provided context - do not make things up. Do not reveal the whole context, answer as like you already knew the context"},
            {"role": "user", "content": f"query: {query} | context: {context}"},
        ]
        reply = model.run(messages)
        return reply[0]['content'], gr.update(visible=True), gr.update(visible=False)
    except Exception:
        return "", gr.update(visible=False), gr.update(visible=True)
108
+
109
# Colour theme for the whole app.
_THEME = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
)

# Custom stylesheet: branding banners, buttons, inputs, status boxes and
# the keyframe animations they use.
_CUSTOM_CSS = """
/* Global Styles */
.gradio-container {
    font-family: 'Inter', 'Segoe UI', sans-serif !important;
    max-width: 1200px !important;
    margin: 0 auto !important;
}

/* Header Branding */
.header-brand {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 2rem;
    border-radius: 16px;
    margin-bottom: 2rem;
    box-shadow: 0 10px 40px rgba(102, 126, 234, 0.3);
    animation: fadeInDown 0.8s ease-out;
}

.header-brand h1 {
    color: white;
    font-size: 2.5rem;
    font-weight: 700;
    margin: 0;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
}

.header-brand p {
    color: rgba(255,255,255,0.95);
    font-size: 1.1rem;
    margin: 0.5rem 0 0 0;
}

/* Footer Branding */
.footer-brand {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 1.5rem;
    border-radius: 12px;
    margin-top: 2rem;
    text-align: center;
    box-shadow: 0 -5px 20px rgba(102, 126, 234, 0.2);
}

.footer-brand p {
    color: white;
    margin: 0.3rem 0;
    font-size: 0.95rem;
}

.footer-brand a {
    color: #ffd700;
    text-decoration: none;
    font-weight: 600;
    transition: all 0.3s ease;
}

.footer-brand a:hover {
    color: #fff;
    text-shadow: 0 0 10px rgba(255,255,255,0.5);
}

/* Main Title Animation */
.main-title {
    background: linear-gradient(90deg, #667eea, #764ba2, #667eea);
    background-size: 200% auto;
    color: white;
    padding: 1.5rem;
    border-radius: 12px;
    text-align: center;
    font-size: 1.8rem;
    font-weight: 600;
    margin-bottom: 2rem;
    box-shadow: 0 8px 32px rgba(102, 126, 234, 0.4);
    animation: gradientShift 3s ease infinite, fadeIn 1s ease-out;
}

/* Button Styles */
.gr-button {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
    padding: 12px 32px !important;
    border-radius: 8px !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

.gr-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 25px rgba(102, 126, 234, 0.6) !important;
}

.gr-button:active {
    transform: translateY(0px) !important;
}

/* Input Fields */
.gr-textbox, .gr-text-input {
    border-radius: 8px !important;
    border: 2px solid #e0e7ff !important;
    transition: all 0.3s ease !important;
}

.gr-textbox:focus, .gr-text-input:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
}

/* Loading Animation */
.loading-container {
    text-align: center;
    padding: 3rem;
}

.loading-text {
    font-size: 1.5rem;
    color: #667eea;
    animation: pulse 1.5s ease-in-out infinite;
}

/* Error Messages */
.error-message {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
    color: white;
    padding: 1.5rem;
    border-radius: 12px;
    text-align: center;
    font-size: 1.3rem;
    font-weight: 600;
    box-shadow: 0 8px 32px rgba(245, 87, 108, 0.3);
    animation: shake 0.5s ease-in-out;
}

/* Success/Summary Box */
.summary-box {
    background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
    padding: 1.5rem;
    border-radius: 12px;
    margin-bottom: 1.5rem;
    box-shadow: 0 8px 24px rgba(168, 237, 234, 0.3);
    animation: fadeInUp 0.6s ease-out;
}

/* Chat Section */
.chat-section {
    animation: fadeInUp 0.8s ease-out;
}

/* Animations */
@keyframes fadeIn {
    from {
        opacity: 0;
    }
    to {
        opacity: 1;
    }
}

@keyframes fadeInDown {
    from {
        opacity: 0;
        transform: translateY(-30px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}

@keyframes fadeInUp {
    from {
        opacity: 0;
        transform: translateY(30px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}

@keyframes pulse {
    0%, 100% {
        opacity: 1;
    }
    50% {
        opacity: 0.5;
    }
}

@keyframes shake {
    0%, 100% { transform: translateX(0); }
    25% { transform: translateX(-10px); }
    75% { transform: translateX(10px); }
}

@keyframes gradientShift {
    0% {
        background-position: 0% 50%;
    }
    50% {
        background-position: 100% 50%;
    }
    100% {
        background-position: 0% 50%;
    }
}

/* Responsive Design */
@media (max-width: 768px) {
    .header-brand h1 {
        font-size: 1.8rem;
    }
    .main-title {
        font-size: 1.3rem;
    }
}
"""

# Page layout. Components appear on screen in the order they are created
# here, so the statement order below is significant.
with gr.Blocks(theme=_THEME, css=_CUSTOM_CSS) as ui:
    # Header Branding
    gr.HTML("""
    <div class="header-brand">
    <h1>🎓 AI YouTube Study Assistant</h1>
    <p>Transform lengthy videos into concise knowledge</p>
    </div>
    """)

    # Holds the vector store produced by `setter` so `execute` can query it.
    vs = gr.State()
    gr.HTML('<div class="main-title">📹 Why watch long YouTube videos when you could study from AI?</div>')

    # Landing page: link entry. This is the only row visible at start-up.
    with gr.Row(visible=True) as first_page:
        youtube_link = gr.Textbox(
            label="Enter the youtube link here: ",
            lines=2,
            placeholder="https://www.youtube.com/watch?v=...",
        )
        submit_button = gr.Button("SUBMIT!")

    # Summary plus question/answer page.
    with gr.Row(visible=False) as chat_page:
        with gr.Column():
            summary = gr.Markdown(elem_classes="summary-box")
            gr.Markdown("### 💬 Now ask any question about the video:")
            ques = gr.Textbox(
                label="Enter the question here: ",
                lines=2,
                placeholder="What is the main topic of this video?",
            )
            submit_answer = gr.Button("SUBMIT!")
            answer = gr.TextArea(label="ANSWER")

    # Status rows; the event handlers toggle their visibility.
    with gr.Row(visible=False) as wrong_link_page:
        gr.HTML('<div class="error-message">❌ Sorry, your link wasn\'t correct. Please try again!</div>')

    with gr.Row(visible=False) as cc_not_enabled:
        gr.HTML('<div class="error-message">⚠️ The link you provided was either not valid or subtitles weren\'t enabled in that video</div>')

    with gr.Row(visible=False) as loading_page:
        gr.HTML('<div class="loading-container"><div class="loading-text">⏳ Loading... Please Wait</div></div>')

    with gr.Row(visible=False) as normal_error:
        gr.HTML('<div class="error-message">😔 SORRY, SOME ERROR OCCURRED. PLEASE TRY AGAIN LATER</div>')

    # Footer Branding
    gr.HTML("""
    <div class="footer-brand">
    <p><strong>Developed by Darsh Tayal</strong></p>
    <p>📧 <a href="mailto:[email protected]">[email protected]</a></p>
    <p style="margin-top: 1rem; font-size: 0.85rem; opacity: 0.9;">© 2024 All Rights Reserved</p>
    </div>
    """)

    # Event wiring: the order of `outputs` must match the order of the
    # values each handler yields/returns.
    submit_button.click(
        setter,
        inputs=[youtube_link],
        outputs=[
            first_page,
            loading_page,
            chat_page,
            wrong_link_page,
            cc_not_enabled,
            normal_error,
            summary,
            vs,
        ],
    )
    submit_answer.click(
        execute,
        inputs=[vs, ques],
        outputs=[answer, chat_page, normal_error],
    )

ui.launch(inbrowser=True)