Sami Ali committed on
Commit
1660531
·
1 Parent(s): 51349bc

remove unwanted files

Browse files
Files changed (1) hide show
  1. madrag.ipynb +0 -113
madrag.ipynb DELETED
@@ -1,113 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 16,
6
- "id": "c80e0812",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "from langchain.text_splitter import CharacterTextSplitter"
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": 17,
16
- "id": "bbc6a9d6",
17
- "metadata": {},
18
- "outputs": [],
19
- "source": [
20
- "with open('./data/pmc/PMC10000000.txt', \"r\", encoding='utf-8') as file:\n",
21
- " data = file.read()"
22
- ]
23
- },
24
- {
25
- "cell_type": "code",
26
- "execution_count": 18,
27
- "id": "9eba0782",
28
- "metadata": {},
29
- "outputs": [
30
- {
31
- "name": "stdout",
32
- "output_type": "stream",
33
- "text": [
34
- "23842\n"
35
- ]
36
- }
37
- ],
38
- "source": [
39
- "print(len(data))"
40
- ]
41
- },
42
- {
43
- "cell_type": "code",
44
- "execution_count": 23,
45
- "id": "c0b716f8",
46
- "metadata": {},
47
- "outputs": [],
48
- "source": [
49
- "chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separator=' ')"
50
- ]
51
- },
52
- {
53
- "cell_type": "code",
54
- "execution_count": 24,
55
- "id": "14aa384c",
56
- "metadata": {},
57
- "outputs": [],
58
- "source": [
59
- "temp = chunks.split_text(data)"
60
- ]
61
- },
62
- {
63
- "cell_type": "code",
64
- "execution_count": 25,
65
- "id": "77187982",
66
- "metadata": {},
67
- "outputs": [
68
- {
69
- "data": {
70
- "text/plain": [
71
- "24"
72
- ]
73
- },
74
- "execution_count": 25,
75
- "metadata": {},
76
- "output_type": "execute_result"
77
- }
78
- ],
79
- "source": [
80
- "len(temp)"
81
- ]
82
- },
83
- {
84
- "cell_type": "code",
85
- "execution_count": null,
86
- "id": "4c254a11",
87
- "metadata": {},
88
- "outputs": [],
89
- "source": []
90
- }
91
- ],
92
- "metadata": {
93
- "kernelspec": {
94
- "display_name": "venv",
95
- "language": "python",
96
- "name": "python3"
97
- },
98
- "language_info": {
99
- "codemirror_mode": {
100
- "name": "ipython",
101
- "version": 3
102
- },
103
- "file_extension": ".py",
104
- "mimetype": "text/x-python",
105
- "name": "python",
106
- "nbconvert_exporter": "python",
107
- "pygments_lexer": "ipython3",
108
- "version": "3.11.0"
109
- }
110
- },
111
- "nbformat": 4,
112
- "nbformat_minor": 5
113
- }