Spaces:
Runtime error
Runtime error
| from qdrant_client import QdrantClient | |
| from qdrant_client.http.models import Distance, VectorParams | |
| from qdrant_client.http.models import PointStruct | |
| import tqdm | |
| import glob | |
| import model | |
| import re | |
COLLECTION_NAME = "mdn-docs"
DOCS_GLOB = "translated-content/files/zh-cn/**/*.md"
# Must equal the length of the vectors produced by `model.encode`.
VECTOR_SIZE = 768

# Compiled once instead of on every loop iteration. Anchored at the start of
# the document (used with .match): optional leading whitespace, the opening
# `---` fence, then a `title:` key on the very next line.
TITLE_RE = re.compile(r'\s*---[\n\r]+title:(((?!---).)+)', re.M | re.I)


def extract_title(text, fallback):
    """Return the front-matter title of a Markdown document.

    The title is only recognised when `title:` is the first key after the
    opening `---` fence (the key is matched case-insensitively). The value is
    the rest of that line, cut short at any `---` sequence, with surrounding
    whitespace stripped.

    Args:
        text: Full contents of the Markdown file.
        fallback: Value returned when no title is found.

    Returns:
        The stripped title string, or `fallback` if the document does not
        start with a front-matter block whose first key is a non-empty title.
    """
    found = TITLE_RE.match(text)
    return found.group(1).strip() if found else fallback


def main():
    """Embed every zh-cn Markdown file and store it in a fresh Qdrant collection.

    The collection is dropped and recreated on each run, then one point per
    file is upserted. Each point carries the document title and the full text
    as payload.
    """
    client = QdrantClient("127.0.0.1", port=6333)
    # NOTE(review): newer qdrant-client releases appear to deprecate
    # `recreate_collection` -- confirm against the installed version.
    client.recreate_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),
    )

    # glob order is filesystem-dependent; sorting makes the id -> file mapping
    # reproducible between runs.
    files = sorted(glob.glob(DOCS_GLOB, recursive=True))
    print(len(files))

    # Point ids start at 1, matching the previous hand-maintained counter.
    for point_id, path in enumerate(tqdm.tqdm(files), start=1):
        # tqdm.write keeps the progress bar intact, unlike a bare print().
        tqdm.tqdm.write(f"file {path}")

        # Hold the file handle only for the read, not for the embedding and
        # the network round trip.
        with open(path, 'r', encoding='utf-8') as f:
            text = f.read()

        title = extract_title(text, fallback=path)
        # NOTE(review): `model` is a project-local module; this assumes
        # `encode` returns a vector type that PointStruct accepts -- confirm.
        vector = model.encode(text)
        client.upsert(
            collection_name=COLLECTION_NAME,
            wait=True,
            points=[
                PointStruct(
                    id=point_id,
                    vector=vector,
                    payload={"title": title, "text": text},
                ),
            ],
        )


if __name__ == '__main__':
    main()