yrshi committed on
Commit
3b64038
·
1 Parent(s): 8b4913f
Files changed (3) hide show
  1. download.py +25 -0
  2. download_corpus.sh +5 -0
  3. install_env.sh +6 -5
download.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Download the wiki-18 E5 index parts and the wiki-18 corpus from the Hugging Face Hub.

Usage:
    python download.py --save_path ./data [--repo_id <index dataset repo>]

Every file is written into the directory given by ``--save_path``.
"""
import argparse

from huggingface_hub import hf_hub_download

parser = argparse.ArgumentParser(description="Download files from a Hugging Face dataset repository.")
parser.add_argument("--repo_id", type=str, default="PeterJinGo/wiki-18-e5-index", help="Hugging Face repository ID")
parser.add_argument("--save_path", type=str, required=True, help="Local directory to save files")

args = parser.parse_args()

# Honor --repo_id for the index download. The flag used to be parsed and then
# shadowed by a hard-coded value identical to its default, so passing it had
# no effect. The default keeps the previous behavior.
index_repo_id = args.repo_id

# The index is fetched as two split parts; download_corpus.sh concatenates
# them into e5_Flat.index once this script has finished.
for filename in ["part_aa", "part_ab"]:
    hf_hub_download(
        repo_id=index_repo_id,
        filename=filename,
        repo_type="dataset",
        local_dir=args.save_path,
    )

# The corpus lives in its own dataset repository and is not affected by
# --repo_id.
corpus_repo_id = "PeterJinGo/wiki-18-corpus"
hf_hub_download(
    repo_id=corpus_repo_id,
    filename="wiki-18.jsonl.gz",
    repo_type="dataset",
    local_dir=args.save_path,
)
download_corpus.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
# Fetch the wiki-18 E5 index parts and corpus into $save_path, then assemble
# them: the two index parts are joined into e5_Flat.index and the corpus
# archive is decompressed.

# Stop at the first failing command or unset variable. Without this, a failed
# `mv` would still let `cat >>` create an index that contains only part_ab.
set -eu

save_path=./data

# Exit statuses are limited to 0-255; `exit -1` is rejected by some shells,
# so report a download failure with a plain 1.
python download.py --save_path "$save_path" || exit 1

# part_aa becomes the start of the index; part_ab is appended to it and is
# only removed once the append has succeeded.
mv "$save_path/part_aa" "$save_path/e5_Flat.index"
cat "$save_path/part_ab" >> "$save_path/e5_Flat.index" && rm "$save_path/part_ab"

# Decompress the corpus archive.
gzip -d "$save_path/wiki-18.jsonl.gz"
install_env.sh CHANGED
@@ -1,9 +1,10 @@
1
- conda create -n faiss_env python=3.10
2
- conda activate faiss_env
 
3
 
4
- conda install pytorch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 pytorch-cuda=12.1 -c pytorch -c nvidia
5
  pip install transformers datasets pyserini
6
 
7
- conda install -c pytorch -c nvidia faiss-gpu=1.8.0
8
 
9
- pip install uvicorn fastapi
 
# Create and activate the conda environment by hand before running the
# install commands below:
#   conda create -n faiss_env python=3.10
#   conda activate faiss_env

# PyTorch stack, pinned, from the pytorch and nvidia channels.
conda install --yes --channel pytorch --channel nvidia \
    pytorch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 pytorch-cuda=12.1

pip install transformers datasets pyserini

# GPU build of faiss, pinned, from the same channels.
conda install --yes --channel pytorch --channel nvidia faiss-gpu=1.8.0

pip install uvicorn fastapi huggingface_hub