import os
import tarfile
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

def extract_tar_with_progress(tar_path, output_dir):
    """
    Extract a single tar archive into ``output_dir`` and show a progress bar.

    Args:
        tar_path: Path of the tar archive to extract.
        output_dir: Directory that receives the archive contents; it is
            created (including parents) if it does not exist yet.

    Returns:
        None. The outcome is reported on stdout: a ``[SUCCESS]`` line when
        every member was extracted, or an ``[ERROR]`` line when any
        exception was raised while opening or extracting the archive. The
        exception is not propagated to the caller.
    """
    try:
        with tarfile.open(tar_path, 'r') as tar:
            members = tar.getmembers()

            # exist_ok avoids the check-then-create race that
            # ``os.path.exists`` + ``os.makedirs`` has when this function
            # runs concurrently on several worker threads.
            os.makedirs(output_dir, exist_ok=True)

            # Use the "data" extraction filter when this Python provides it,
            # so members with absolute paths or links pointing outside
            # ``output_dir`` are rejected instead of being written. Older
            # interpreters do not accept the ``filter`` argument at all.
            extract_kwargs = {}
            if hasattr(tarfile, "data_filter"):
                extract_kwargs["filter"] = "data"

            # One progress-bar tick per archive member.
            with tqdm(total=len(members), desc=f"Extracting {os.path.basename(tar_path)}", unit="file") as pbar:
                for member in members:
                    tar.extract(member, path=output_dir, **extract_kwargs)
                    pbar.update(1)

        print(f"[SUCCESS] 解压完成: {tar_path}")
    except Exception as e:
        # Best-effort by design: one bad archive must not abort the others,
        # so the failure is reported and swallowed here.
        print(f"[ERROR] 解压失败: {tar_path} - {str(e)}")

def main(base_dir="data/data_512_2M", output_base_dir=None, num_archives=47, max_workers=4):
    """
    Extract the numbered archives ``data_000000.tar`` ... in a thread pool.

    Each archive ``data_NNNNNN.tar`` found under ``base_dir`` is extracted
    into the sub-directory ``data_NNNNNN`` of ``output_base_dir``.

    Args:
        base_dir: Directory that contains the tar archives.
        output_base_dir: Directory under which one sub-directory per archive
            is created. Defaults to ``base_dir`` when ``None``.
        num_archives: Number of archives to process; indices run from 0 to
            ``num_archives - 1``.
        max_workers: Number of worker threads extracting in parallel.
    """
    if output_base_dir is None:
        output_base_dir = base_dir

    # Archive names are the index zero-padded to six digits.
    tar_files = [os.path.join(base_dir, f"data_{i:06d}.tar") for i in range(num_archives)]

    # Leaving the ``with`` block waits for all submitted extractions to finish.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for tar_file in tar_files:
            archive_stem = os.path.splitext(os.path.basename(tar_file))[0]
            output_dir = os.path.join(output_base_dir, archive_stem)
            executor.submit(extract_tar_with_progress, tar_file, output_dir)

# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
