|
|
|
|
|
""" |
|
|
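Generate a README.md from a template by filling in {{PLACEHOLDER}} tokens
with values read from a YAML config file.

Usage:
    python generate_readme.py --config config.yaml [--template README.md] [--output README_generated.md]
    python generate_readme.py --create-sample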
|
|
""" |
|
|
|
|
|
import argparse |
|
|
from pathlib import Path |
|
|
|
|
|
import yaml |
|
|
|
|
|
|
|
|
def load_config(config_path):
    """Load the placeholder configuration from a YAML file.

    Args:
        config_path: Path to the YAML config file (read as UTF-8).

    Returns:
        The parsed top-level mapping. An empty document yields an empty
        dict instead of ``None`` so callers can always iterate ``.items()``.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
        ValueError: If the top-level YAML node is not a mapping.
    """
    with open(config_path, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # safe_load() returns None for an empty document; normalise it so the
    # placeholder substitution does not fail on ``None.items()``.
    if config is None:
        return {}

    # Fail early with a clear message rather than with an AttributeError
    # deep inside the substitution step.
    if not isinstance(config, dict):
        raise ValueError(
            f"Config harus berupa mapping YAML (key: value), "
            f"bukan {type(config).__name__}: {config_path}"
        )
    return config
|
|
|
|
|
|
|
|
def load_template(template_path):
    """Return the complete text of the README template.

    Args:
        template_path: Path to the template file (read as UTF-8).

    Returns:
        The file content as a single string.
    """
    with open(template_path, mode="r", encoding="utf-8") as template_file:
        template_text = template_file.read()
    return template_text
|
|
|
|
|
|
|
|
def replace_placeholders(template, config):
    """Substitute ``{{KEY}}`` tokens in *template* with values from *config*.

    List and dict values are rendered as block-style YAML (trailing
    whitespace stripped); every other value is rendered with ``str()``.
    Tokens whose key is not present in *config* are left untouched.

    The substitution is done in a single pass over the template, so text
    that was inserted for one key is never re-scanned for other tokens and
    the result does not depend on the order of the keys in *config*.

    Args:
        template: Template text containing ``{{KEY}}`` tokens.
        config: Mapping of placeholder names to values. ``None`` or an
            empty mapping leaves the template unchanged.

    Returns:
        The template with all known tokens replaced.
    """
    # Local import keeps this block self-contained; ``re`` is stdlib.
    import re

    if not config:
        return template

    rendered = {}
    for key, value in config.items():
        if isinstance(value, (list, dict)):
            value = yaml.dump(
                value, default_flow_style=False, allow_unicode=True
            ).strip()
        # setdefault: if two keys stringify to the same token, the first
        # one wins, matching the order in which they were applied before.
        rendered.setdefault(f"{{{{{key}}}}}", str(value))

    # Longest tokens first so an alternation never stops at a shorter token
    # that happens to be a prefix of a longer one.
    tokens = sorted(rendered, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(token) for token in tokens))

    # A function replacement inserts the value verbatim (no backslash or
    # group-reference processing of the replacement text).
    return pattern.sub(lambda match: rendered[match.group(0)], template)
|
|
|
|
|
|
|
|
def generate_readme(config_path, template_path, output_path):
    """Render the README template with the config values and save it.

    Args:
        config_path: Path to the YAML config file.
        template_path: Path to the README template.
        output_path: Destination file, written as UTF-8 (overwritten if it
            already exists).
    """
    # Config is loaded before the template, so a broken config is reported
    # first.
    settings = load_config(config_path)
    rendered = replace_placeholders(load_template(template_path), settings)

    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.write(rendered)

    print(f"README berhasil digenerate: {output_path}")
|
|
|
|
|
|
|
|
def create_sample_config(output_path):
    """Write a sample YAML config with example values to *output_path*.

    The keys are the placeholder names that get substituted into the
    README template. Keys are written in the order they are declared here
    (``sort_keys=False``) so related settings stay grouped in the file.

    Args:
        output_path: Destination file, written as UTF-8 (overwritten if it
            already exists).
    """
    sample_config = {
        # Front-matter metadata
        "LICENSE": "mit",
        "LANGUAGE": "id",
        "LIBRARY_NAME": "transformers",
        "PIPELINE_TAG": "text-classification",
        "DATASET_TYPE": "custom",
        "INFERENCE_ENABLED": True,
        # Model and task description
        "MODEL_NAME": "BERT Indonesian Topic Classification (16 labels)",
        "MODEL_TITLE": "BERT Indonesian Topic Classification (16 labels)",
        "BASE_MODEL": "cahya/bert-base-indonesian-1.5G",
        "TASK_TYPE": "text-classification",
        "TASK_NAME": "Topic Classification",
        "TASK_DESCRIPTION": "Topic classification (single-label)",
        "NUM_LABELS": 16,
        "LABELS_INLINE": "Politik, Ekonomi, Olahraga, Teknologi, dll.",
        "DATASET_NAME": "Custom Dataset (ID)",
        "SPLIT_TYPE": "validation",
        # Visualization
        "VISUALIZATION_TYPE": "Confusion Matrix",
        "VISUALIZATION_FILENAME": "confusion_matrix.png",
        # TAGS and METRICS are pre-formatted YAML fragments. They are stored
        # as plain strings so they are inserted verbatim; as lists they
        # would be re-serialized with yaml.dump() during substitution.
        # NOTE(review): the absolute indentation of these fragments must
        # match where the template places {{TAGS}} / {{METRICS}} - confirm
        # against the template.
        "TAGS": "\n".join(
            [
                " - indonesian",
                " - indonesia",
                " - topic-classification",
                " - bert",
            ]
        ),
        "METRICS": "\n".join(
            [
                " - type: accuracy",
                "   value: 0.921",
                " - type: f1",
                "   name: f1_macro",
                "   value: 0.893",
                " - type: f1",
                "   name: f1_micro",
                "   value: 0.912",
            ]
        ),
        # Free-form markdown sections
        "INTENDED_USE": "- Klasifikasi topik untuk teks berbahasa Indonesia pada domain umum.",
        "LIMITATIONS": "\n".join(
            [
                "- Performa bergantung pada distribusi label dataset Anda.",
                "- Teks OOD (di luar domain data latih) bisa turun akurasinya.",
            ]
        ),
        "TRAINING_DETAILS": "\n".join(
            [
                "- Framework: 🤗 Transformers (PyTorch)",
                "- Max length: 512",
                "- Batch size: 16",
                "- Epochs: 3",
                "- Learning rate: 2e-5",
                "- Weight decay: 0.01",
                "- Warmup ratio: 0.1",
                "- Scheduler: linear",
                "- Mixed precision: true",
            ]
        ),
        "EVALUATION_DETAILS": "\n".join(
            [
                "- Split: 80/20 stratified",
                "- Accuracy (val): **92.1%**",
                "- F1 Macro (val): **89.3%**",
                "- F1 Micro (val): **91.2%**",
                "",
                "Per-label report tersedia pada artifact `eval_results.json`.",
            ]
        ),
        "USAGE_CODE": "\n".join(
            [
                "from transformers import AutoTokenizer, AutoModelForSequenceClassification",
                "import torch",
                "",
                'repo_id = "your-username/model-name"',
                "tokenizer = AutoTokenizer.from_pretrained(repo_id)",
                "model = AutoModelForSequenceClassification.from_pretrained(repo_id).eval()",
                "",
                'text = "Contoh teks untuk diklasifikasi."',
                'inputs = tokenizer(text, return_tensors="pt")',
                "with torch.no_grad():",
                # The body of the ``with`` block must be indented for the
                # snippet to be valid Python.
                "    logits = model(**inputs).logits",
                "pred_id = logits.argmax(-1).item()",
                "label = model.config.id2label[pred_id]",
                "print(label)",
            ]
        ),
        "ADDITIONAL_INFO": "\n".join(
            [
                "## Citation",
                "",
                "Jika menggunakan model ini, mohon kutip:",
                "```bibtex",
                "@misc{your-model-2025,",
                "title={Model Title},",
                "author={Your Name},",
                "year={2025},",
                "url={https://huggingface.co/your-username/model-name}",
                "}",
                "```",
            ]
        ),
    }

    with open(output_path, "w", encoding="utf-8") as f:
        yaml.dump(
            sample_config,
            f,
            default_flow_style=False,
            allow_unicode=True,
            indent=2,
            # Keep the declaration order above instead of alphabetical order.
            sort_keys=False,
        )

    print(f"Sample config dibuat: {output_path}")
|
|
|
|
|
|
|
|
def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read the process arguments.

    Raises:
        SystemExit: With status 2 when ``--config`` is missing (and
            ``--create-sample`` was not given), and with status 1 and an
            error message on stderr when the config or template file does
            not exist. Failures no longer end with exit status 0.
    """
    parser = argparse.ArgumentParser(description="Generate README dari template")
    parser.add_argument("--config", "-c", help="Path ke file config YAML")
    parser.add_argument(
        "--template",
        "-t",
        default="README.md",
        help="Path ke template README (default: README.md)",
    )
    parser.add_argument(
        "--output",
        "-o",
        default="README_generated.md",
        help="Path output README (default: README_generated.md)",
    )
    parser.add_argument(
        "--create-sample", action="store_true", help="Buat sample config file"
    )

    args = parser.parse_args(argv)

    # --create-sample is a stand-alone mode; every other option is ignored.
    if args.create_sample:
        create_sample_config("sample_config.yaml")
        return

    if not args.config:
        # parser.error() prints the usage plus the message to stderr and
        # exits with status 2.
        parser.error("--config diperlukan kecuali menggunakan --create-sample")

    # is_file() also rejects directories, which exists() would let through.
    # SystemExit with a string prints it to stderr and exits with status 1.
    if not Path(args.config).is_file():
        raise SystemExit(f"Error: Config file tidak ditemukan: {args.config}")

    if not Path(args.template).is_file():
        raise SystemExit(f"Error: Template file tidak ditemukan: {args.template}")

    generate_readme(args.config, args.template, args.output)
|
|
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|