Upload mini sentiment transformer
- README.md +80 -0
- checkpoint-126/config.json +25 -0
- checkpoint-126/model.safetensors +3 -0
- checkpoint-126/optimizer.pt +3 -0
- checkpoint-126/rng_state.pth +3 -0
- checkpoint-126/scheduler.pt +3 -0
- checkpoint-126/trainer_state.json +58 -0
- checkpoint-126/training_args.bin +3 -0
- checkpoint-189/config.json +25 -0
- checkpoint-189/model.safetensors +3 -0
- checkpoint-189/optimizer.pt +3 -0
- checkpoint-189/rng_state.pth +3 -0
- checkpoint-189/scheduler.pt +3 -0
- checkpoint-189/trainer_state.json +70 -0
- checkpoint-189/training_args.bin +3 -0
- checkpoint-63/config.json +25 -0
- checkpoint-63/model.safetensors +3 -0
- checkpoint-63/optimizer.pt +3 -0
- checkpoint-63/rng_state.pth +3 -0
- checkpoint-63/scheduler.pt +3 -0
- checkpoint-63/trainer_state.json +46 -0
- checkpoint-63/training_args.bin +3 -0
- config.json +15 -0
- pytorch_model.bin +3 -0
- runs/Aug01_11-34-03_5f8061b8deee/events.out.tfevents.1754048063.5f8061b8deee.5518.0 +3 -0
- runs/Aug01_11-34-03_5f8061b8deee/events.out.tfevents.1754048289.5f8061b8deee.5518.1 +3 -0
- tokenizer_config.json +7 -0
README.md
ADDED
@@ -0,0 +1,80 @@
---
language: en
license: mit
library_name: transformers
tags:
- sentiment-analysis
- text-classification
- transformers
- mini-transformer
datasets:
- glue/sst2
model-index:
- name: mini-sentiment-transformer
  results:
  - task:
      type: text-classification
      name: Sentiment Analysis
    dataset:
      name: SST-2
      type: glue
      args: sst2
    metrics:
    - type: accuracy
      value: 0.8154
      name: Validation Accuracy
---

# Mini Sentiment Transformer

This is a tiny transformer model for sentiment analysis, created as a learning project to understand the transformer architecture. It is much smaller than BERT or DistilBERT, with only about 4.19M parameters (4,188,802).

## Model Details

- Developed by: leorigasaki54
- Type: Text Classification (Sentiment Analysis)
- Language: English
- Training Data: SST-2 (Stanford Sentiment Treebank)
- Size: 4,188,802 parameters (4.19M)
- Architecture (see the sketch after this list):
  - 2 transformer encoder layers
  - 2 attention heads per layer
  - 128 embedding dimensions
  - 256 feed-forward dimensions

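The configuration above corresponds roughly to the PyTorch sketch below. This is illustrative only: the exact `MiniSentimentTransformer` implementation is not part of this upload, so details such as the pooling strategy and the learned positional embeddings are assumptions.

```python
import torch
import torch.nn as nn

class MiniSentimentTransformer(nn.Module):
    """Rough sketch of the architecture listed above (not the exact training code)."""

    def __init__(self, vocab_size=30522, d_model=128, num_heads=2,
                 num_layers=2, d_ff=256, num_classes=2, max_length=64):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)        # token embeddings
        self.pos_embedding = nn.Embedding(max_length, d_model)    # learned positions (assumed)
        layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads,
                                           dim_feedforward=d_ff, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers=num_layers)
        self.classifier = nn.Linear(d_model, num_classes)         # 2-way sentiment head

    def forward(self, input_ids, attention_mask=None):
        positions = torch.arange(input_ids.size(1), device=input_ids.device)
        x = self.embedding(input_ids) + self.pos_embedding(positions)
        pad_mask = (attention_mask == 0) if attention_mask is not None else None
        x = self.encoder(x, src_key_padding_mask=pad_mask)
        return self.classifier(x.mean(dim=1))                     # mean-pool, then classify
```
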
## Usage

```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Load tokenizer and model (the model reuses the DistilBERT tokenizer)
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("leorigasaki54/mini-sentiment-transformer")

# Prepare input
text = "I really enjoyed this movie!"
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)

# Make prediction
with torch.no_grad():
    outputs = model(**inputs)
    probabilities = F.softmax(outputs.logits, dim=-1)
    prediction = torch.argmax(probabilities, dim=-1).item()

sentiment = "Positive" if prediction == 1 else "Negative"
confidence = probabilities[0][prediction].item()

print(f"Sentiment: {sentiment} (confidence: {confidence:.4f})")
```

## Limitations

- This is a minimal implementation intended for educational purposes
- Performance may be lower than that of larger models such as BERT or DistilBERT
- The model was trained only on movie reviews and may not generalize well to other domains
- Limited to English text only

## Training

The model was trained on the SST-2 dataset for 5 epochs with the Adam optimizer and a learning rate of 5e-5.
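A minimal Hugging Face `Trainer` setup consistent with the checkpoints in this repository might look like the sketch below. It is not the original training script: the dataset preprocessing and the BERT-style model configuration (taken from `checkpoint-*/config.json`) are assumptions, and the epoch count and learning rate follow the Training section above.

```python
from datasets import load_dataset
from transformers import (AutoTokenizer, BertConfig, BertForSequenceClassification,
                          Trainer, TrainingArguments)

# Tokenize SST-2 with the DistilBERT tokenizer, capped at 64 tokens
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
dataset = load_dataset("glue", "sst2").map(
    lambda batch: tokenizer(batch["sentence"], truncation=True,
                            padding="max_length", max_length=64),
    batched=True)

# Tiny BERT classifier matching checkpoint-*/config.json, trained from scratch
config = BertConfig(hidden_size=128, num_hidden_layers=2, num_attention_heads=2,
                    intermediate_size=512, num_labels=2)
model = BertForSequenceClassification(config)

args = TrainingArguments(
    output_dir="./mini-sentiment-model",
    num_train_epochs=5,                 # as stated in the Training section
    learning_rate=5e-5,
    per_device_train_batch_size=16,     # matches train_batch_size in trainer_state.json
    eval_strategy="epoch",
    save_strategy="epoch",
)

trainer = Trainer(model=model, args=args,
                  train_dataset=dataset["train"],
                  eval_dataset=dataset["validation"])
trainer.train()
```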
checkpoint-126/config.json
ADDED
@@ -0,0 +1,25 @@
{
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.54.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
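Since each checkpoint directory is a plain `BertForSequenceClassification` snapshot with this config, it should load without custom code; for example (illustrative sketch, assuming the repo layout shown in the file list above):

```python
from transformers import AutoModelForSequenceClassification

# Load the epoch-2 Trainer checkpoint directly from the Hub repo
model = AutoModelForSequenceClassification.from_pretrained(
    "leorigasaki54/mini-sentiment-transformer", subfolder="checkpoint-126")
```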
checkpoint-126/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4506b237f1050833cf91481441ea8a1df63a263334521dc2aa6842b56b34e141
size 17549312
checkpoint-126/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b2cbf47f37585b1382e08c469ff83a1b52b06c0ed22d1260c825a579e88cc2ef
size 35123898
checkpoint-126/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e14576e5536188efec313f809265425da6e5c56074fb0b274dfe0222bbb6bf43
size 14244
checkpoint-126/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a30776912918b6f7493dde431aac45fc2965212e97f718858f7ba1f4fe1e5e15
size 1064
checkpoint-126/trainer_state.json
ADDED
@@ -0,0 +1,58 @@
{
  "best_global_step": 126,
  "best_metric": 0.6766817569732666,
  "best_model_checkpoint": "./mini-sentiment-model/checkpoint-126",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 126,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.54,
      "eval_f1": 0.6993464052287581,
      "eval_loss": 0.681846022605896,
      "eval_precision": 0.5376884422110553,
      "eval_recall": 1.0,
      "eval_runtime": 0.1526,
      "eval_samples_per_second": 1310.261,
      "eval_steps_per_second": 85.167,
      "step": 63
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.57,
      "eval_f1": 0.7094594594594594,
      "eval_loss": 0.6766817569732666,
      "eval_precision": 0.5555555555555556,
      "eval_recall": 0.9813084112149533,
      "eval_runtime": 0.1029,
      "eval_samples_per_second": 1943.287,
      "eval_steps_per_second": 126.314,
      "step": 126
    }
  ],
  "logging_steps": 500,
  "max_steps": 189,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 635243520000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
checkpoint-126/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e791125ba3e22840402f92bd40cab4a742f51fb9913759fe4f66250ab01752be
size 5368
checkpoint-189/config.json
ADDED
@@ -0,0 +1,25 @@
{
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.54.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
checkpoint-189/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:83c3b3706aee4f4ea7b0e8810fc3b535c3d10cdf331932742559bb0e5eb2ede1
size 17549312
checkpoint-189/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5164d35ec81d9efbe548d69284069bb83499db2ab4e6068ba7fc9faa116050ca
size 35123898
checkpoint-189/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6048021907f08141fd2b6a404cf4620a36db966f1f3ce745c9e4f2b13b28e9dd
size 14244
checkpoint-189/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dd9834cb37dd37c0bb479c91093a4be5fae04ef7c14da536f96b6acfdec15663
size 1064
checkpoint-189/trainer_state.json
ADDED
@@ -0,0 +1,70 @@
{
  "best_global_step": 189,
  "best_metric": 0.6738117933273315,
  "best_model_checkpoint": "./mini-sentiment-model/checkpoint-189",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 189,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.54,
      "eval_f1": 0.6993464052287581,
      "eval_loss": 0.681846022605896,
      "eval_precision": 0.5376884422110553,
      "eval_recall": 1.0,
      "eval_runtime": 0.1526,
      "eval_samples_per_second": 1310.261,
      "eval_steps_per_second": 85.167,
      "step": 63
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.57,
      "eval_f1": 0.7094594594594594,
      "eval_loss": 0.6766817569732666,
      "eval_precision": 0.5555555555555556,
      "eval_recall": 0.9813084112149533,
      "eval_runtime": 0.1029,
      "eval_samples_per_second": 1943.287,
      "eval_steps_per_second": 126.314,
      "step": 126
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.595,
      "eval_f1": 0.7197231833910035,
      "eval_loss": 0.6738117933273315,
      "eval_precision": 0.5714285714285714,
      "eval_recall": 0.9719626168224299,
      "eval_runtime": 0.2443,
      "eval_samples_per_second": 818.694,
      "eval_steps_per_second": 53.215,
      "step": 189
    }
  ],
  "logging_steps": 500,
  "max_steps": 189,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 952865280000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
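The `log_history` array above records one evaluation per epoch; a few lines of Python summarize it (a reading aid, with the path assumed to point at a local checkout of the repo):

```python
import json

# Summarize the per-epoch eval metrics stored in the trainer state
with open("checkpoint-189/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    print(f"epoch {entry['epoch']:.0f}: "
          f"acc={entry['eval_accuracy']:.3f} "
          f"f1={entry['eval_f1']:.3f} "
          f"loss={entry['eval_loss']:.3f}")
# epoch 1: acc=0.540 f1=0.699 loss=0.682
# epoch 2: acc=0.570 f1=0.709 loss=0.677
# epoch 3: acc=0.595 f1=0.720 loss=0.674
```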
checkpoint-189/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e791125ba3e22840402f92bd40cab4a742f51fb9913759fe4f66250ab01752be
size 5368
checkpoint-63/config.json
ADDED
@@ -0,0 +1,25 @@
{
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.54.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
checkpoint-63/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:379ff04d2e9de33d5ecd3f470def81b52a37a15dc136c95dfd0414ae2b982603
size 17549312
checkpoint-63/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3a2ccf39cfd807f8e97f497d888cf45872bdf13c60bf29d0150abb8dee9a5922
size 35123898
checkpoint-63/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8ee3244d1fb8d2938b76e0da0926eed0036f4636ced09415eb94b79dac5736c7
size 14244
checkpoint-63/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f1f2899150945e7607692260e26159240946ed36221da68ea6e9bb708e62775d
size 1064
checkpoint-63/trainer_state.json
ADDED
@@ -0,0 +1,46 @@
{
  "best_global_step": 63,
  "best_metric": 0.681846022605896,
  "best_model_checkpoint": "./mini-sentiment-model/checkpoint-63",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 63,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.54,
      "eval_f1": 0.6993464052287581,
      "eval_loss": 0.681846022605896,
      "eval_precision": 0.5376884422110553,
      "eval_recall": 1.0,
      "eval_runtime": 0.1526,
      "eval_samples_per_second": 1310.261,
      "eval_steps_per_second": 85.167,
      "step": 63
    }
  ],
  "logging_steps": 500,
  "max_steps": 189,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 317621760000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
checkpoint-63/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e791125ba3e22840402f92bd40cab4a742f51fb9913759fe4f66250ab01752be
size 5368
config.json
ADDED
@@ -0,0 +1,15 @@
{
  "vocab_size": 30522,
  "d_model": 128,
  "num_heads": 2,
  "num_layers": 2,
  "d_ff": 256,
  "num_classes": 2,
  "max_length": 64,
  "model_type": "mini-sentiment-transformer",
  "architectures": [
    "MiniSentimentTransformer"
  ],
  "tokenizer_class": "AutoTokenizer",
  "transformers_version": "4.30.0"
}
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c7d74915b6bb603d563a5b7a5be748c18174f7c38d4f54420154fe96ba4ad703
size 16800334
runs/Aug01_11-34-03_5f8061b8deee/events.out.tfevents.1754048063.5f8061b8deee.5518.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c2940d53128736f5e2997f126e59fb6dfb48930ad33d428aed747b5688162de4
size 6652
runs/Aug01_11-34-03_5f8061b8deee/events.out.tfevents.1754048289.5f8061b8deee.5518.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fa76d23aa58e2d8edd4ce9bd526769c6fc489aacabcb7ee778ee1f12344fce45
size 560
tokenizer_config.json
ADDED
@@ -0,0 +1,7 @@
{
  "base_tokenizer": "distilbert-base-uncased",
  "max_length": 64,
  "model_max_length": 64,
  "padding_side": "right",
  "truncation_side": "right"
}
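This custom tokenizer_config.json records that the model reuses the `distilbert-base-uncased` tokenizer capped at 64 tokens; recreating an equivalent tokenizer might look like this (a sketch consistent with the fields above, not a file the loader reads automatically):

```python
from transformers import AutoTokenizer

# Recreate the tokenizer described by tokenizer_config.json:
# the stock DistilBERT tokenizer, limited to 64 tokens.
tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased",
    model_max_length=64,
    padding_side="right",
    truncation_side="right",
)

enc = tokenizer("I really enjoyed this movie!", truncation=True, max_length=64)
print(enc["input_ids"][:8])
```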