leorigasaki54 committed on
Commit
fc534c9
·
verified ·
1 Parent(s): e4b7c79

Upload mini sentiment transformer

Browse files
README.md ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ license: mit
4
+ library_name: transformers
5
+ tags:
6
+ - sentiment-analysis
7
+ - text-classification
8
+ - transformers
9
+ - mini-transformer
10
+ datasets:
11
+ - glue/sst2
12
+ model-index:
13
+ - name: mini-sentiment-transformer
14
+ results:
15
+ - task:
16
+ type: text-classification
17
+ name: Sentiment Analysis
18
+ dataset:
19
+ name: SST-2
20
+ type: glue
21
+ args: sst2
22
+ metrics:
23
+ - type: accuracy
24
+ value: 0.8154
25
+ name: Validation Accuracy
26
+ ---
27
+
28
+ # Mini Sentiment Transformer
29
+
30
+ This is a tiny transformer model for sentiment analysis, created as a learning project to understand the transformer architecture. It is much smaller than BERT or DistilBERT, with only 4,188,802 parameters (about 4.19M).
31
+
32
+ ## Model Details
33
+
34
+ - Developed by: leorigasaki54
35
+ - Type: Text Classification (Sentiment Analysis)
36
+ - Language: English
37
+ - Training Data: SST-2 (Stanford Sentiment Treebank)
38
+ - Size: 4,188,802 parameters (4.19M)
39
+ - Architecture:
40
+ - 2 transformer encoder layers
41
+ - 2 attention heads per layer
42
+ - 128 embedding dimensions
43
+ - 256 feed-forward dimensions
44
+
45
+ ## Usage
46
+
47
+ ```python
48
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
49
+ import torch
+ import torch.nn.functional as F
50
+
51
+ # Load tokenizer and model
52
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") # We use DistilBERT tokenizer
53
+ model = AutoModelForSequenceClassification.from_pretrained("leorigasaki54/mini-sentiment-transformer")
54
+
55
+ # Prepare input
56
+ text = "I really enjoyed this movie!"
57
+ inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
58
+
59
+ # Make prediction
60
+ with torch.no_grad():
61
+     outputs = model(**inputs)
62
+     probabilities = F.softmax(outputs.logits, dim=-1)
63
+     prediction = torch.argmax(probabilities, dim=-1).item()
64
+
65
+ sentiment = "Positive" if prediction == 1 else "Negative"
66
+ confidence = probabilities[0][prediction].item()
67
+
68
+ print(f"Sentiment: {sentiment} (confidence: {confidence:.4f})")
69
+ ```
70
+
71
+ ## Limitations
72
+
73
+ - This is a minimal implementation meant for educational purposes
74
+ - Performance may be lower than larger models like BERT or DistilBERT
75
+ - The model has been trained only on movie reviews and may not generalize well to other domains
76
+ - Limited to English language text only
77
+
78
+ ## Training
79
+
80
+ The model was trained on the SST-2 dataset for 5 epochs using the Adam optimizer with a learning rate of 5e-5.
checkpoint-126/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 128,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 512,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 512,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 2,
16
+ "num_hidden_layers": 2,
17
+ "pad_token_id": 0,
18
+ "position_embedding_type": "absolute",
19
+ "problem_type": "single_label_classification",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.54.1",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
checkpoint-126/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4506b237f1050833cf91481441ea8a1df63a263334521dc2aa6842b56b34e141
3
+ size 17549312
checkpoint-126/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2cbf47f37585b1382e08c469ff83a1b52b06c0ed22d1260c825a579e88cc2ef
3
+ size 35123898
checkpoint-126/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e14576e5536188efec313f809265425da6e5c56074fb0b274dfe0222bbb6bf43
3
+ size 14244
checkpoint-126/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a30776912918b6f7493dde431aac45fc2965212e97f718858f7ba1f4fe1e5e15
3
+ size 1064
checkpoint-126/trainer_state.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 126,
3
+ "best_metric": 0.6766817569732666,
4
+ "best_model_checkpoint": "./mini-sentiment-model/checkpoint-126",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 126,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.54,
15
+ "eval_f1": 0.6993464052287581,
16
+ "eval_loss": 0.681846022605896,
17
+ "eval_precision": 0.5376884422110553,
18
+ "eval_recall": 1.0,
19
+ "eval_runtime": 0.1526,
20
+ "eval_samples_per_second": 1310.261,
21
+ "eval_steps_per_second": 85.167,
22
+ "step": 63
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_accuracy": 0.57,
27
+ "eval_f1": 0.7094594594594594,
28
+ "eval_loss": 0.6766817569732666,
29
+ "eval_precision": 0.5555555555555556,
30
+ "eval_recall": 0.9813084112149533,
31
+ "eval_runtime": 0.1029,
32
+ "eval_samples_per_second": 1943.287,
33
+ "eval_steps_per_second": 126.314,
34
+ "step": 126
35
+ }
36
+ ],
37
+ "logging_steps": 500,
38
+ "max_steps": 189,
39
+ "num_input_tokens_seen": 0,
40
+ "num_train_epochs": 3,
41
+ "save_steps": 500,
42
+ "stateful_callbacks": {
43
+ "TrainerControl": {
44
+ "args": {
45
+ "should_epoch_stop": false,
46
+ "should_evaluate": false,
47
+ "should_log": false,
48
+ "should_save": true,
49
+ "should_training_stop": false
50
+ },
51
+ "attributes": {}
52
+ }
53
+ },
54
+ "total_flos": 635243520000.0,
55
+ "train_batch_size": 16,
56
+ "trial_name": null,
57
+ "trial_params": null
58
+ }
checkpoint-126/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e791125ba3e22840402f92bd40cab4a742f51fb9913759fe4f66250ab01752be
3
+ size 5368
checkpoint-189/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 128,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 512,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 512,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 2,
16
+ "num_hidden_layers": 2,
17
+ "pad_token_id": 0,
18
+ "position_embedding_type": "absolute",
19
+ "problem_type": "single_label_classification",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.54.1",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
checkpoint-189/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83c3b3706aee4f4ea7b0e8810fc3b535c3d10cdf331932742559bb0e5eb2ede1
3
+ size 17549312
checkpoint-189/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5164d35ec81d9efbe548d69284069bb83499db2ab4e6068ba7fc9faa116050ca
3
+ size 35123898
checkpoint-189/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6048021907f08141fd2b6a404cf4620a36db966f1f3ce745c9e4f2b13b28e9dd
3
+ size 14244
checkpoint-189/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd9834cb37dd37c0bb479c91093a4be5fae04ef7c14da536f96b6acfdec15663
3
+ size 1064
checkpoint-189/trainer_state.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 189,
3
+ "best_metric": 0.6738117933273315,
4
+ "best_model_checkpoint": "./mini-sentiment-model/checkpoint-189",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 189,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.54,
15
+ "eval_f1": 0.6993464052287581,
16
+ "eval_loss": 0.681846022605896,
17
+ "eval_precision": 0.5376884422110553,
18
+ "eval_recall": 1.0,
19
+ "eval_runtime": 0.1526,
20
+ "eval_samples_per_second": 1310.261,
21
+ "eval_steps_per_second": 85.167,
22
+ "step": 63
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_accuracy": 0.57,
27
+ "eval_f1": 0.7094594594594594,
28
+ "eval_loss": 0.6766817569732666,
29
+ "eval_precision": 0.5555555555555556,
30
+ "eval_recall": 0.9813084112149533,
31
+ "eval_runtime": 0.1029,
32
+ "eval_samples_per_second": 1943.287,
33
+ "eval_steps_per_second": 126.314,
34
+ "step": 126
35
+ },
36
+ {
37
+ "epoch": 3.0,
38
+ "eval_accuracy": 0.595,
39
+ "eval_f1": 0.7197231833910035,
40
+ "eval_loss": 0.6738117933273315,
41
+ "eval_precision": 0.5714285714285714,
42
+ "eval_recall": 0.9719626168224299,
43
+ "eval_runtime": 0.2443,
44
+ "eval_samples_per_second": 818.694,
45
+ "eval_steps_per_second": 53.215,
46
+ "step": 189
47
+ }
48
+ ],
49
+ "logging_steps": 500,
50
+ "max_steps": 189,
51
+ "num_input_tokens_seen": 0,
52
+ "num_train_epochs": 3,
53
+ "save_steps": 500,
54
+ "stateful_callbacks": {
55
+ "TrainerControl": {
56
+ "args": {
57
+ "should_epoch_stop": false,
58
+ "should_evaluate": false,
59
+ "should_log": false,
60
+ "should_save": true,
61
+ "should_training_stop": true
62
+ },
63
+ "attributes": {}
64
+ }
65
+ },
66
+ "total_flos": 952865280000.0,
67
+ "train_batch_size": 16,
68
+ "trial_name": null,
69
+ "trial_params": null
70
+ }
checkpoint-189/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e791125ba3e22840402f92bd40cab4a742f51fb9913759fe4f66250ab01752be
3
+ size 5368
checkpoint-63/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 128,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 512,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 512,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 2,
16
+ "num_hidden_layers": 2,
17
+ "pad_token_id": 0,
18
+ "position_embedding_type": "absolute",
19
+ "problem_type": "single_label_classification",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.54.1",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
checkpoint-63/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:379ff04d2e9de33d5ecd3f470def81b52a37a15dc136c95dfd0414ae2b982603
3
+ size 17549312
checkpoint-63/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a2ccf39cfd807f8e97f497d888cf45872bdf13c60bf29d0150abb8dee9a5922
3
+ size 35123898
checkpoint-63/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ee3244d1fb8d2938b76e0da0926eed0036f4636ced09415eb94b79dac5736c7
3
+ size 14244
checkpoint-63/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f2899150945e7607692260e26159240946ed36221da68ea6e9bb708e62775d
3
+ size 1064
checkpoint-63/trainer_state.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 63,
3
+ "best_metric": 0.681846022605896,
4
+ "best_model_checkpoint": "./mini-sentiment-model/checkpoint-63",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 63,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.54,
15
+ "eval_f1": 0.6993464052287581,
16
+ "eval_loss": 0.681846022605896,
17
+ "eval_precision": 0.5376884422110553,
18
+ "eval_recall": 1.0,
19
+ "eval_runtime": 0.1526,
20
+ "eval_samples_per_second": 1310.261,
21
+ "eval_steps_per_second": 85.167,
22
+ "step": 63
23
+ }
24
+ ],
25
+ "logging_steps": 500,
26
+ "max_steps": 189,
27
+ "num_input_tokens_seen": 0,
28
+ "num_train_epochs": 3,
29
+ "save_steps": 500,
30
+ "stateful_callbacks": {
31
+ "TrainerControl": {
32
+ "args": {
33
+ "should_epoch_stop": false,
34
+ "should_evaluate": false,
35
+ "should_log": false,
36
+ "should_save": true,
37
+ "should_training_stop": false
38
+ },
39
+ "attributes": {}
40
+ }
41
+ },
42
+ "total_flos": 317621760000.0,
43
+ "train_batch_size": 16,
44
+ "trial_name": null,
45
+ "trial_params": null
46
+ }
checkpoint-63/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e791125ba3e22840402f92bd40cab4a742f51fb9913759fe4f66250ab01752be
3
+ size 5368
config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 30522,
3
+ "d_model": 128,
4
+ "num_heads": 2,
5
+ "num_layers": 2,
6
+ "d_ff": 256,
7
+ "num_classes": 2,
8
+ "max_length": 64,
9
+ "model_type": "mini-sentiment-transformer",
10
+ "architectures": [
11
+ "MiniSentimentTransformer"
12
+ ],
13
+ "tokenizer_class": "AutoTokenizer",
14
+ "transformers_version": "4.30.0"
15
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d74915b6bb603d563a5b7a5be748c18174f7c38d4f54420154fe96ba4ad703
3
+ size 16800334
runs/Aug01_11-34-03_5f8061b8deee/events.out.tfevents.1754048063.5f8061b8deee.5518.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2940d53128736f5e2997f126e59fb6dfb48930ad33d428aed747b5688162de4
3
+ size 6652
runs/Aug01_11-34-03_5f8061b8deee/events.out.tfevents.1754048289.5f8061b8deee.5518.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa76d23aa58e2d8edd4ce9bd526769c6fc489aacabcb7ee778ee1f12344fce45
3
+ size 560
tokenizer_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_tokenizer": "distilbert-base-uncased",
3
+ "max_length": 64,
4
+ "model_max_length": 64,
5
+ "padding_side": "right",
6
+ "truncation_side": "right"
7
+ }