---
license: cc0-1.0
language:
- fi
base_model:
- TurkuNLP/bert-base-finnish-uncased-v1
tags:
- difficulty
- cefr
- regression
- bert
---

# Text difficulty regression model

A regression model that predicts a difficulty score for an input text. Predicted scores can be mapped to CEFR levels.

## Model Details

Frozen BERT layers (base model: TurkuNLP/bert-base-finnish-uncased-v1) with a regression head on top. The model was trained on a mix of manually annotated datasets (more details on the data will follow) and data translated from Russian into Finnish.
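
Since the encoder is described as frozen, only the regression head is updated during training. As a minimal illustrative sketch (not part of the original training code, and assuming `model` is the `CustomModel` instance defined in the next section), the BERT layers could be frozen like this:

```
# Hypothetical sketch: `model` is assumed to be the CustomModel instance from the
# "How to Get Started" section below. Disabling gradients keeps the BERT encoder
# frozen so that only pre_classifier and classifier receive updates.
for param in model.bert.parameters():
    param.requires_grad = False
```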

## How to Get Started with the Model

Use the code below to get started with the model.

```
import torch
from torch import nn
from safetensors.torch import load_file
from transformers import AutoConfig, AutoTokenizer, BertModel, BertPreTrainedModel


class CustomModel(BertPreTrainedModel):
    def __init__(self, config, load_path=None, use_auth_token=None):
        super().__init__(config)
        self.bert = BertModel(config)
        self.pre_classifier = nn.Linear(config.hidden_size, 128)
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(128, 1)
        self.activation = nn.ReLU()

        nn.init.kaiming_uniform_(self.pre_classifier.weight, nonlinearity='relu')
        nn.init.kaiming_uniform_(self.classifier.weight, nonlinearity='relu')
        if self.pre_classifier.bias is not None:
            nn.init.constant_(self.pre_classifier.bias, 0)
        if self.classifier.bias is not None:
            nn.init.constant_(self.classifier.bias, 0)

    def forward(
        self,
        input_ids,
        labels=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
    ):
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
        )

        # Pooled [CLS] representation -> small MLP head -> single regression score.
        pooled_output = outputs.pooler_output
        pooled_output = self.pre_classifier(pooled_output)
        pooled_output = self.activation(pooled_output)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        if labels is not None:
            loss_fn = nn.MSELoss()
            loss = loss_fn(logits.view(-1), labels.view(-1))
            return loss, logits
        return None, logits


# Inference: load the custom fine-tuned model.
model_path = "path/to/this/model"  # placeholder: local directory containing model.safetensors
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
config.num_labels = 1

model = CustomModel(config)
state_dict = load_file(f'{model_path}/model.safetensors')
model.load_state_dict(state_dict)
model.to(device)
model.eval()

text = "Tässä on esimerkkiteksti."  # placeholder: the text whose difficulty you want to score
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
inputs = {key: value.to(device) for key, value in inputs.items()}

with torch.no_grad():
    _, logits = model(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        token_type_ids=inputs["token_type_ids"],
    )
```

To map a predicted score to a CEFR level, use:

```
reg2cl2 = {
    "0.0": "A1", "1.0": "A1", "1.5": "A1-A2", "2.0": "A2",
    "2.5": "A2-B1", "3.0": "B1", "3.5": "B1-B2", "4.0": "B2",
    "4.5": "B2-C1", "5.0": "C1", "5.5": "C1-C2", "6.0": "C2"
}

# Round the predicted score to the nearest full level. Rounding to the nearest
# half step instead (round(x * 2) / 2) would also make the intermediate labels
# (e.g. "A2-B1") reachable.
score = float(round(logits.item()))
print("Predicted score:", logits.item(), "CEFR level:", reg2cl2[str(score)])
```
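
For convenience, inference and CEFR mapping can be wrapped into a single helper. This is a minimal sketch rather than part of the original card: the function name `predict_cefr`, the clamping of the score to the 0-6 range, and the reuse of `tokenizer`, `model`, `device`, and `reg2cl2` from the snippets above are assumptions.

```
def predict_cefr(text):
    # Hypothetical helper reusing tokenizer, model, device and reg2cl2 defined above.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    inputs = {key: value.to(device) for key, value in inputs.items()}
    with torch.no_grad():
        _, logits = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            token_type_ids=inputs["token_type_ids"],
        )
    score = logits.item()
    # Clamp to the 0-6 range covered by the mapping before rounding to a full level.
    rounded = float(round(min(max(score, 0.0), 6.0)))
    return score, reg2cl2[str(rounded)]


score, level = predict_cefr("Tässä on esimerkkiteksti.")
print(f"score={score:.2f}, level={level}")
```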

## Training Details

#### Training Hyperparameters

+ num_warmup_steps = int(0.1 * num_training_steps)
+ num_train_epochs: 24.0
+ batch_size: 16
+ weight_decay: 0.01
+ adam_beta1: 0.9
+ adam_beta2: 0.99
+ adam_epsilon: 1e-8
+ max_grad_norm: 1.0
+ fp16: True
+ early_stopping: True

#### Learning rates

```
# Separate learning rates for the encoder and the regression head.
lr_bert = 2e-5        # learning rate for the BERT layers
lr_classifier = 1e-3  # learning rate for the regression head

optimizer = torch.optim.AdamW([
    {"params": model.bert.parameters(), "lr": lr_bert},  # BERT layers
    {"params": model.classifier.parameters(), "lr": lr_classifier},
    {"params": model.pre_classifier.parameters(), "lr": lr_classifier},
])
```
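
The warmup setting above implies a schedule over the total number of optimizer steps. Below is a minimal sketch, not from the original card, of how the optimizer defined above could be combined with the listed hyperparameters; the linear warmup schedule, the training-loop structure, and the `train_loader` name are assumptions, and fp16 and early stopping are omitted for brevity:

```
from transformers import get_linear_schedule_with_warmup

# Assumption: train_loader yields dict batches with input_ids, attention_mask,
# token_type_ids and float difficulty labels.
num_training_steps = len(train_loader) * 24        # num_train_epochs = 24
num_warmup_steps = int(0.1 * num_training_steps)   # 10% warmup, as listed above
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps)

for epoch in range(24):
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        loss, _ = model(
            input_ids=batch["input_ids"].to(device),
            attention_mask=batch["attention_mask"].to(device),
            token_type_ids=batch["token_type_ids"].to(device),
            labels=batch["labels"].to(device),
        )
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # max_grad_norm
        optimizer.step()
        scheduler.step()
```

The Adam betas, epsilon, and weight decay listed above would be passed as additional arguments to `torch.optim.AdamW`.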

## Evaluation on test set

![Evaluation results](fi_regression.png)

## Citation

Please refer to this repository when using the model.