Upload folder using huggingface_hub
Files changed:
- .ipynb_checkpoints/merges-checkpoint.txt +0 -0
- 1_Pooling/config.json +10 -0
- README.md +501 -0
- config.json +28 -0
- config_sentence_transformers.json +14 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.json +0 -0
.ipynb_checkpoints/merges-checkpoint.txt
ADDED
The diff for this file is too large to render. See raw diff.
1_Pooling/config.json
ADDED
```json
{
  "word_embedding_dimension": 1024,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
```
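The flags above select plain mean pooling: the transformer's token vectors are averaged (ignoring padding) into a single 1024-dimensional sentence vector. A minimal sketch of that computation, assuming a standard `(batch, seq_len, dim)` layout; this is an illustration, not the library's internal code:

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Average token embeddings over non-padding positions.

    token_embeddings: (batch, seq_len, 1024); attention_mask: (batch, seq_len).
    """
    mask = attention_mask.unsqueeze(-1).float()    # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)  # zero out padding, sum over tokens
    counts = mask.sum(dim=1).clamp(min=1e-9)       # number of real tokens per sentence
    return summed / counts                         # (batch, 1024)
```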
README.md
ADDED
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- dense
- generated_from_trainer
- dataset_size:6000
- loss:ContrastiveLoss
widget:
- source_sentence: ' "you only have minor depression \[I didn''t\], why are you acting
    so miserable?" Or by reminding me of the damage I was causing my loved ones like
    saying "you''re selfish, cruel and tearing this family apart'
  sentences:
  - ' (I have lost my period a couple more times in the past during my ED and in early
    recovery'
  - ' I find myself angry when people keep commenting and questioning me about eating
    and weight loss!! At first I was angry because they were acting like I was starving
    myself, but now I''m seeing that maybe I am'
  - ' My now two-year long treatment is already being held back by my attachment to
    my eating disorder, and I know a hospitalisation is just going trigger me into
    being more defensive of it'
- source_sentence: Over the last two weeks or so, I have developed severe and sudden
    anxiety
  sentences:
  - ' I''ve gained weight because of it, and struggling to deal with it a little'
  - ' If you or someone you know qualifies, I would very much appreciate your or their
    input!


    Participation is anonymous and completely voluntary'
  - ' I also started to feel a lot worse about my body and would try to go a really
    long time without eating'
- source_sentence: ' then he tried to convince me he messed with my scale to make
    it appear as if i maintained my weight, when really i yained 10 pounds ? i left
    after that'
  sentences:
  - ' What I need to know is this likely to become a serious eating disorder, or can
    I fix it with a schedule and some discipline?


    Okay, so I have severe chronic migraines, so a lot of time eating normally isn''t
    an option for neurological reasons'
  - Hey! I'm 16 and I don't know exactly if this is a specific ED (I'd love to know
    if it is) but I've been havig trouble with food lately
  - ' However, they were EXTREMELY unhelpful - discounting my feelings and suggesting
    that the only tried and true solution I needed was to lose weight healthily'
- source_sentence: ' I then went back to restricting, much more intensely with a much
    lower calorie limit'
  sentences:
  - ' This persists after I eat if I don''t eat until I feel full/satisfied, so I
    feel I need to keep eating a lot more than is recommended for someone of my size/activity
    level just so I can concentrate on my work'
  - " \n\nObviously I have never followed through with it and still eat a very consistent\
    \ amount of food (sweets, etc"
  - '


    Which struck me as odd, because I have been underweight for as long as I can remember,
    and yet no doctor has ever expressed concerns about it'
- source_sentence: ' still has a lot of food fears, incredibly picky regardless and
    lots of food anxiety\* this is important'
  sentences:
  - ' I''ve been trying to keep down breakfast and supper everyday plus some snacks
    and I don''t think it''s enough but I don''t know if I''d be able to recover if
    I was eating any more than that'
  - ' My relationship with food was much different in high school, I would eat without
    even thinking about it'
  - ' I''ve never had an okay relationship with food and I don''t really know how
    to restrict without over-restricting'
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
- cosine_accuracy
- cosine_accuracy_threshold
- cosine_f1
- cosine_f1_threshold
- cosine_precision
- cosine_recall
- cosine_ap
- cosine_mcc
model-index:
- name: SentenceTransformer
  results:
  - task:
      type: binary-classification
      name: Binary Classification
    dataset:
      name: quora duplicates dev
      type: quora_duplicates_dev
    metrics:
    - type: cosine_accuracy
      value: 0.7276315789473684
      name: Cosine Accuracy
    - type: cosine_accuracy_threshold
      value: 0.6381747126579285
      name: Cosine Accuracy Threshold
    - type: cosine_f1
      value: 0.6510989010989011
      name: Cosine F1
    - type: cosine_f1_threshold
      value: 0.3570553660392761
      name: Cosine F1 Threshold
    - type: cosine_precision
      value: 0.5738498789346247
      name: Cosine Precision
    - type: cosine_recall
      value: 0.7523809523809524
      name: Cosine Recall
    - type: cosine_ap
      value: 0.7166460442597221
      name: Cosine Ap
    - type: cosine_mcc
      value: 0.35294907796969793
      name: Cosine Mcc
---

# SentenceTransformer

This is a [sentence-transformers](https://www.SBERT.net) model trained on 6,000 sentence pairs. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
<!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
- **Maximum Sequence Length:** 256 tokens
- **Output Dimensionality:** 1024 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'RobertaModel'})
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```
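Once loaded, the stack above can be inspected directly. A minimal sketch (using the same placeholder model id as the usage example below):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence_transformers_model_id")
print(model)                                     # Transformer -> Pooling -> Normalize stack
print(model.max_seq_length)                      # 256: longer inputs are truncated
print(model.get_sentence_embedding_dimension())  # 1024
```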

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    ' still has a lot of food fears, incredibly picky regardless and lots of food anxiety\\* this is important',
    ' My relationship with food was much different in high school, I would eat without even thinking about it',
    " I've been trying to keep down breakfast and supper everyday plus some snacks and I don't think it's enough but I don't know if I'd be able to recover if I was eating any more than that",
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1024]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities)
# tensor([[1.0000, 0.6203, 0.5744],
#         [0.6203, 1.0000, 0.7722],
#         [0.5744, 0.7722, 1.0000]])
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Binary Classification

* Dataset: `quora_duplicates_dev`
* Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)

| Metric                    | Value      |
|:--------------------------|:-----------|
| cosine_accuracy           | 0.7276     |
| cosine_accuracy_threshold | 0.6382     |
| cosine_f1                 | 0.6511     |
| cosine_f1_threshold       | 0.3571     |
| cosine_precision          | 0.5738     |
| cosine_recall             | 0.7524     |
| **cosine_ap**             | **0.7166** |
| cosine_mcc                | 0.3529     |
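These metrics come from running `BinaryClassificationEvaluator` on the 760-pair dev split described below. A minimal sketch of rerunning it on your own labelled pairs (the example pairs here are invented placeholders; the actual dev split is not shipped with the model):

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import BinaryClassificationEvaluator

model = SentenceTransformer("sentence_transformers_model_id")

# Invented placeholder pairs in the (sentence1, sentence2, label) format.
sentences1 = ["I have been skipping meals all week", "I went for a walk today"]
sentences2 = ["I barely ate anything these past days", "The weather was nice outside"]
labels = [1, 0]  # 1 = similar pair, 0 = dissimilar pair

evaluator = BinaryClassificationEvaluator(sentences1, sentences2, labels, name="quora_duplicates_dev")
print(evaluator(model))  # cosine_accuracy, cosine_f1, cosine_ap, cosine_mcc, ...
```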

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 6,000 training samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1 | sentence2 | label |
  |:--------|:----------|:----------|:------|
  | type    | string | string | int |
  | details | <ul><li>min: 4 tokens</li><li>mean: 33.52 tokens</li><li>max: 169 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 32.67 tokens</li><li>max: 228 tokens</li></ul> | <ul><li>0: ~57.10%</li><li>1: ~42.90%</li></ul> |
* Samples:
  | sentence1 | sentence2 | label |
  |:----------|:----------|:------|
  | <code> Obviously as my age is specified there's not much I can do from a physical standpoint other than cook for her when I'm with her, but I'e been there for her emotionally in the past for other things and I think it helped her</code> | <code> any advice would be greatly appreciated to help her</code> | <code>1</code> |
  | <code> I am 15 years old and recently I have been feeling awful about it and realized how much of a problem it has become</code> | <code> <br><br>And I know the answer is probably, "go see a doctor or therapist/talk to ur parents" however I'm worried about doing that since what if it's nothing but me worrying and making a deal about nothing, I don't want them judging me over something potentially stupid like this</code> | <code>0</code> |
  | <code> This is Part 1 of 2 for 1 post) A friend asked me for some advice for her sister who is currently struggling with her relationship to food and her body</code> | <code> <br><br>The things I'm concerned about:<br>- I don't like eating, I try to avoid it until my stomach physically hurts, especially when I have a major depressive episode</code> | <code>1</code> |
* Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
  ```json
  {
      "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
      "margin": 1.0,
      "size_average": true
  }
  ```
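As a rough sketch of how this loss is wired into training code (the dataset below is a two-pair placeholder in the card's column format, and the base checkpoint name is an assumption; config.json records only a local path to a RoBERTa-large copy):

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
from sentence_transformers.losses import ContrastiveLoss

model = SentenceTransformer("roberta-large")  # assumed base checkpoint

# Placeholder pairs; the real dataset has 6,000 (sentence1, sentence2, label) rows.
train_dataset = Dataset.from_dict({
    "sentence1": ["I restrict all day", "I love cooking"],
    "sentence2": ["I barely eat anything", "My cat is asleep"],
    "label": [1, 0],  # 1 = similar, 0 = dissimilar
})

# Cosine distance is the default metric; margin=1.0 matches this card (default is 0.5).
loss = ContrastiveLoss(model, margin=1.0)
trainer = SentenceTransformerTrainer(model=model, train_dataset=train_dataset, loss=loss)
trainer.train()
```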

### Evaluation Dataset

#### Unnamed Dataset

* Size: 760 evaluation samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
* Approximate statistics based on the first 760 samples:
  |         | sentence1 | sentence2 | label |
  |:--------|:----------|:----------|:------|
  | type    | string | string | int |
  | details | <ul><li>min: 4 tokens</li><li>mean: 33.98 tokens</li><li>max: 230 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 31.82 tokens</li><li>max: 192 tokens</li></ul> | <ul><li>0: ~58.55%</li><li>1: ~41.45%</li></ul> |
* Samples:
  | sentence1 | sentence2 | label |
  |:----------|:----------|:------|
  | <code> I'm not getting enough nutrients ?<br><br>I have a lot of unhealthy coping mechanisms and I'm trying to not SH as much anymore, which makes that the ED is coming up much stronger? <br>I don't know what to do or how to stop this, anyone have any advice?<br><br>Edit: I also have to say that when I eat I get really nauseous, but when I don't eat I also get nauseous, which then makes it harder to eat and it just keeps going on like that </code> | <code> I want to be smaller than them and all that because eds are like that, but that's hard when they don't tell me stuff and I'm left to stew in my own thoughts</code> | <code>0</code> |
  | <code> I'm underweight right now and I was actually gaining weight since I was an average weight about two-three weeks ago and went down</code> | <code> So, this is definetly going into the right direction, but:<br> I am so afraid of stepping on the scale and seeing gained weight</code> | <code>1</code> |
  | <code> I feel so much shame around the idea of eating junk food and often times, when things have gotten worse, part of that mechanism has been my definition of "junk food" getting so warped that I consider almost anything unhealthy and shameful to eat</code> | <code> Overall, I realized my eating disorders had almost nothing to do with food</code> | <code>0</code> |
* Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
  ```json
  {
      "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
      "margin": 1.0,
      "size_average": true
  }
  ```

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `gradient_accumulation_steps`: 2
- `learning_rate`: 1.5e-05
- `num_train_epochs`: 5
- `warmup_ratio`: 0.1
- `bf16`: True
- `batch_sampler`: no_duplicates
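These map directly onto `SentenceTransformerTrainingArguments`; a minimal sketch (the output directory name is arbitrary):

```python
from sentence_transformers import SentenceTransformerTrainingArguments
from sentence_transformers.training_args import BatchSamplers

args = SentenceTransformerTrainingArguments(
    output_dir="output",  # arbitrary
    eval_strategy="steps",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=2,
    learning_rate=1.5e-5,
    num_train_epochs=5,
    warmup_ratio=0.1,
    bf16=True,
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # no duplicate texts within a batch
)
```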

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 2
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 1.5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 5
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: True
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: no_duplicates
- `multi_dataset_batch_sampler`: proportional
- `router_mapping`: {}
- `learning_rate_mapping`: {}

</details>

### Training Logs
| Epoch  | Step | Training Loss | Validation Loss | quora_duplicates_dev_cosine_ap |
|:------:|:----:|:-------------:|:---------------:|:------------------------------:|
| 0.5333 | 100  | 0.2145        | -               | -                              |
| 1.064  | 200  | 0.185         | 0.0903          | -                              |
| 1.5973 | 300  | 0.1472        | -               | -                              |
| 2.128  | 400  | 0.1361        | 0.0914          | -                              |
| 2.6613 | 500  | 0.1045        | -               | -                              |
| 3.192  | 600  | 0.0879        | 0.0957          | -                              |
| 3.7253 | 700  | 0.067         | -               | -                              |
| 4.256  | 800  | 0.0617        | 0.1003          | -                              |
| 4.7893 | 900  | 0.0529        | -               | -                              |
| -1     | -1   | -             | -               | 0.7166                         |
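For reference on the step counts: with 6,000 training pairs, a per-device batch size of 16, and gradient accumulation of 2, one epoch is 6000 / (16 × 2) = 187.5 optimizer steps, which is why step 100 lands at epoch ≈ 0.53. Note that validation loss rises after roughly epoch 2 while training loss keeps falling, which may indicate mild overfitting on this small dataset.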

### Framework Versions
- Python: 3.10.8
- Sentence Transformers: 5.0.0
- Transformers: 4.49.0
- PyTorch: 2.1.2+cu118
- Accelerate: 1.4.0
- Datasets: 3.3.2
- Tokenizers: 0.21.0

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### ContrastiveLoss
```bibtex
@inproceedings{hadsell2006dimensionality,
    author={Hadsell, R. and Chopra, S. and LeCun, Y.},
    booktitle={2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)},
    title={Dimensionality Reduction by Learning an Invariant Mapping},
    year={2006},
    volume={2},
    number={},
    pages={1735-1742},
    doi={10.1109/CVPR.2006.100}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
```json
{
  "_name_or_path": "autodl-tmp/roberta-large-v1",
  "architectures": [
    "RobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.49.0",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}
```
config_sentence_transformers.json
ADDED
```json
{
  "__version__": {
    "sentence_transformers": "5.0.0",
    "transformers": "4.49.0",
    "pytorch": "2.1.2+cu118"
  },
  "model_type": "SentenceTransformer",
  "prompts": {
    "query": "",
    "document": ""
  },
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
```
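The `prompts` map defines named prefixes that `encode` can prepend to inputs. Both are empty strings here, so selecting a prompt changes nothing; a minimal sketch of how they would be used (model id is the card's placeholder):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence_transformers_model_id")

# With empty "query"/"document" prompts these two calls are equivalent;
# a non-empty prompt would be prepended to the text before tokenization.
emb_plain = model.encode(["some text"])
emb_query = model.encode(["some text"], prompt_name="query")
```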
merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
model.safetensors
ADDED
This is a Git LFS pointer; the ~1.4 GB of model weights are stored via LFS rather than in the Git history.

```
version https://git-lfs.github.com/spec/v1
oid sha256:3966b343bf67ae889c92438fbea6f37a76a88d7eea5b5168751310eb3c0847f9
size 1421483904
```
modules.json
ADDED
```json
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
```
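These three entries are what `SentenceTransformer` chains together at load time. As a sketch, an equivalent pipeline can be assembled by hand (the base checkpoint name is an assumption; config.json records only a local path to a RoBERTa-large copy):

```python
from sentence_transformers import SentenceTransformer, models

word = models.Transformer("roberta-large", max_seq_length=256)  # assumed base checkpoint
pooling = models.Pooling(word.get_word_embedding_dimension(), pooling_mode="mean")
normalize = models.Normalize()
model = SentenceTransformer(modules=[word, pooling, normalize])
```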
sentence_bert_config.json
ADDED
```json
{
  "max_seq_length": 256,
  "do_lower_case": false
}
```
special_tokens_map.json
ADDED
```json
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
```
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
```json
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50264": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "extra_special_tokens": {},
  "mask_token": "<mask>",
  "max_length": 128,
  "model_max_length": 256,
  "pad_to_multiple_of": null,
  "pad_token": "<pad>",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "</s>",
  "stride": 0,
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>"
}
```
vocab.json
ADDED
The diff for this file is too large to render. See raw diff.