---
base_model: intfloat/multilingual-e5-small
datasets: []
language: []
library_name: sentence-transformers
metrics:
- cosine_accuracy
- cosine_accuracy_threshold
- cosine_f1
- cosine_f1_threshold
- cosine_precision
- cosine_recall
- cosine_ap
- dot_accuracy
- dot_accuracy_threshold
- dot_f1
- dot_f1_threshold
- dot_precision
- dot_recall
- dot_ap
- manhattan_accuracy
- manhattan_accuracy_threshold
- manhattan_f1
- manhattan_f1_threshold
- manhattan_precision
- manhattan_recall
- manhattan_ap
- euclidean_accuracy
- euclidean_accuracy_threshold
- euclidean_f1
- euclidean_f1_threshold
- euclidean_precision
- euclidean_recall
- euclidean_ap
- max_accuracy
- max_accuracy_threshold
- max_f1
- max_f1_threshold
- max_precision
- max_recall
- max_ap
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:1273
- loss:OnlineContrastiveLoss
widget:
- source_sentence: Where can I buy organic vegetables?
sentences:
- Primary export product of Saudi Arabia
- Share info about Amazon
- Where can I buy organic fruits?
- source_sentence: How to open a bank account in the UK?
sentences:
- Steps to open a bank account in the United Kingdom
- How many weeks in a month?
- What is the process for turning in an expense report?
- source_sentence: What is the population of Tokyo?
sentences:
- What is the smallest planet in the solar system?
- Author of the play 'Hamlet'
- What is the population of Osaka?
- source_sentence: How to visit the Great Wall of China?
sentences:
- Where can I buy a new laptop?
- How do I close a bank account?
- Guide to visiting the Great Wall of China
- source_sentence: Who is the President of the United States?
sentences:
- What is the velocity of sound?
- Who is the current US President?
- History of the Byzantine Empire
model-index:
- name: SentenceTransformer based on intfloat/multilingual-e5-small
results:
- task:
type: binary-classification
name: Binary Classification
dataset:
name: pair class dev
type: pair-class-dev
metrics:
- type: cosine_accuracy
value: 0.6206896551724138
name: Cosine Accuracy
- type: cosine_accuracy_threshold
value: 0.9036016464233398
name: Cosine Accuracy Threshold
- type: cosine_f1
value: 0.7192575406032483
name: Cosine F1
- type: cosine_f1_threshold
value: 0.9036016464233398
name: Cosine F1 Threshold
- type: cosine_precision
value: 0.5827067669172933
name: Cosine Precision
- type: cosine_recall
value: 0.9393939393939394
name: Cosine Recall
- type: cosine_ap
value: 0.6366493234478966
name: Cosine Ap
- type: dot_accuracy
value: 0.6206896551724138
name: Dot Accuracy
- type: dot_accuracy_threshold
value: 0.9036016464233398
name: Dot Accuracy Threshold
- type: dot_f1
value: 0.7192575406032483
name: Dot F1
- type: dot_f1_threshold
value: 0.9036016464233398
name: Dot F1 Threshold
- type: dot_precision
value: 0.5827067669172933
name: Dot Precision
- type: dot_recall
value: 0.9393939393939394
name: Dot Recall
- type: dot_ap
value: 0.6366493234478966
name: Dot Ap
- type: manhattan_accuracy
value: 0.6175548589341693
name: Manhattan Accuracy
- type: manhattan_accuracy_threshold
value: 6.501791000366211
name: Manhattan Accuracy Threshold
- type: manhattan_f1
value: 0.7232142857142857
name: Manhattan F1
- type: manhattan_f1_threshold
value: 7.142887115478516
name: Manhattan F1 Threshold
- type: manhattan_precision
value: 0.5724381625441696
name: Manhattan Precision
- type: manhattan_recall
value: 0.9818181818181818
name: Manhattan Recall
- type: manhattan_ap
value: 0.64137074777591
name: Manhattan Ap
- type: euclidean_accuracy
value: 0.6206896551724138
name: Euclidean Accuracy
- type: euclidean_accuracy_threshold
value: 0.43908166885375977
name: Euclidean Accuracy Threshold
- type: euclidean_f1
value: 0.7192575406032483
name: Euclidean F1
- type: euclidean_f1_threshold
value: 0.43908166885375977
name: Euclidean F1 Threshold
- type: euclidean_precision
value: 0.5827067669172933
name: Euclidean Precision
- type: euclidean_recall
value: 0.9393939393939394
name: Euclidean Recall
- type: euclidean_ap
value: 0.6366493234478966
name: Euclidean Ap
- type: max_accuracy
value: 0.6206896551724138
name: Max Accuracy
- type: max_accuracy_threshold
value: 6.501791000366211
name: Max Accuracy Threshold
- type: max_f1
value: 0.7232142857142857
name: Max F1
- type: max_f1_threshold
value: 7.142887115478516
name: Max F1 Threshold
- type: max_precision
value: 0.5827067669172933
name: Max Precision
- type: max_recall
value: 0.9818181818181818
name: Max Recall
- type: max_ap
value: 0.64137074777591
name: Max Ap
- task:
type: binary-classification
name: Binary Classification
dataset:
name: pair class test
type: pair-class-test
metrics:
- type: cosine_accuracy
value: 0.8934169278996865
name: Cosine Accuracy
- type: cosine_accuracy_threshold
value: 0.7770164012908936
name: Cosine Accuracy Threshold
- type: cosine_f1
value: 0.9034090909090907
name: Cosine F1
- type: cosine_f1_threshold
value: 0.7750071287155151
name: Cosine F1 Threshold
- type: cosine_precision
value: 0.8502673796791443
name: Cosine Precision
- type: cosine_recall
value: 0.9636363636363636
name: Cosine Recall
- type: cosine_ap
value: 0.9467412947017336
name: Cosine Ap
- type: dot_accuracy
value: 0.8934169278996865
name: Dot Accuracy
- type: dot_accuracy_threshold
value: 0.7770164012908936
name: Dot Accuracy Threshold
- type: dot_f1
value: 0.9034090909090907
name: Dot F1
- type: dot_f1_threshold
value: 0.7750071287155151
name: Dot F1 Threshold
- type: dot_precision
value: 0.8502673796791443
name: Dot Precision
- type: dot_recall
value: 0.9636363636363636
name: Dot Recall
- type: dot_ap
value: 0.9467412947017336
name: Dot Ap
- type: manhattan_accuracy
value: 0.890282131661442
name: Manhattan Accuracy
- type: manhattan_accuracy_threshold
value: 9.908584594726562
name: Manhattan Accuracy Threshold
- type: manhattan_f1
value: 0.9002849002849003
name: Manhattan F1
- type: manhattan_f1_threshold
value: 10.437429428100586
name: Manhattan F1 Threshold
- type: manhattan_precision
value: 0.8494623655913979
name: Manhattan Precision
- type: manhattan_recall
value: 0.9575757575757575
name: Manhattan Recall
- type: manhattan_ap
value: 0.9451852140210413
name: Manhattan Ap
- type: euclidean_accuracy
value: 0.8934169278996865
name: Euclidean Accuracy
- type: euclidean_accuracy_threshold
value: 0.6678076386451721
name: Euclidean Accuracy Threshold
- type: euclidean_f1
value: 0.9034090909090907
name: Euclidean F1
- type: euclidean_f1_threshold
value: 0.6708062887191772
name: Euclidean F1 Threshold
- type: euclidean_precision
value: 0.8502673796791443
name: Euclidean Precision
- type: euclidean_recall
value: 0.9636363636363636
name: Euclidean Recall
- type: euclidean_ap
value: 0.9467412947017336
name: Euclidean Ap
- type: max_accuracy
value: 0.8934169278996865
name: Max Accuracy
- type: max_accuracy_threshold
value: 9.908584594726562
name: Max Accuracy Threshold
- type: max_f1
value: 0.9034090909090907
name: Max F1
- type: max_f1_threshold
value: 10.437429428100586
name: Max F1 Threshold
- type: max_precision
value: 0.8502673796791443
name: Max Precision
- type: max_recall
value: 0.9636363636363636
name: Max Recall
- type: max_ap
value: 0.9467412947017336
name: Max Ap
---

# SentenceTransformer based on intfloat/multilingual-e5-small
This is a sentence-transformers model finetuned from intfloat/multilingual-e5-small. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
## Model Details

### Model Description
- Model Type: Sentence Transformer
- Base model: intfloat/multilingual-e5-small
- Maximum Sequence Length: 512 tokens
- Output Dimensionality: 384 dimensions
- Similarity Function: Cosine Similarity
### Model Sources
- Documentation: [Sentence Transformers Documentation](https://sbert.net)
- Repository: [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- Hugging Face: [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
### Full Model Architecture
```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```
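For reference, these three modules implement a standard embedding pipeline: a BERT encoder, mean pooling over token embeddings, and L2 normalization. Below is a minimal sketch of that pipeline using plain `transformers`, assuming the Hub repository exposes the underlying `BertModel` weights and tokenizer; loading via `SentenceTransformer` (see Usage) remains the recommended path.

```python
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("srikarvar/fine_tuned_model_4")
encoder = AutoModel.from_pretrained("srikarvar/fine_tuned_model_4")

sentences = ["Who is the President of the United States?", "Who is the current US President?"]
batch = tokenizer(sentences, padding=True, truncation=True, max_length=512, return_tensors="pt")

with torch.no_grad():
    token_embeddings = encoder(**batch).last_hidden_state  # (batch, seq_len, 384)

# Pooling module: mean over token embeddings, ignoring padding positions
mask = batch["attention_mask"].unsqueeze(-1).float()
embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1)

# Normalize module: unit-length vectors, so dot product equals cosine similarity
embeddings = F.normalize(embeddings, p=2, dim=1)
print(embeddings.shape)  # torch.Size([2, 384])
```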
## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("srikarvar/fine_tuned_model_4")
# Run inference
sentences = [
    'Who is the President of the United States?',
    'Who is the current US President?',
    'What is the velocity of sound?',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
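Because the output vectors are unit-normalized, `model.similarity` returns cosine scores. The following is a hedged sketch of turning those scores into a binary paraphrase decision, using the cosine F1 threshold (about 0.775) reported for the `pair-class-test` split below as an illustrative cutoff rather than a fixed constant:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("srikarvar/fine_tuned_model_4")

# Illustrative cutoff taken from the cosine_f1_threshold on pair-class-test (see Evaluation)
THRESHOLD = 0.775

pairs = [
    ("How to open a bank account in the UK?", "Steps to open a bank account in the United Kingdom"),
    ("What is the population of Tokyo?", "What is the population of Osaka?"),
]

for s1, s2 in pairs:
    e1, e2 = model.encode([s1, s2])
    score = model.similarity(e1, e2).item()  # cosine similarity of the pair
    label = "paraphrase" if score >= THRESHOLD else "not paraphrase"
    print(f"{score:.3f}  {label}  |  {s1}  ||  {s2}")
```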
## Evaluation

### Metrics

#### Binary Classification
- Dataset: `pair-class-dev`
- Evaluated with `BinaryClassificationEvaluator`
| Metric | Value |
|---|---|
| cosine_accuracy | 0.6207 |
| cosine_accuracy_threshold | 0.9036 |
| cosine_f1 | 0.7193 |
| cosine_f1_threshold | 0.9036 |
| cosine_precision | 0.5827 |
| cosine_recall | 0.9394 |
| cosine_ap | 0.6366 |
| dot_accuracy | 0.6207 |
| dot_accuracy_threshold | 0.9036 |
| dot_f1 | 0.7193 |
| dot_f1_threshold | 0.9036 |
| dot_precision | 0.5827 |
| dot_recall | 0.9394 |
| dot_ap | 0.6366 |
| manhattan_accuracy | 0.6176 |
| manhattan_accuracy_threshold | 6.5018 |
| manhattan_f1 | 0.7232 |
| manhattan_f1_threshold | 7.1429 |
| manhattan_precision | 0.5724 |
| manhattan_recall | 0.9818 |
| manhattan_ap | 0.6414 |
| euclidean_accuracy | 0.6207 |
| euclidean_accuracy_threshold | 0.4391 |
| euclidean_f1 | 0.7193 |
| euclidean_f1_threshold | 0.4391 |
| euclidean_precision | 0.5827 |
| euclidean_recall | 0.9394 |
| euclidean_ap | 0.6366 |
| max_accuracy | 0.6207 |
| max_accuracy_threshold | 6.5018 |
| max_f1 | 0.7232 |
| max_f1_threshold | 7.1429 |
| max_precision | 0.5827 |
| max_recall | 0.9818 |
| max_ap | 0.6414 |
#### Binary Classification
- Dataset: `pair-class-test`
- Evaluated with `BinaryClassificationEvaluator`
| Metric | Value |
|---|---|
| cosine_accuracy | 0.8934 |
| cosine_accuracy_threshold | 0.777 |
| cosine_f1 | 0.9034 |
| cosine_f1_threshold | 0.775 |
| cosine_precision | 0.8503 |
| cosine_recall | 0.9636 |
| cosine_ap | 0.9467 |
| dot_accuracy | 0.8934 |
| dot_accuracy_threshold | 0.777 |
| dot_f1 | 0.9034 |
| dot_f1_threshold | 0.775 |
| dot_precision | 0.8503 |
| dot_recall | 0.9636 |
| dot_ap | 0.9467 |
| manhattan_accuracy | 0.8903 |
| manhattan_accuracy_threshold | 9.9086 |
| manhattan_f1 | 0.9003 |
| manhattan_f1_threshold | 10.4374 |
| manhattan_precision | 0.8495 |
| manhattan_recall | 0.9576 |
| manhattan_ap | 0.9452 |
| euclidean_accuracy | 0.8934 |
| euclidean_accuracy_threshold | 0.6678 |
| euclidean_f1 | 0.9034 |
| euclidean_f1_threshold | 0.6708 |
| euclidean_precision | 0.8503 |
| euclidean_recall | 0.9636 |
| euclidean_ap | 0.9467 |
| max_accuracy | 0.8934 |
| max_accuracy_threshold | 9.9086 |
| max_f1 | 0.9034 |
| max_f1_threshold | 10.4374 |
| max_precision | 0.8503 |
| max_recall | 0.9636 |
| max_ap | 0.9467 |
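The tables above come from the `BinaryClassificationEvaluator`. Below is a sketch of how to run the same evaluation yourself; the actual dev/test splits are not published with this card, so the pairs shown are placeholders drawn from the sample tables in the Training Details section:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import BinaryClassificationEvaluator

model = SentenceTransformer("srikarvar/fine_tuned_model_4")

evaluator = BinaryClassificationEvaluator(
    sentences1=["Release date of the iPhone 14", "How many bones are in the human body?"],
    sentences2=["Release date of the iPhone 13", "Total bones in an adult human"],
    labels=[0, 1],  # 1 = paraphrase / duplicate, 0 = different meaning
    name="pair-class-dev",
)
results = evaluator(model)
print(results)  # accuracy, F1, precision, recall, AP for cosine/dot/Manhattan/Euclidean/max
```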
## Training Details

### Training Dataset

#### Unnamed Dataset
- Size: 1,273 training samples
- Columns: `sentence1`, `label`, and `sentence2`
- Approximate statistics based on the first 1000 samples:
  |         | sentence1 | label | sentence2 |
  |---------|-----------|-------|-----------|
  | type    | string    | int   | string    |
  | details | min: 6 tokens, mean: 10.93 tokens, max: 28 tokens | 0: ~48.90%, 1: ~51.10% | min: 5 tokens, mean: 10.29 tokens, max: 22 tokens |
- Samples:
  | sentence1 | label | sentence2 |
  |-----------|-------|-----------|
  | What are the main ingredients in a traditional pizza Margherita? | 1 | What ingredients are used in a classic pizza Margherita? |
  | Release date of the iPhone 14 | 0 | Release date of the iPhone 13 |
  | Who won the first Nobel Prize in Literature? | 0 | Who won the first Nobel Prize in Peace? |
- Loss: `OnlineContrastiveLoss`
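For orientation, here is a minimal sketch of the expected data layout using the `datasets` library: each row holds two sentences and a binary label, which `OnlineContrastiveLoss` interprets as 1 = equivalent pair and 0 = non-equivalent pair. The rows are taken from the samples table above.

```python
from datasets import Dataset

train_dataset = Dataset.from_dict({
    "sentence1": [
        "What are the main ingredients in a traditional pizza Margherita?",
        "Release date of the iPhone 14",
        "Who won the first Nobel Prize in Literature?",
    ],
    "sentence2": [
        "What ingredients are used in a classic pizza Margherita?",
        "Release date of the iPhone 13",
        "Who won the first Nobel Prize in Peace?",
    ],
    "label": [1, 0, 0],  # 1 = paraphrase pair, 0 = different meaning
})
print(train_dataset)  # Dataset with columns sentence1, sentence2, label (3 rows)
```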
### Evaluation Dataset

#### Unnamed Dataset
- Size: 319 evaluation samples
- Columns: `sentence1`, `label`, and `sentence2`
- Approximate statistics based on the first 1000 samples:
  |         | sentence1 | label | sentence2 |
  |---------|-----------|-------|-----------|
  | type    | string    | int   | string    |
  | details | min: 6 tokens, mean: 11.12 tokens, max: 22 tokens | 0: ~48.28%, 1: ~51.72% | min: 4 tokens, mean: 10.52 tokens, max: 21 tokens |
- Samples:
  | sentence1 | label | sentence2 |
  |-----------|-------|-----------|
  | How many bones are in the human body? | 1 | Total bones in an adult human |
  | What is the price of an iPhone 12? | 0 | What is the price of an iPhone 11? |
  | What are the different types of renewable energy? | 1 | What are the various forms of renewable energy? |
- Loss: `OnlineContrastiveLoss`
### Training Hyperparameters

#### Non-Default Hyperparameters
- `eval_strategy`: epoch
- `per_device_train_batch_size`: 32
- `per_device_eval_batch_size`: 32
- `gradient_accumulation_steps`: 2
- `num_train_epochs`: 4
- `warmup_ratio`: 0.1
- `load_best_model_at_end`: True
- `optim`: adamw_torch_fused
- `batch_sampler`: no_duplicates
#### All Hyperparameters
Click to expand
- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: epoch
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 32
- `per_device_eval_batch_size`: 32
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 2
- `eval_accumulation_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 4
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: True
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch_fused
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `batch_sampler`: no_duplicates
- `multi_dataset_batch_sampler`: proportional
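Below is a hedged sketch of a training run consistent with the non-default hyperparameters above. `output_dir`, `save_strategy`, and the tiny placeholder dataset are illustrative assumptions, not values taken from this card:

```python
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import OnlineContrastiveLoss
from sentence_transformers.training_args import BatchSamplers

# Placeholder pairs in the sentence1 / sentence2 / label layout described above
pairs = {
    "sentence1": ["Release date of the iPhone 14", "How many bones are in the human body?"],
    "sentence2": ["Release date of the iPhone 13", "Total bones in an adult human"],
    "label": [0, 1],
}
train_dataset = Dataset.from_dict(pairs)
eval_dataset = Dataset.from_dict(pairs)

model = SentenceTransformer("intfloat/multilingual-e5-small")
loss = OnlineContrastiveLoss(model)

args = SentenceTransformerTrainingArguments(
    output_dir="fine_tuned_model_4",  # illustrative
    num_train_epochs=4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    gradient_accumulation_steps=2,
    warmup_ratio=0.1,
    eval_strategy="epoch",
    save_strategy="epoch",  # assumed so that load_best_model_at_end can take effect
    load_best_model_at_end=True,
    optim="adamw_torch_fused",
    batch_sampler=BatchSamplers.NO_DUPLICATES,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss=loss,
)
trainer.train()
```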
### Training Logs
| Epoch | Step | Training Loss | loss | pair-class-dev_max_ap | pair-class-test_max_ap |
|---|---|---|---|---|---|
| 0 | 0 | - | - | 0.6414 | - |
| 0.5 | 10 | 1.9407 | - | - | - |
| 1.0 | 20 | 0.9729 | 0.6810 | - | - |
| 1.475 | 30 | 0.4822 | - | - | - |
| 1.975 | 40 | 0.4062 | - | - | - |
| 2.025 | 41 | - | 0.5953 | - | - |
| 2.45 | 50 | 0.2894 | - | - | - |
| 2.95 | 60 | 0.1977 | - | - | - |
| 3.0 | 61 | - | 0.5318 | - | - |
| 3.425 | 70 | 0.1999 | - | - | - |
| **3.925** | **80** | **0.1491** | **0.5159** | **-** | **0.9467** |
- The bold row denotes the saved checkpoint.
### Framework Versions
- Python: 3.10.12
- Sentence Transformers: 3.0.1
- Transformers: 4.41.2
- PyTorch: 2.1.2+cu121
- Accelerate: 0.32.1
- Datasets: 2.19.1
- Tokenizers: 0.19.1
## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```