Spaces:
Sleeping
Sleeping
| import sys | |
| import os | |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | |
| import numpy as np | |
| from transformers import pipeline | |
| from typing import List | |
| from utils.config import load_config | |
class EntailmentAnalyzer:
    """Score paraphrases against an original sentence with an entailment model.

    The model is a Hugging Face ``transformers`` pipeline built from the
    ``task`` and ``model`` entries of the configuration mapping passed to
    the constructor.
    """

    # Amount by which the selection threshold is lowered on each retry.
    THRESHOLD_STEP = 0.1

    def __init__(self, config):
        """
        Initialize the EntailmentAnalyzer from an already-loaded configuration.

        Args:
            config: Mapping that provides the ``'task'`` and ``'model'`` keys
                used to build the ``transformers`` pipeline.
        """
        self.config = config
        self.entailment_pipeline = pipeline(task=self.config['task'], model=self.config['model'])

    def check_entailment(self, premise: str, hypothesis: str) -> float:
        """
        Check entailment between the premise and hypothesis.

        Args:
            premise: The premise sentence.
            hypothesis: The hypothesis sentence.

        Returns:
            float: The score of the result whose label is "entailment"
            (compared case-insensitively).

        Raises:
            ValueError: If none of the returned results carries an
                "entailment" label.
        """
        # NOTE(review): premise and hypothesis are joined with a literal
        # " [SEP] " and sent as one string; confirm this matches the input
        # format the configured model expects.
        results = self.entailment_pipeline(f"{premise} [SEP] {hypothesis}", top_k=None)
        for item in results:
            # Label casing differs between models ("entailment" vs "ENTAILMENT").
            if str(item['label']).lower() == 'entailment':
                return item['score']
        labels = [item['label'] for item in results]
        raise ValueError(f"No 'entailment' label in model output; got labels: {labels}")

    def analyze_entailment(self, original_sentence: str, paraphrased_sentences: List[str], threshold: float) -> tuple:
        """
        Analyze entailment scores for paraphrased sentences.

        Every paraphrase is scored exactly once. If no score reaches
        ``threshold``, the threshold is lowered in steps of
        ``THRESHOLD_STEP`` until at least one sentence is selected or the
        threshold reaches 0.

        Args:
            original_sentence: The original sentence (used as the premise).
            paraphrased_sentences: List of paraphrased sentences.
            threshold: Minimum score to select a sentence.

        Returns:
            tuple: ``(all_sentences, selected_sentences, discarded_sentences)``,
            each a dict mapping sentence -> entailment score. Selected and
            discarded are disjoint and together cover ``all_sentences``.
        """
        # Scores do not depend on the threshold, so run the model only once
        # per sentence instead of once per threshold retry.
        all_sentences = {
            sentence: self.check_entailment(original_sentence, sentence)
            for sentence in paraphrased_sentences
        }
        if not all_sentences:
            # Nothing to select from; lowering the threshold cannot help.
            return {}, {}, {}

        selected_sentences = {}
        while True:
            selected_sentences = {
                sentence: score
                for sentence, score in all_sentences.items()
                if score >= threshold
            }
            if selected_sentences:
                break

            # Round so repeated subtraction does not accumulate float error
            # (e.g. 0.7 stepping down to 2.8e-17 instead of 0), which would
            # make the stop condition below fire unpredictably.
            lowered = round(threshold - self.THRESHOLD_STEP, 10)
            print(f"No selected sentences found. Lowering the threshold by {self.THRESHOLD_STEP} (from {threshold} to {lowered}).")
            threshold = lowered
            if threshold <= 0:
                print("Threshold has reached 0. No sentences meet the criteria.")
                break

        # Derive the discarded set from the final selection so a sentence is
        # never reported as both selected and discarded.
        discarded_sentences = {
            sentence: score
            for sentence, score in all_sentences.items()
            if sentence not in selected_sentences
        }
        return all_sentences, selected_sentences, discarded_sentences
if __name__ == "__main__":
    # Demo entry point: load the entailment config, score a fixed set of
    # paraphrases against one sentence, and print the three result dicts.
    #
    # Config path resolution, in priority order:
    #   1. a path given as the first command-line argument;
    #   2. the original author's absolute path, if it exists on this machine
    #      (kept so existing runs behave as before);
    #   3. the repository-relative default.
    default_config_path = os.path.join(os.path.dirname(__file__), '..', 'config', 'config.yaml')
    legacy_config_path = '/home/ashhar21137/text_wm/scratch/utils/config/config.yaml'
    if len(sys.argv) > 1:
        config_path = sys.argv[1]
    elif os.path.exists(legacy_config_path):
        config_path = legacy_config_path
    else:
        config_path = default_config_path

    config = load_config(config_path)
    entailment_analyzer = EntailmentAnalyzer(config['PECCAVI_TEXT']['Entailment'])
    all_sentences, selected_sentences, discarded_sentences = entailment_analyzer.analyze_entailment(
        "The weather is nice today",
        [
            "The climate is pleasant today",
            "It's a good day weather-wise",
            "Today, the weather is terrible",
            "What a beautiful day it is",
            "The sky is clear and the weather is perfect",
            "It's pouring rain outside today",
            "The weather isn't bad today",
            "A lovely day for outdoor activities",
        ],
        0.7,
    )
    print("----------------------- All Sentences -----------------------")
    print(all_sentences)
    print("----------------------- Discarded Sentences -----------------------")
    print(discarded_sentences)
    print("----------------------- Selected Sentences -----------------------")
    print(selected_sentences)