Spaces:
Sleeping
Sleeping
| import re | |
def create_matching_pattern(word: str) -> str:
    r"""Build the regex source used to locate ``word`` inside a sentence.

    The word is always regex-escaped; only the boundary handling varies:

    * contains a space or any of ``& - / . ' ( ) [ ] $ € £ ¥ +``:
      the escaped literal, with no word-boundary anchors;
    * ends with ``%``: ``\b`` + the escaped text before the sign +
      optional whitespace + ``%``;
    * mixes ASCII digits and ASCII letters: the escaped literal, with no
      word-boundary anchors;
    * anything else: the escaped literal wrapped in ``\b ... \b``.

    Args:
        word: The literal text to search for.

    Returns:
        An uncompiled regex pattern string. Flags (e.g. ``re.IGNORECASE``)
        are left to the caller. For an empty ``word`` the returned pattern
        never matches.
    """
    if not word:
        # re.escape("") is "", so the generic branch would produce r'\b\b',
        # which matches (with zero width) at every word boundary.  An empty
        # negative lookahead can never succeed, so nothing gets matched.
        return r'(?!)'

    escaped_word = re.escape(word)
    special_chars = "&-/.'()[]$€£¥+"

    # Phrases and symbol-bearing terms: \b is unreliable next to non-word
    # characters, so the escaped literal is used as-is.
    if ' ' in word or any(char in word for char in special_chars):
        return rf'{escaped_word}'

    # Percentages: tolerate whitespace between the number and the sign.
    if word.endswith('%'):
        numeric_part = word[:-1]
        return rf'\b{re.escape(numeric_part)}\s*%'

    # Alphanumeric combinations are matched as a plain escaped literal.
    has_digit = re.search(r'[0-9]', word)
    has_letter = re.search(r'[a-zA-Z]', word)
    if has_digit and has_letter:
        return rf'{escaped_word}'

    # Simple words: require word boundaries on both sides.
    return rf'\b{escaped_word}\b'
# Background colours handed out to words in order of first appearance; the
# palette is cycled when there are more distinct words than colours.
_PASTEL_COLORS = (
    '#E199C6', '#7FB3D5', '#E57373', '#B388EB', '#80D9AA', '#F0B66B',
    '#73D9A0', '#9A89EB', '#E38AD4', '#7FAFDB', '#DDAF8C', '#DA7FC1',
    '#65CFA5', '#B38FDE', '#E6C97A',
)


def _build_highlight_rules(common_words, span_padding, badge_css):
    """Compile one ``(pattern, opening_markup)`` rule per common word, in list order.

    ``span_padding`` is the CSS padding of the coloured wrapper and
    ``badge_css`` the trailing CSS declarations of the black index badge;
    these are the only things that differ between the two public renderers.
    """
    color_map = {}
    rules = []
    for badge, word in common_words:
        if word not in color_map:
            color_map[word] = _PASTEL_COLORS[len(color_map) % len(_PASTEL_COLORS)]
        if not word:
            # Nothing to search for; an empty pattern would only produce
            # zero-width matches.
            continue
        opening = (
            f'<span style="background-color: {color_map[word]}; font-weight: bold;'
            f' padding: {span_padding}; border-radius: 2px; position: relative;">'
            f'<span style="background-color: black; color: white; border-radius: 50%;'
            f' {badge_css}">{badge}</span>'
        )
        # Compiled once here instead of once per sentence.
        pattern = re.compile(create_matching_pattern(word), re.IGNORECASE)
        rules.append((pattern, opening))
    return rules


def _apply_highlights(sentence, rules):
    """Return ``sentence`` with every rule match wrapped in its highlight markup.

    All patterns are matched against the raw sentence text and the markup
    is emitted in one pass afterwards, so a word can never match inside the
    HTML generated for another word (e.g. ``color``, ``bold`` or ``50%``
    hitting the inline CSS) or inside a badge number.

    Overlap policy: rules are applied in list order.  A later match is kept
    when it is disjoint from, or lies entirely inside, every match already
    kept (it is then rendered nested); a later match that partially
    overlaps or encloses an earlier one is dropped.
    """
    text = str(sentence)

    kept = []  # (start, end, rank, opening_markup)
    for rank, (pattern, opening) in enumerate(rules):
        for match in pattern.finditer(text):
            start, end = match.span()
            if start == end:
                continue  # zero-width matches would add empty highlights
            fits = all(
                end <= other_start
                or start >= other_end
                or (other_start <= start and end <= other_end)
                for other_start, other_end, _, _ in kept
            )
            if fits:
                kept.append((start, end, rank, opening))

    # Turn the kept spans into ordered open/close events.  At one position
    # closes come before opens; inner spans close first and outer spans
    # open first, which keeps the generated tags properly nested.
    events = []
    for start, end, rank, opening in kept:
        events.append((start, 1, -end, rank, opening))
        events.append((end, 0, -start, -rank, '</span>'))
    events.sort(key=lambda event: event[:4])

    pieces = []
    cursor = 0
    for position, _, _, _, markup in events:
        pieces.append(text[cursor:position])
        pieces.append(markup)
        cursor = position
    pieces.append(text[cursor:])
    return ''.join(pieces)


def highlight_common_words(common_words, sentences, title):
    """
    Highlight common words in sentences by adding color-coded background and unique IDs.

    Each sentence is rendered as ``"<n>. <sentence>"`` (numbered from 1) and
    the sentences are joined with ``<br><br>``.  Matching is
    case-insensitive and runs on the sentence text only, never on the
    numbering prefix or on generated markup.  The title and the sentence
    text are inserted verbatim (no HTML escaping is applied).

    Args:
        common_words (list of tuples): ``(index, word)`` pairs; ``index`` is
            shown in the badge in front of each highlighted occurrence.
        sentences (list of str): List of sentences to search through.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences.
    """
    rules = _build_highlight_rules(
        common_words,
        span_padding='2px 4px',
        badge_css='padding: 2px 5px; margin-right: 5px;',
    )
    highlighted_html = [
        f"{idx}. {_apply_highlights(sentence, rules)}"
        for idx, sentence in enumerate(sentences, start=1)
    ]

    # Format the HTML output with the title
    final_html = "<br><br>".join(highlighted_html)
    return f'''
    <div style="border: solid 1px #FFFFFF; padding: 16px; background-color: #000000; color: #FFFFFF; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
        <h3 style="margin-top: 0; font-size: 1em; color: #FFFFFF;">{title}</h3>
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px; color: #FFFFFF;">{final_html}</div>
    </div>
    '''


def highlight_common_words_dict(common_words, sentences, title):
    """
    Highlight common words in sentences (from a dictionary) by adding color-coded background and unique IDs.

    Same highlighting rules as ``highlight_common_words`` (case-insensitive,
    sentence text only, no HTML escaping), with a more compact badge style.
    Each entry is followed by an inline ``Entailment Score: <score>`` box
    and the entries are joined with ``<br>``.

    Args:
        common_words (list of tuples): ``(index, word)`` pairs; ``index`` is
            shown in the badge in front of each highlighted occurrence.
        sentences (dict): A dictionary of sentences where the key is the sentence and the value is an entailment score.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences and their entailment scores.
    """
    rules = _build_highlight_rules(
        common_words,
        span_padding='1px 2px',
        badge_css='padding: 1px 3px; margin-right: 3px; font-size: 0.8em;',
    )

    highlighted_html = []
    for idx, (sentence, score) in enumerate(sentences.items(), start=1):
        highlighted_sentence = f"{idx}. {_apply_highlights(sentence, rules)}"
        # Add the entailment score
        highlighted_html.append(
            f'<div style="margin-bottom: 5px;">'
            f'{highlighted_sentence}'
            f'<div style="display: inline-block; margin-left: 5px; padding: 3px 5px; border-radius: 3px; '
            f'background-color: #333333; color: white; font-size: 0.9em;">'
            f'Entailment Score: {score}</div></div>'
        )

    # Format the HTML output with the title
    final_html = "<br>".join(highlighted_html)
    return f'''
    <div style="background-color: #000000; color: #FFFFFF;border: solid 1px #FFFFFF; border-radius: 8px;">
        <h3 style="margin-top: 0; font-size: 1em; color: #FFFFFF;">{title}</h3>
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px; color: #FFFFFF;">{final_html}</div>
    </div>
    '''
def reparaphrased_sentences_html(sentences):
    """
    Render sentences as a numbered list inside a dark HTML card.

    Args:
        sentences (list of str): Sentences to display, in order.

    Returns:
        str: HTML in which each sentence is prefixed with its 1-based
        position (``"1. ..."``) and consecutive sentences are separated
        by ``<br><br>``.
    """
    # Number every sentence and glue them together in one step.
    final_html = "<br><br>".join(
        f"{position}. {text}" for position, text in enumerate(sentences, start=1)
    )

    return f'''
    <div style="border: solid 1px #FFFFFF; background-color: #000000; color: #FFFFFF;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
    </div>
    '''
if __name__ == "__main__":
    # Manual smoke test: render both highlighters with a shared vocabulary
    # and print the resulting HTML to stdout.
    common_words = [(1, "highlight"), (2, "numbering"), (3, "S&P 500")]

    # List input -> highlight_common_words
    sentences = [
        "This is a test to highlight words.",
        "Numbering is important for clarity.",
        "The S&P 500 index rose 2% today.",
    ]
    print(highlight_common_words(common_words, sentences, "Test Highlighting"))

    # Dict input (sentence -> entailment score) -> highlight_common_words_dict
    sentences_with_scores = {
        "Highlight words in this text.": 0.95,
        "Number sentences for clarity.": 0.8,
        "The S&P 500 index is a market benchmark.": 0.88,
    }
    print(highlight_common_words_dict(common_words, sentences_with_scores, "Test Dict Highlighting"))