import re
def create_matching_pattern(word):
    """Build the regex source string used to find ``word`` in text.

    The shape of the returned pattern depends on what ``word`` contains;
    the checks are applied in this order and the first hit wins:

    1. Contains a space or any of ``& - / . ' ( ) [ ] $ € £ ¥ +``:
       the escaped text is returned as-is, with no word boundaries.
    2. Ends with ``%``: the escaped text before the ``%`` is preceded by a
       word boundary and may be separated from the ``%`` by whitespace.
    3. Contains both an ASCII digit and an ASCII letter: the escaped text
       is returned as-is, with no word boundaries.
    4. Anything else: the escaped text wrapped in ``\\b`` on both sides.

    Args:
        word: The term to build a pattern for.

    Returns:
        str: An uncompiled regular-expression pattern.
    """
    literal = re.escape(word)

    # Characters whose presence means \b anchors are not applied; the
    # space is included here so one scan covers multi-word phrases too.
    verbatim_markers = "&-/.'()[]$€£¥+ "
    if any(ch in verbatim_markers for ch in word):
        return literal

    if word.endswith('%'):
        # Escape only the part before the sign; the sign itself is matched
        # after optional whitespace (so "50%" also covers "50 %").
        number = re.escape(word[:-1])
        return rf'\b{number}\s*%'

    has_digit = re.search(r'[0-9]', word) is not None
    has_letter = re.search(r'[a-zA-Z]', word) is not None
    if has_digit and has_letter:
        # Mixed letter/digit tokens are matched without boundaries.
        return literal

    # Plain word: require a word boundary on each side.
    return rf'\b{literal}\b'
def highlight_common_words(common_words, sentences, title):
"""
Highlight common words in sentences by adding color-coded background and unique IDs.
Args:
common_words (list of tuples): List of tuples where each tuple contains a word's index and the word.
sentences (list of str): List of sentences to search through.
title (str): The title for the HTML output.
Returns:
str: HTML string with the highlighted sentences.
"""
color_map = {}
color_index = 0
highlighted_html = []
pastel_colors = ['#E199C6','#7FB3D5', '#E57373', '#B388EB', '#80D9AA', '#F0B66B',
"#73D9A0", "#9A89EB", "#E38AD4", "#7FAFDB", "#DDAF8C", "#DA7FC1",
"#65CFA5", "#B38FDE", "#E6C97A"]
# Process each sentence
for idx, sentence in enumerate(sentences, start=1):
sentence_with_idx = f"{idx}. {sentence}"
highlighted_sentence = sentence_with_idx
# Highlight common words in each sentence
for index, word in common_words:
if word not in color_map:
color_map[word] = pastel_colors[color_index % len(pastel_colors)]
color_index += 1
# Create appropriate pattern based on word characteristics
pattern = create_matching_pattern(word)
# Replace the word with highlighted version
highlighted_sentence = re.sub(
pattern,
lambda m, idx=index, color=color_map[word]: (
f''
f'{idx}'
f'{m.group(0)}'
f''
),
highlighted_sentence,
flags=re.IGNORECASE
)
highlighted_html.append(highlighted_sentence)
# Format the HTML output with the title
final_html = "
".join(highlighted_html)
return f'''