ThreatLevelD
committed on
Commit
·
9b2f3b7
1
Parent(s):
bea66c7
Upgrade EILProcessor to world-class signal normalization: adds subphrase/keyword blend detection, chunk weighting by model confidence, negation/contrast handling, emotion arc trajectory output, and sentiment-to-emotion mapping for non-EI language. Significantly improves long-form and ambiguous emotional inference.
Browse files- core/eil_processor.py +84 -29
core/eil_processor.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# core/eil_processor.py
|
| 2 |
-
#
|
| 3 |
|
| 4 |
import yaml
|
| 5 |
import re
|
|
@@ -42,11 +42,11 @@ class EILProcessor:
|
|
| 42 |
# Emotion keyword dictionary for signal normalization/blending
|
| 43 |
self.emotion_keyword_map = {
|
| 44 |
"FAM-ANG": ["anger", "angry", "hate", "furious", "rage", "resentment"],
|
| 45 |
-
"FAM-HEL": ["helpless", "powerless", "can't", "unable", "trapped", "stuck"],
|
| 46 |
-
"FAM-SAD": ["sad", "down", "unhappy", "miserable", "depressed", "blue"],
|
| 47 |
-
"FAM-FEA": ["afraid", "scared", "fear", "terrified", "worried", "nervous", "anxious"],
|
| 48 |
-
"FAM-LOV": ["love", "loved", "loving", "caring", "affection"],
|
| 49 |
-
"FAM-JOY": ["joy", "happy", "excited", "delighted", "content"],
|
| 50 |
"FAM-SUR": ["surprised", "amazed", "astonished", "shocked"],
|
| 51 |
"FAM-DIS": ["disgust", "disgusted", "gross", "revolted"],
|
| 52 |
"FAM-SHA": ["ashamed", "shame", "embarrassed", "humiliated"],
|
|
@@ -54,9 +54,23 @@ class EILProcessor:
|
|
| 54 |
# Add more as needed
|
| 55 |
}
|
| 56 |
|
| 57 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
self.tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
|
| 59 |
self.model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
|
|
|
|
|
|
|
| 60 |
|
| 61 |
def normalize_text(self, text):
|
| 62 |
normalization_map = {
|
|
@@ -79,24 +93,36 @@ class EILProcessor:
|
|
| 79 |
clause_markers = [',', ';', '.', 'but', 'because', 'so that', 'which', 'when', 'while']
|
| 80 |
token_count = len(text.split())
|
| 81 |
clause_hits = any(marker in text for marker in clause_markers)
|
| 82 |
-
|
| 83 |
-
return True
|
| 84 |
-
return False
|
| 85 |
|
| 86 |
def chunk_story(self, text):
|
| 87 |
-
# Also split on conjunctions and relative pronouns, not just punctuation
|
| 88 |
chunks = re.split(r'[.,;!?]|\b(?:and|but|because|so|although|though|while|when)\b', text, flags=re.IGNORECASE)
|
| 89 |
chunks = [chunk.strip() for chunk in chunks if chunk and chunk.strip()]
|
| 90 |
return chunks
|
| 91 |
|
| 92 |
-
def
|
| 93 |
blend = {}
|
| 94 |
for fam, keywords in self.emotion_keyword_map.items():
|
| 95 |
for kw in keywords:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
if kw in norm_text:
|
| 97 |
blend[fam] = blend.get(fam, 0) + 1.0
|
| 98 |
return blend
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
def infer_emotion(self, input_text):
|
| 101 |
norm_text = self.normalize_text(input_text)
|
| 102 |
|
|
@@ -113,7 +139,8 @@ class EILProcessor:
|
|
| 113 |
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 114 |
'arc': emotion_data['arc'],
|
| 115 |
'resonance': emotion_data['resonance'],
|
| 116 |
-
'blend': {emotion_data['primary_emotion_code']: 1.0}
|
|
|
|
| 117 |
}
|
| 118 |
return packet
|
| 119 |
|
|
@@ -126,35 +153,41 @@ class EILProcessor:
|
|
| 126 |
|
| 127 |
chunk_results = []
|
| 128 |
blend_accum = {}
|
|
|
|
| 129 |
|
| 130 |
for chunk in chunks:
|
| 131 |
sub_result = self.infer_emotion(chunk) # RECURSIVE CALL
|
| 132 |
chunk_results.append(sub_result)
|
| 133 |
-
# Accumulate blends
|
|
|
|
| 134 |
for fam, val in sub_result.get('blend', {}).items():
|
| 135 |
-
blend_accum[fam] = blend_accum.get(fam, 0) + val
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
# Normalize blend
|
| 138 |
if blend_accum:
|
| 139 |
total = sum(blend_accum.values())
|
| 140 |
for k in blend_accum:
|
| 141 |
blend_accum[k] /= total
|
| 142 |
-
|
| 143 |
dominant_family = max(blend_accum.items(), key=lambda x: x[1])[0]
|
| 144 |
else:
|
| 145 |
dominant_family = "FAM-NEU"
|
| 146 |
blend_accum = {"FAM-NEU": 1.0}
|
|
|
|
| 147 |
|
| 148 |
emotion_data = self.codex_informer.resolve_emotion_family(dominant_family)
|
| 149 |
packet = {
|
| 150 |
'phrases': [input_text] + [r['phrases'][0] for r in chunk_results],
|
| 151 |
-
'emotion_candidates': [{'phrase': r['phrases'][0], 'candidate_emotion': r
|
| 152 |
'metadata': {'source': 'EILProcessor (story mode)', 'input_type': input_type},
|
| 153 |
'emotion_family': emotion_data['emotion_family'],
|
| 154 |
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 155 |
'arc': emotion_data['arc'],
|
| 156 |
'resonance': emotion_data['resonance'],
|
| 157 |
-
'blend': blend_accum
|
|
|
|
| 158 |
}
|
| 159 |
return packet
|
| 160 |
|
|
@@ -171,7 +204,8 @@ class EILProcessor:
|
|
| 171 |
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 172 |
'arc': emotion_data['arc'],
|
| 173 |
'resonance': emotion_data['resonance'],
|
| 174 |
-
'blend': {emotion_data['primary_emotion_code']: 1.0}
|
|
|
|
| 175 |
}
|
| 176 |
return packet
|
| 177 |
|
|
@@ -189,14 +223,14 @@ class EILProcessor:
|
|
| 189 |
'primary_emotion_code': variant_code,
|
| 190 |
'arc': 'Pending',
|
| 191 |
'resonance': 'Pending',
|
| 192 |
-
'blend': {variant_code: 1.0}
|
|
|
|
| 193 |
}
|
| 194 |
return packet
|
| 195 |
|
| 196 |
-
# 5️⃣ Signal normalization -
|
| 197 |
-
blend = self.
|
| 198 |
if blend:
|
| 199 |
-
# Normalize
|
| 200 |
total = sum(blend.values())
|
| 201 |
for k in blend:
|
| 202 |
blend[k] /= total
|
|
@@ -211,12 +245,32 @@ class EILProcessor:
|
|
| 211 |
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 212 |
'arc': emotion_data['arc'],
|
| 213 |
'resonance': emotion_data['resonance'],
|
| 214 |
-
'blend': blend
|
|
|
|
| 215 |
}
|
| 216 |
return packet
|
| 217 |
|
| 218 |
-
# 6️⃣
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
tokens = self.tokenizer(norm_text, return_tensors='pt')
|
| 221 |
with torch.no_grad():
|
| 222 |
logits = self.model(**tokens).logits
|
|
@@ -243,15 +297,16 @@ class EILProcessor:
|
|
| 243 |
primary_emotion_code = model_to_codex_map.get(predicted_label.lower(), "FAM-NEU")
|
| 244 |
emotion_data = self.codex_informer.resolve_emotion_family(primary_emotion_code)
|
| 245 |
blend = {emotion_data['primary_emotion_code']: 1.0}
|
| 246 |
-
|
| 247 |
packet = {
|
| 248 |
'phrases': [input_text],
|
| 249 |
'emotion_candidates': [{'phrase': input_text, 'candidate_emotion': predicted_label}],
|
| 250 |
-
'metadata': {'source': 'EILProcessor (model)', 'input_type': input_type},
|
| 251 |
'emotion_family': emotion_data['emotion_family'],
|
| 252 |
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 253 |
'arc': emotion_data['arc'],
|
| 254 |
'resonance': emotion_data['resonance'],
|
| 255 |
-
'blend': blend
|
|
|
|
|
|
|
| 256 |
}
|
| 257 |
return packet
|
|
|
|
| 1 |
# core/eil_processor.py
|
| 2 |
+
# MEC EIL Processor – World-Class Signal Normalization Edition
|
| 3 |
|
| 4 |
import yaml
|
| 5 |
import re
|
|
|
|
| 42 |
# Emotion keyword dictionary for signal normalization/blending
|
| 43 |
self.emotion_keyword_map = {
|
| 44 |
"FAM-ANG": ["anger", "angry", "hate", "furious", "rage", "resentment"],
|
| 45 |
+
"FAM-HEL": ["helpless", "powerless", "can't", "unable", "trapped", "stuck", "overwhelmed", "overwhelm"],
|
| 46 |
+
"FAM-SAD": ["sad", "down", "unhappy", "miserable", "depressed", "blue", "empty"],
|
| 47 |
+
"FAM-FEA": ["afraid", "scared", "fear", "terrified", "worried", "nervous", "anxious", "can't sleep"],
|
| 48 |
+
"FAM-LOV": ["love", "loved", "loving", "caring", "affection", "proud"],
|
| 49 |
+
"FAM-JOY": ["joy", "happy", "excited", "delighted", "content", "proud"],
|
| 50 |
"FAM-SUR": ["surprised", "amazed", "astonished", "shocked"],
|
| 51 |
"FAM-DIS": ["disgust", "disgusted", "gross", "revolted"],
|
| 52 |
"FAM-SHA": ["ashamed", "shame", "embarrassed", "humiliated"],
|
|
|
|
| 54 |
# Add more as needed
|
| 55 |
}
|
| 56 |
|
| 57 |
+
# For sentiment-to-emotion mapping of ambiguous/indirect language
|
| 58 |
+
self.sentiment_cue_map = [
|
| 59 |
+
# (sentiment, regex or cue, mapped emotion)
|
| 60 |
+
("negative", r"can.?t sleep|insomnia|restless|wake up", "FAM-FEA"),
|
| 61 |
+
("negative", r"too much|overwhelmed|can.?t cope|can.?t deal", "FAM-HEL"),
|
| 62 |
+
("negative", r"nothing feels right|empty|pointless|no purpose", "FAM-SAD"),
|
| 63 |
+
("negative", r"don't care|apathy|numb", "FAM-LON"),
|
| 64 |
+
("positive", r"did it|proud|relieved", "FAM-JOY"),
|
| 65 |
+
("neutral", r"just tired|exhausted", "FAM-HEL"),
|
| 66 |
+
# ...add more for coverage
|
| 67 |
+
]
|
| 68 |
+
|
| 69 |
+
# Load emotion and sentiment models
|
| 70 |
self.tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
|
| 71 |
self.model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
|
| 72 |
+
self.sentiment_tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment-latest')
|
| 73 |
+
self.sentiment_model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment-latest')
|
| 74 |
|
| 75 |
def normalize_text(self, text):
|
| 76 |
normalization_map = {
|
|
|
|
| 93 |
clause_markers = [',', ';', '.', 'but', 'because', 'so that', 'which', 'when', 'while']
|
| 94 |
token_count = len(text.split())
|
| 95 |
clause_hits = any(marker in text for marker in clause_markers)
|
| 96 |
+
return token_count > 12 or clause_hits
|
|
|
|
|
|
|
| 97 |
|
| 98 |
def chunk_story(self, text):
    """Break a story into clause-level chunks for per-chunk inference.

    Splits on sentence punctuation as well as common conjunctions and
    relative adverbs (case-insensitive), then discards empty or
    whitespace-only fragments.
    """
    boundary = re.compile(
        r'[.,;!?]|\b(?:and|but|because|so|although|though|while|when)\b',
        re.IGNORECASE,
    )
    pieces = []
    for fragment in boundary.split(text):
        if fragment and fragment.strip():
            pieces.append(fragment.strip())
    return pieces
|
| 102 |
|
| 103 |
+
def detect_emotion_blend_with_negation(self, norm_text):
    """Detect a weighted blend of emotion families from keyword hits.

    Scans *norm_text* for each keyword in ``self.emotion_keyword_map``
    and accumulates a unit score per family hit. A keyword is skipped
    entirely when a negated occurrence is found ("not sad",
    "never happy", "without love", ...).

    Fixes over the previous version:
      * keywords are passed through ``re.escape`` before being
        interpolated into the negation pattern, so phrases containing
        regex metacharacters cannot raise ``re.error`` or mis-match;
      * the negator alternation is anchored with a word boundary, so
        words like "cannot" no longer spuriously trigger the
        "not"/"no" negation check.

    Returns a dict mapping family code -> raw (unnormalized) score;
    caller is expected to normalize.
    """
    blend = {}
    # Word-boundary-anchored negators, applied immediately before the keyword.
    negators = r"\b(?:not|no longer|never|no|without)\s+"
    for fam, keywords in self.emotion_keyword_map.items():
        for kw in keywords:
            # Skip keywords that occur in negated form anywhere in the text.
            if re.search(negators + re.escape(kw), norm_text):
                continue
            # NOTE(review): substring match, so "blue" also hits "blueprint";
            # kept for backward compatibility with upstream callers.
            if kw in norm_text:
                blend[fam] = blend.get(fam, 0) + 1.0
    return blend
|
| 116 |
|
| 117 |
+
def get_sentiment(self, norm_text):
    """Classify the overall sentiment of normalized text.

    Runs the dedicated sentiment model and returns a
    ``(label, confidence)`` pair, where *label* is the lowercased
    class name from the model config and *confidence* is its softmax
    probability.
    """
    encoded = self.sentiment_tokenizer(norm_text, return_tensors='pt')
    with torch.no_grad():
        raw_scores = self.sentiment_model(**encoded).logits
    distribution = F.softmax(raw_scores, dim=-1).squeeze()
    confidence, label_idx = torch.max(distribution, dim=-1)
    label = self.sentiment_model.config.id2label[label_idx.item()]
    return label.lower(), confidence.item()
|
| 125 |
+
|
| 126 |
def infer_emotion(self, input_text):
|
| 127 |
norm_text = self.normalize_text(input_text)
|
| 128 |
|
|
|
|
| 139 |
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 140 |
'arc': emotion_data['arc'],
|
| 141 |
'resonance': emotion_data['resonance'],
|
| 142 |
+
'blend': {emotion_data['primary_emotion_code']: 1.0},
|
| 143 |
+
'trajectory': [emotion_data['primary_emotion_code']],
|
| 144 |
}
|
| 145 |
return packet
|
| 146 |
|
|
|
|
| 153 |
|
| 154 |
chunk_results = []
|
| 155 |
blend_accum = {}
|
| 156 |
+
trajectory = []
|
| 157 |
|
| 158 |
for chunk in chunks:
|
| 159 |
sub_result = self.infer_emotion(chunk) # RECURSIVE CALL
|
| 160 |
chunk_results.append(sub_result)
|
| 161 |
+
# Accumulate blends (weighted by confidence if available)
|
| 162 |
+
conf = sub_result.get('confidence', 1.0)
|
| 163 |
for fam, val in sub_result.get('blend', {}).items():
|
| 164 |
+
blend_accum[fam] = blend_accum.get(fam, 0) + val * conf
|
| 165 |
+
# Trajectory
|
| 166 |
+
if 'primary_emotion_code' in sub_result:
|
| 167 |
+
trajectory.append(sub_result['primary_emotion_code'])
|
| 168 |
|
| 169 |
# Normalize blend
|
| 170 |
if blend_accum:
|
| 171 |
total = sum(blend_accum.values())
|
| 172 |
for k in blend_accum:
|
| 173 |
blend_accum[k] /= total
|
|
|
|
| 174 |
dominant_family = max(blend_accum.items(), key=lambda x: x[1])[0]
|
| 175 |
else:
|
| 176 |
dominant_family = "FAM-NEU"
|
| 177 |
blend_accum = {"FAM-NEU": 1.0}
|
| 178 |
+
trajectory = ["FAM-NEU"]
|
| 179 |
|
| 180 |
emotion_data = self.codex_informer.resolve_emotion_family(dominant_family)
|
| 181 |
packet = {
|
| 182 |
'phrases': [input_text] + [r['phrases'][0] for r in chunk_results],
|
| 183 |
+
'emotion_candidates': [{'phrase': r['phrases'][0], 'candidate_emotion': r.get('primary_emotion_code', 'FAM-NEU')} for r in chunk_results],
|
| 184 |
'metadata': {'source': 'EILProcessor (story mode)', 'input_type': input_type},
|
| 185 |
'emotion_family': emotion_data['emotion_family'],
|
| 186 |
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 187 |
'arc': emotion_data['arc'],
|
| 188 |
'resonance': emotion_data['resonance'],
|
| 189 |
+
'blend': blend_accum,
|
| 190 |
+
'trajectory': trajectory,
|
| 191 |
}
|
| 192 |
return packet
|
| 193 |
|
|
|
|
| 204 |
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 205 |
'arc': emotion_data['arc'],
|
| 206 |
'resonance': emotion_data['resonance'],
|
| 207 |
+
'blend': {emotion_data['primary_emotion_code']: 1.0},
|
| 208 |
+
'trajectory': [emotion_data['primary_emotion_code']],
|
| 209 |
}
|
| 210 |
return packet
|
| 211 |
|
|
|
|
| 223 |
'primary_emotion_code': variant_code,
|
| 224 |
'arc': 'Pending',
|
| 225 |
'resonance': 'Pending',
|
| 226 |
+
'blend': {variant_code: 1.0},
|
| 227 |
+
'trajectory': [variant_code],
|
| 228 |
}
|
| 229 |
return packet
|
| 230 |
|
| 231 |
+
# 5️⃣ Signal normalization - blend detection & negation
|
| 232 |
+
blend = self.detect_emotion_blend_with_negation(norm_text)
|
| 233 |
if blend:
|
|
|
|
| 234 |
total = sum(blend.values())
|
| 235 |
for k in blend:
|
| 236 |
blend[k] /= total
|
|
|
|
| 245 |
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 246 |
'arc': emotion_data['arc'],
|
| 247 |
'resonance': emotion_data['resonance'],
|
| 248 |
+
'blend': blend,
|
| 249 |
+
'trajectory': [primary_code],
|
| 250 |
}
|
| 251 |
return packet
|
| 252 |
|
| 253 |
+
# 6️⃣ Sentiment-to-emotion mapping for non-EI language
|
| 254 |
+
sentiment, sentiment_conf = self.get_sentiment(norm_text)
|
| 255 |
+
print(f"[EILProcessor] Sentiment fallback: {sentiment} ({sentiment_conf:.2f})")
|
| 256 |
+
for sent, cue, fam in self.sentiment_cue_map:
|
| 257 |
+
if sent == sentiment and re.search(cue, norm_text):
|
| 258 |
+
emotion_data = self.codex_informer.resolve_emotion_family(fam)
|
| 259 |
+
packet = {
|
| 260 |
+
'phrases': [input_text],
|
| 261 |
+
'emotion_candidates': [{'phrase': input_text, 'candidate_emotion': fam}],
|
| 262 |
+
'metadata': {'source': 'EILProcessor (sentiment-to-emotion)', 'input_type': input_type},
|
| 263 |
+
'emotion_family': emotion_data['emotion_family'],
|
| 264 |
+
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 265 |
+
'arc': emotion_data['arc'],
|
| 266 |
+
'resonance': emotion_data['resonance'],
|
| 267 |
+
'blend': {fam: 1.0},
|
| 268 |
+
'trajectory': [fam],
|
| 269 |
+
}
|
| 270 |
+
return packet
|
| 271 |
+
|
| 272 |
+
# 7️⃣ Model fallback (last resort)
|
| 273 |
+
print(f"[EILProcessor] No crosswalk/alias/keyword/sentiment match — running model on: '{norm_text}'")
|
| 274 |
tokens = self.tokenizer(norm_text, return_tensors='pt')
|
| 275 |
with torch.no_grad():
|
| 276 |
logits = self.model(**tokens).logits
|
|
|
|
| 297 |
primary_emotion_code = model_to_codex_map.get(predicted_label.lower(), "FAM-NEU")
|
| 298 |
emotion_data = self.codex_informer.resolve_emotion_family(primary_emotion_code)
|
| 299 |
blend = {emotion_data['primary_emotion_code']: 1.0}
|
|
|
|
| 300 |
packet = {
|
| 301 |
'phrases': [input_text],
|
| 302 |
'emotion_candidates': [{'phrase': input_text, 'candidate_emotion': predicted_label}],
|
| 303 |
+
'metadata': {'source': 'EILProcessor (model)', 'input_type': input_type, 'confidence': confidence},
|
| 304 |
'emotion_family': emotion_data['emotion_family'],
|
| 305 |
'primary_emotion_code': emotion_data['primary_emotion_code'],
|
| 306 |
'arc': emotion_data['arc'],
|
| 307 |
'resonance': emotion_data['resonance'],
|
| 308 |
+
'blend': blend,
|
| 309 |
+
'trajectory': [emotion_data['primary_emotion_code']],
|
| 310 |
+
'confidence': confidence
|
| 311 |
}
|
| 312 |
return packet
|