ThreatLevelD committed on
Commit
9b2f3b7
·
1 Parent(s): bea66c7

Upgrade EILProcessor to world-class signal normalization: adds subphrase/keyword blend detection, chunk weighting by model confidence, negation/contrast handling, emotion arc trajectory output, and sentiment-to-emotion mapping for non-EI language. Significantly improves long-form and ambiguous emotional inference.

Browse files
Files changed (1) hide show
  1. core/eil_processor.py +84 -29
core/eil_processor.py CHANGED
@@ -1,5 +1,5 @@
1
  # core/eil_processor.py
2
- # Master Emotional Core (MEC) - EIL Processor (Signal Normalization Edition)
3
 
4
  import yaml
5
  import re
@@ -42,11 +42,11 @@ class EILProcessor:
42
  # Emotion keyword dictionary for signal normalization/blending
43
  self.emotion_keyword_map = {
44
  "FAM-ANG": ["anger", "angry", "hate", "furious", "rage", "resentment"],
45
- "FAM-HEL": ["helpless", "powerless", "can't", "unable", "trapped", "stuck"],
46
- "FAM-SAD": ["sad", "down", "unhappy", "miserable", "depressed", "blue"],
47
- "FAM-FEA": ["afraid", "scared", "fear", "terrified", "worried", "nervous", "anxious"],
48
- "FAM-LOV": ["love", "loved", "loving", "caring", "affection"],
49
- "FAM-JOY": ["joy", "happy", "excited", "delighted", "content"],
50
  "FAM-SUR": ["surprised", "amazed", "astonished", "shocked"],
51
  "FAM-DIS": ["disgust", "disgusted", "gross", "revolted"],
52
  "FAM-SHA": ["ashamed", "shame", "embarrassed", "humiliated"],
@@ -54,9 +54,23 @@ class EILProcessor:
54
  # Add more as needed
55
  }
56
 
57
- # Load tokenizer and model
 
 
 
 
 
 
 
 
 
 
 
 
58
  self.tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
59
  self.model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
 
 
60
 
61
  def normalize_text(self, text):
62
  normalization_map = {
@@ -79,24 +93,36 @@ class EILProcessor:
79
  clause_markers = [',', ';', '.', 'but', 'because', 'so that', 'which', 'when', 'while']
80
  token_count = len(text.split())
81
  clause_hits = any(marker in text for marker in clause_markers)
82
- if token_count > 12 or clause_hits:
83
- return True
84
- return False
85
 
86
  def chunk_story(self, text):
87
- # Also split on conjunctions and relative pronouns, not just punctuation
88
  chunks = re.split(r'[.,;!?]|\b(?:and|but|because|so|although|though|while|when)\b', text, flags=re.IGNORECASE)
89
  chunks = [chunk.strip() for chunk in chunks if chunk and chunk.strip()]
90
  return chunks
91
 
92
- def detect_emotion_blend(self, norm_text):
93
  blend = {}
94
  for fam, keywords in self.emotion_keyword_map.items():
95
  for kw in keywords:
 
 
 
 
 
 
96
  if kw in norm_text:
97
  blend[fam] = blend.get(fam, 0) + 1.0
98
  return blend
99
 
 
 
 
 
 
 
 
 
 
100
  def infer_emotion(self, input_text):
101
  norm_text = self.normalize_text(input_text)
102
 
@@ -113,7 +139,8 @@ class EILProcessor:
113
  'primary_emotion_code': emotion_data['primary_emotion_code'],
114
  'arc': emotion_data['arc'],
115
  'resonance': emotion_data['resonance'],
116
- 'blend': {emotion_data['primary_emotion_code']: 1.0}
 
117
  }
118
  return packet
119
 
@@ -126,35 +153,41 @@ class EILProcessor:
126
 
127
  chunk_results = []
128
  blend_accum = {}
 
129
 
130
  for chunk in chunks:
131
  sub_result = self.infer_emotion(chunk) # RECURSIVE CALL
132
  chunk_results.append(sub_result)
133
- # Accumulate blends
 
134
  for fam, val in sub_result.get('blend', {}).items():
135
- blend_accum[fam] = blend_accum.get(fam, 0) + val
 
 
 
136
 
137
  # Normalize blend
138
  if blend_accum:
139
  total = sum(blend_accum.values())
140
  for k in blend_accum:
141
  blend_accum[k] /= total
142
-
143
  dominant_family = max(blend_accum.items(), key=lambda x: x[1])[0]
144
  else:
145
  dominant_family = "FAM-NEU"
146
  blend_accum = {"FAM-NEU": 1.0}
 
147
 
148
  emotion_data = self.codex_informer.resolve_emotion_family(dominant_family)
149
  packet = {
150
  'phrases': [input_text] + [r['phrases'][0] for r in chunk_results],
151
- 'emotion_candidates': [{'phrase': r['phrases'][0], 'candidate_emotion': r['primary_emotion_code']} for r in chunk_results],
152
  'metadata': {'source': 'EILProcessor (story mode)', 'input_type': input_type},
153
  'emotion_family': emotion_data['emotion_family'],
154
  'primary_emotion_code': emotion_data['primary_emotion_code'],
155
  'arc': emotion_data['arc'],
156
  'resonance': emotion_data['resonance'],
157
- 'blend': blend_accum
 
158
  }
159
  return packet
160
 
@@ -171,7 +204,8 @@ class EILProcessor:
171
  'primary_emotion_code': emotion_data['primary_emotion_code'],
172
  'arc': emotion_data['arc'],
173
  'resonance': emotion_data['resonance'],
174
- 'blend': {emotion_data['primary_emotion_code']: 1.0}
 
175
  }
176
  return packet
177
 
@@ -189,14 +223,14 @@ class EILProcessor:
189
  'primary_emotion_code': variant_code,
190
  'arc': 'Pending',
191
  'resonance': 'Pending',
192
- 'blend': {variant_code: 1.0}
 
193
  }
194
  return packet
195
 
196
- # 5️⃣ Signal normalization - keyword blend check
197
- blend = self.detect_emotion_blend(norm_text)
198
  if blend:
199
- # Normalize
200
  total = sum(blend.values())
201
  for k in blend:
202
  blend[k] /= total
@@ -211,12 +245,32 @@ class EILProcessor:
211
  'primary_emotion_code': emotion_data['primary_emotion_code'],
212
  'arc': emotion_data['arc'],
213
  'resonance': emotion_data['resonance'],
214
- 'blend': blend
 
215
  }
216
  return packet
217
 
218
- # 6️⃣ Model fallback
219
- print(f"[EILProcessor] No crosswalk/alias/keyword match — running model on: '{norm_text}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  tokens = self.tokenizer(norm_text, return_tensors='pt')
221
  with torch.no_grad():
222
  logits = self.model(**tokens).logits
@@ -243,15 +297,16 @@ class EILProcessor:
243
  primary_emotion_code = model_to_codex_map.get(predicted_label.lower(), "FAM-NEU")
244
  emotion_data = self.codex_informer.resolve_emotion_family(primary_emotion_code)
245
  blend = {emotion_data['primary_emotion_code']: 1.0}
246
-
247
  packet = {
248
  'phrases': [input_text],
249
  'emotion_candidates': [{'phrase': input_text, 'candidate_emotion': predicted_label}],
250
- 'metadata': {'source': 'EILProcessor (model)', 'input_type': input_type},
251
  'emotion_family': emotion_data['emotion_family'],
252
  'primary_emotion_code': emotion_data['primary_emotion_code'],
253
  'arc': emotion_data['arc'],
254
  'resonance': emotion_data['resonance'],
255
- 'blend': blend
 
 
256
  }
257
  return packet
 
1
  # core/eil_processor.py
2
+ # MEC EIL Processor World-Class Signal Normalization Edition
3
 
4
  import yaml
5
  import re
 
42
  # Emotion keyword dictionary for signal normalization/blending
43
  self.emotion_keyword_map = {
44
  "FAM-ANG": ["anger", "angry", "hate", "furious", "rage", "resentment"],
45
+ "FAM-HEL": ["helpless", "powerless", "can't", "unable", "trapped", "stuck", "overwhelmed", "overwhelm"],
46
+ "FAM-SAD": ["sad", "down", "unhappy", "miserable", "depressed", "blue", "empty"],
47
+ "FAM-FEA": ["afraid", "scared", "fear", "terrified", "worried", "nervous", "anxious", "can't sleep"],
48
+ "FAM-LOV": ["love", "loved", "loving", "caring", "affection", "proud"],
49
+ "FAM-JOY": ["joy", "happy", "excited", "delighted", "content", "proud"],
50
  "FAM-SUR": ["surprised", "amazed", "astonished", "shocked"],
51
  "FAM-DIS": ["disgust", "disgusted", "gross", "revolted"],
52
  "FAM-SHA": ["ashamed", "shame", "embarrassed", "humiliated"],
 
54
  # Add more as needed
55
  }
56
 
57
+ # For sentiment-to-emotion mapping of ambiguous/indirect language
58
+ self.sentiment_cue_map = [
59
+ # (sentiment, regex or cue, mapped emotion)
60
+ ("negative", r"can.?t sleep|insomnia|restless|wake up", "FAM-FEA"),
61
+ ("negative", r"too much|overwhelmed|can.?t cope|can.?t deal", "FAM-HEL"),
62
+ ("negative", r"nothing feels right|empty|pointless|no purpose", "FAM-SAD"),
63
+ ("negative", r"don't care|apathy|numb", "FAM-LON"),
64
+ ("positive", r"did it|proud|relieved", "FAM-JOY"),
65
+ ("neutral", r"just tired|exhausted", "FAM-HEL"),
66
+ # ...add more for coverage
67
+ ]
68
+
69
+ # Load emotion and sentiment models
70
  self.tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
71
  self.model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
72
+ self.sentiment_tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment-latest')
73
+ self.sentiment_model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment-latest')
74
 
75
  def normalize_text(self, text):
76
  normalization_map = {
 
93
  clause_markers = [',', ';', '.', 'but', 'because', 'so that', 'which', 'when', 'while']
94
  token_count = len(text.split())
95
  clause_hits = any(marker in text for marker in clause_markers)
96
+ return token_count > 12 or clause_hits
 
 
97
 
98
def chunk_story(self, text):
    """Split a narrative input into clause-level chunks.

    Breaks on sentence punctuation and on common conjunctions /
    subordinators so each chunk is roughly one emotional statement.

    Args:
        text: Raw (or normalized) story text.

    Returns:
        List of non-empty, whitespace-stripped chunk strings.
    """
    splitter = r'[.,;!?]|\b(?:and|but|because|so|although|though|while|when)\b'
    pieces = re.split(splitter, text, flags=re.IGNORECASE)
    return [piece.strip() for piece in pieces if piece and piece.strip()]
102
 
103
def detect_emotion_blend_with_negation(self, norm_text):
    """Score emotion families by keyword hits, skipping negated keywords.

    Each family in ``self.emotion_keyword_map`` earns +1.0 per keyword
    found as a substring of ``norm_text``, unless the keyword appears in
    a negated form ("not X", "no longer X", "never X", "no X",
    "without X"), in which case that keyword is ignored.

    Args:
        norm_text: Pre-normalized (assumed lowercased) input text —
            TODO confirm normalize_text lowercases, otherwise keyword
            matching is case-sensitive.

    Returns:
        dict mapping emotion family code -> raw hit count (unnormalized;
        callers normalize to a probability-like blend).
    """
    blend = {}
    for fam, keywords in self.emotion_keyword_map.items():
        for kw in keywords:
            # Escape the keyword before regex interpolation: current map
            # entries are plain words, but entries with regex
            # metacharacters (e.g. '.', '?', '(') would otherwise corrupt
            # the negation patterns.
            esc = re.escape(kw)
            negation_patterns = [
                rf"not {esc}", rf"no longer {esc}", rf"never {esc}",
                rf"no {esc}", rf"\bwithout {esc}"
            ]
            if any(re.search(p, norm_text) for p in negation_patterns):
                continue
            # NOTE(review): plain substring match — "down" also matches
            # "downtown"; word-boundary matching would be stricter but
            # would change current behavior.
            if kw in norm_text:
                blend[fam] = blend.get(fam, 0) + 1.0
    return blend
116
 
117
def get_sentiment(self, norm_text):
    """Classify ``norm_text`` with the sentiment model.

    Runs the RoBERTa sentiment classifier under ``torch.no_grad`` and
    returns the top label together with its softmax probability.

    Args:
        norm_text: Normalized input text.

    Returns:
        Tuple of (label, confidence) where label is the lowercased
        sentiment label (e.g. "negative") and confidence is a float
        probability in [0, 1].
    """
    encoded = self.sentiment_tokenizer(norm_text, return_tensors='pt')
    with torch.no_grad():
        logits = self.sentiment_model(**encoded).logits
    probs = F.softmax(logits, dim=-1).squeeze()
    best_prob, best_idx = torch.max(probs, dim=-1)
    label = self.sentiment_model.config.id2label[best_idx.item()]
    return label.lower(), best_prob.item()
125
+
126
  def infer_emotion(self, input_text):
127
  norm_text = self.normalize_text(input_text)
128
 
 
139
  'primary_emotion_code': emotion_data['primary_emotion_code'],
140
  'arc': emotion_data['arc'],
141
  'resonance': emotion_data['resonance'],
142
+ 'blend': {emotion_data['primary_emotion_code']: 1.0},
143
+ 'trajectory': [emotion_data['primary_emotion_code']],
144
  }
145
  return packet
146
 
 
153
 
154
  chunk_results = []
155
  blend_accum = {}
156
+ trajectory = []
157
 
158
  for chunk in chunks:
159
  sub_result = self.infer_emotion(chunk) # RECURSIVE CALL
160
  chunk_results.append(sub_result)
161
+ # Accumulate blends (weighted by confidence if available)
162
+ conf = sub_result.get('confidence', 1.0)
163
  for fam, val in sub_result.get('blend', {}).items():
164
+ blend_accum[fam] = blend_accum.get(fam, 0) + val * conf
165
+ # Trajectory
166
+ if 'primary_emotion_code' in sub_result:
167
+ trajectory.append(sub_result['primary_emotion_code'])
168
 
169
  # Normalize blend
170
  if blend_accum:
171
  total = sum(blend_accum.values())
172
  for k in blend_accum:
173
  blend_accum[k] /= total
 
174
  dominant_family = max(blend_accum.items(), key=lambda x: x[1])[0]
175
  else:
176
  dominant_family = "FAM-NEU"
177
  blend_accum = {"FAM-NEU": 1.0}
178
+ trajectory = ["FAM-NEU"]
179
 
180
  emotion_data = self.codex_informer.resolve_emotion_family(dominant_family)
181
  packet = {
182
  'phrases': [input_text] + [r['phrases'][0] for r in chunk_results],
183
+ 'emotion_candidates': [{'phrase': r['phrases'][0], 'candidate_emotion': r.get('primary_emotion_code', 'FAM-NEU')} for r in chunk_results],
184
  'metadata': {'source': 'EILProcessor (story mode)', 'input_type': input_type},
185
  'emotion_family': emotion_data['emotion_family'],
186
  'primary_emotion_code': emotion_data['primary_emotion_code'],
187
  'arc': emotion_data['arc'],
188
  'resonance': emotion_data['resonance'],
189
+ 'blend': blend_accum,
190
+ 'trajectory': trajectory,
191
  }
192
  return packet
193
 
 
204
  'primary_emotion_code': emotion_data['primary_emotion_code'],
205
  'arc': emotion_data['arc'],
206
  'resonance': emotion_data['resonance'],
207
+ 'blend': {emotion_data['primary_emotion_code']: 1.0},
208
+ 'trajectory': [emotion_data['primary_emotion_code']],
209
  }
210
  return packet
211
 
 
223
  'primary_emotion_code': variant_code,
224
  'arc': 'Pending',
225
  'resonance': 'Pending',
226
+ 'blend': {variant_code: 1.0},
227
+ 'trajectory': [variant_code],
228
  }
229
  return packet
230
 
231
+ # 5️⃣ Signal normalization - blend detection & negation
232
+ blend = self.detect_emotion_blend_with_negation(norm_text)
233
  if blend:
 
234
  total = sum(blend.values())
235
  for k in blend:
236
  blend[k] /= total
 
245
  'primary_emotion_code': emotion_data['primary_emotion_code'],
246
  'arc': emotion_data['arc'],
247
  'resonance': emotion_data['resonance'],
248
+ 'blend': blend,
249
+ 'trajectory': [primary_code],
250
  }
251
  return packet
252
 
253
+ # 6️⃣ Sentiment-to-emotion mapping for non-EI language
254
+ sentiment, sentiment_conf = self.get_sentiment(norm_text)
255
+ print(f"[EILProcessor] Sentiment fallback: {sentiment} ({sentiment_conf:.2f})")
256
+ for sent, cue, fam in self.sentiment_cue_map:
257
+ if sent == sentiment and re.search(cue, norm_text):
258
+ emotion_data = self.codex_informer.resolve_emotion_family(fam)
259
+ packet = {
260
+ 'phrases': [input_text],
261
+ 'emotion_candidates': [{'phrase': input_text, 'candidate_emotion': fam}],
262
+ 'metadata': {'source': 'EILProcessor (sentiment-to-emotion)', 'input_type': input_type},
263
+ 'emotion_family': emotion_data['emotion_family'],
264
+ 'primary_emotion_code': emotion_data['primary_emotion_code'],
265
+ 'arc': emotion_data['arc'],
266
+ 'resonance': emotion_data['resonance'],
267
+ 'blend': {fam: 1.0},
268
+ 'trajectory': [fam],
269
+ }
270
+ return packet
271
+
272
+ # 7️⃣ Model fallback (last resort)
273
+ print(f"[EILProcessor] No crosswalk/alias/keyword/sentiment match — running model on: '{norm_text}'")
274
  tokens = self.tokenizer(norm_text, return_tensors='pt')
275
  with torch.no_grad():
276
  logits = self.model(**tokens).logits
 
297
  primary_emotion_code = model_to_codex_map.get(predicted_label.lower(), "FAM-NEU")
298
  emotion_data = self.codex_informer.resolve_emotion_family(primary_emotion_code)
299
  blend = {emotion_data['primary_emotion_code']: 1.0}
 
300
  packet = {
301
  'phrases': [input_text],
302
  'emotion_candidates': [{'phrase': input_text, 'candidate_emotion': predicted_label}],
303
+ 'metadata': {'source': 'EILProcessor (model)', 'input_type': input_type, 'confidence': confidence},
304
  'emotion_family': emotion_data['emotion_family'],
305
  'primary_emotion_code': emotion_data['primary_emotion_code'],
306
  'arc': emotion_data['arc'],
307
  'resonance': emotion_data['resonance'],
308
+ 'blend': blend,
309
+ 'trajectory': [emotion_data['primary_emotion_code']],
310
+ 'confidence': confidence
311
  }
312
  return packet