hhem

Sleeping

App Files Files Community

hhem / app.py

eaglelandsonce

Update app.py

2cd5e2c verified almost 2 years ago

raw

history blame

12.2 kB

	import streamlit as st
	import requests
	import json
	import os
	import pandas as pd
	from sentence_transformers import CrossEncoder
	import numpy as np
	import re

	from textwrap import dedent
	import google.generativeai as genai


	# Tool import
	from crewai.tools.gemini_tools import GeminiSearchTools
	from crewai.tools.mixtral_tools import MixtralSearchTools
	from crewai.tools.zephyr_tools import ZephyrSearchTools
	from crewai.tools.phi2_tools import Phi2SearchTools


	# Google Langchain
	from langchain_google_genai import GoogleGenerativeAI

	#Crew imports
	from crewai import Agent, Task, Crew, Process

	# Retrieve the API key from the GOOGLE_API_KEY environment variable
	GOOGLE_AI_STUDIO = os.environ.get('GOOGLE_API_KEY')

	# Fail fast when the key is missing or empty. The message names the
	# variable that is actually read above (it previously named a different one).
	if not GOOGLE_AI_STUDIO:
	    raise ValueError("API key not found. Please set the GOOGLE_API_KEY environment variable.")

	# Gemini LLM handed to the CrewAI agents via their `llm` argument
	gemini_llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_AI_STUDIO)

	# CrewAI +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

	def crewai_process_gemini(research_topic):
	    """Run a one-agent, one-task CrewAI crew that summarizes a topic.

	    The agent uses ``gemini_llm`` and is given the
	    ``GeminiSearchTools.gemini_search`` tool.

	    Args:
	        research_topic: Text interpolated into the task description
	            ``"Summarize {research_topic}"``.

	    Returns:
	        Whatever ``crew.kickoff()`` returns.
	    """
	    # The goal used to describe managing social anxiety in group therapy,
	    # which does not fit a 'Summary Evaluator'; it now matches the goal used
	    # by the other summary-evaluator agents in this file.
	    gemini_agent = Agent(
	        role='Summary Evaluator',
	        goal='Evaluate the summary using the HHEM-Victara Tuner',
	        backstory="""Skilled in running query evaluation""",
	        verbose=True,
	        allow_delegation=False,
	        llm=gemini_llm,
	        tools=[GeminiSearchTools.gemini_search],
	    )

	    # Single task: summarize the supplied topic
	    summarize_task = Task(
	        description=f"""Summarize {research_topic}""",
	        agent=gemini_agent,
	    )

	    # One agent, one task, executed sequentially
	    crew = Crew(
	        agents=[gemini_agent],
	        tasks=[summarize_task],
	        verbose=2,
	        process=Process.sequential,
	    )

	    return crew.kickoff()



	def crewai_process_mixtral_crazy(research_topic):
	    """Summarize ``research_topic`` with a crew whose agent has the ``mixtral_crazy`` tool.

	    Args:
	        research_topic: Text interpolated into the task description
	            ``"Summarize {research_topic}"``.

	    Returns:
	        Whatever ``crew.kickoff()`` returns.
	    """
	    # The only tool this agent may call
	    agent_tools = [MixtralSearchTools.mixtral_crazy]

	    evaluator = Agent(
	        role='Summary Evaluator',
	        goal='Evaluate the summary using the HHEM-Victara Tuner',
	        backstory="Skilled in running query evaluation",
	        verbose=True,
	        allow_delegation=False,
	        llm=gemini_llm,
	        tools=agent_tools,
	    )

	    summarize_task = Task(
	        description=f"Summarize {research_topic}",
	        agent=evaluator,
	    )

	    # One agent, one task, executed sequentially
	    return Crew(
	        agents=[evaluator],
	        tasks=[summarize_task],
	        verbose=2,
	        process=Process.sequential,
	    ).kickoff()


	def crewai_process_mixtral_normal(research_topic):
	    """Summarize ``research_topic`` with a crew whose agent has the ``mixtral_normal`` tool.

	    Args:
	        research_topic: Text interpolated into the task description
	            ``"Summarize {research_topic}"``.

	    Returns:
	        Whatever ``crew.kickoff()`` returns.
	    """
	    # The only tool this agent may call
	    agent_tools = [MixtralSearchTools.mixtral_normal]

	    evaluator = Agent(
	        role='Summary Evaluator',
	        goal='Evaluate the summary using the HHEM-Victara Tuner',
	        backstory="Skilled in running query evaluation",
	        verbose=True,
	        allow_delegation=False,
	        llm=gemini_llm,
	        tools=agent_tools,
	    )

	    summarize_task = Task(
	        description=f"Summarize {research_topic}",
	        agent=evaluator,
	    )

	    # One agent, one task, executed sequentially
	    return Crew(
	        agents=[evaluator],
	        tasks=[summarize_task],
	        verbose=2,
	        process=Process.sequential,
	    ).kickoff()


	def crewai_process_zephyr_normal(research_topic):
	    """Summarize ``research_topic`` with a crew whose agent has the ``zephyr_normal`` tool.

	    Args:
	        research_topic: Text interpolated into the task description
	            ``"Summarize {research_topic}"``.

	    Returns:
	        Whatever ``crew.kickoff()`` returns.
	    """
	    # The only tool this agent may call
	    agent_tools = [ZephyrSearchTools.zephyr_normal]

	    evaluator = Agent(
	        role='Summary Evaluator',
	        goal='Evaluate the summary using the HHEM-Victara Tuner',
	        backstory="Skilled in running query evaluation",
	        verbose=True,
	        allow_delegation=False,
	        llm=gemini_llm,
	        tools=agent_tools,
	    )

	    summarize_task = Task(
	        description=f"Summarize {research_topic}",
	        agent=evaluator,
	    )

	    # One agent, one task, executed sequentially
	    return Crew(
	        agents=[evaluator],
	        tasks=[summarize_task],
	        verbose=2,
	        process=Process.sequential,
	    ).kickoff()


	def crewai_process_phi2(research_topic):
	    """Run a one-agent, one-task CrewAI crew that summarizes a topic.

	    The agent uses ``gemini_llm`` and is given the
	    ``Phi2SearchTools.phi2_search`` tool.

	    Args:
	        research_topic: Text interpolated into the task description
	            ``"Summarize {research_topic}"``.

	    Returns:
	        Whatever ``crew.kickoff()`` returns.
	    """
	    # The role used to be 'Emily Mental Patient Graphic Designer Anxiety',
	    # which contradicts this agent's goal and backstory; it now matches the
	    # role used by the other summary-evaluator agents in this file.
	    phi2_agent = Agent(
	        role='Summary Evaluator',
	        goal='Evaluate the summary using the HHEM-Victara Tuner',
	        backstory="""Skilled in running query evaluation""",
	        verbose=True,
	        allow_delegation=False,
	        llm=gemini_llm,
	        tools=[Phi2SearchTools.phi2_search],
	    )

	    # Single task: summarize the supplied topic
	    summarize_task = Task(
	        description=f"""Summarize {research_topic}""",
	        agent=phi2_agent,
	    )

	    # One agent, one task, executed sequentially
	    crew = Crew(
	        agents=[phi2_agent],
	        tasks=[summarize_task],
	        verbose=2,
	        process=Process.sequential,
	    )

	    return crew.kickoff()



	# Credentials ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

	# All three Vectara settings are read with os.environ[...], so a missing
	# variable raises KeyError here, in the order listed.
	corpus_id, customer_id, api_key = [
	    os.environ[var_name]
	    for var_name in ('VECTARA_CORPUS_ID', 'VECTARA_CUSTOMER_ID', 'VECTARA_API_KEY')
	]


	# Get Data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


	def get_post_headers() -> dict:
	    """Build the headers attached to each Vectara POST request.

	    Returns:
	        A new dict holding the module-level ``api_key`` and ``customer_id``
	        plus a JSON ``Content-Type``.
	    """
	    headers = {}
	    headers["x-api-key"] = api_key
	    headers["customer-id"] = customer_id
	    headers["Content-Type"] = "application/json"
	    return headers

	def query_vectara(query: str, filter_str="", lambda_val=0.0) -> str:
	    """Query the Vectara corpus and return the generated summary text.

	    Args:
	        query: Query text sent to Vectara.
	        filter_str: Optional metadata filter; only added to the request
	            when non-empty.
	        lambda_val: Value sent as ``lexicalInterpolationConfig.lambda``.

	    Returns:
	        The summary text with bracketed numeric markers such as ``[1]`` or
	        ``[2,3]`` removed. Returns ``""`` (after reporting through
	        ``st.error``) when the request fails, the status is not 200, or
	        the response carries no summary.
	    """
	    corpus_key = {
	        "customerId": customer_id,
	        "corpusId": corpus_id,
	        "lexicalInterpolationConfig": {"lambda": lambda_val},
	    }
	    if filter_str:
	        corpus_key["metadataFilter"] = filter_str

	    data = {
	        "query": [
	            {
	                "query": query,
	                "start": 0,
	                "numResults": 10,
	                "contextConfig": {
	                    "sentencesBefore": 2,
	                    "sentencesAfter": 2
	                },
	                "corpusKey": [corpus_key],
	                "summary": [
	                    {
	                        "responseLang": "eng",
	                        "maxSummarizedResults": 5,
	                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
	                    },
	                ]
	            }
	        ]
	    }

	    # Connection errors and timeouts are reported the same way as a
	    # non-200 status instead of surfacing as an uncaught exception.
	    try:
	        response = requests.post(
	            "https://api.vectara.io/v1/query",
	            headers=get_post_headers(),
	            data=json.dumps(data),
	            timeout=130,
	        )
	    except requests.RequestException as exc:
	        st.error(f"Query failed ({exc})")
	        return ""

	    if response.status_code != 200:
	        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
	        return ""

	    # A 200 response may still lack a summary (or not be valid JSON);
	    # treat that as a failed query rather than crashing on the lookup.
	    try:
	        answer = response.json()["responseSet"][0]["summary"][0]["text"]
	    except (KeyError, IndexError, TypeError, ValueError):
	        st.error("Query succeeded but the response did not contain a summary.")
	        return ""

	    # Strip bracketed numeric markers like [1] or [1,2,3] from the summary
	    return re.sub(r'\[\d+(,\d+){0,5}\]', '', answer)



	# Initialize the HHEM model +++++++++++++++++++++++++++++++++++++++++++++++
	@st.cache_resource
	def _load_hhem_model():
	    """Create the HHEM CrossEncoder; cached so it is built once, not per rerun."""
	    return CrossEncoder('vectara/hallucination_evaluation_model')


	# Streamlit re-executes this script on every widget interaction. Going
	# through the cached loader avoids re-creating the model each time.
	model = _load_hhem_model()

	# Function to compute HHEM scores
	def compute_hhem_scores(texts, summary):
	    """Score each text against ``summary`` with the module-level HHEM ``model``.

	    Args:
	        texts: Iterable of texts; each one is paired with ``summary``.
	        summary: The summary placed second in every ``[text, summary]`` pair.

	    Returns:
	        The result of ``model.predict`` on the list of pairs.
	    """
	    return model.predict([[candidate, summary] for candidate in texts])

	# Define the Vectara query function
	def vectara_query(query: str, config: dict):
	    """Query Vectara with caller-supplied settings and return results plus summary.

	    Args:
	        query: Query text sent to Vectara.
	        config: Settings dict. Required keys: ``"api_key"``,
	            ``"customer_id"``, ``"corpus_id"``. Optional keys:
	            ``"lambda_val"`` (default 0.5), ``"top_k"`` (default 10) and
	            ``"timeout"`` in seconds (default 130).

	    Returns:
	        A ``(results, summary)`` tuple, where ``results`` is a list of
	        ``[text, score]`` pairs and ``summary`` is the summary text.
	        Returns ``([], "")`` (after reporting through ``st.error``) when the
	        request fails, the status is not 200, or the response is malformed.
	    """
	    corpus_key = [{
	        "customerId": config["customer_id"],
	        "corpusId": config["corpus_id"],
	        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
	    }]
	    data = {
	        "query": [{
	            "query": query,
	            "start": 0,
	            "numResults": config.get("top_k", 10),
	            "contextConfig": {
	                "sentencesBefore": 2,
	                "sentencesAfter": 2,
	            },
	            "corpusKey": corpus_key,
	            "summary": [{
	                "responseLang": "eng",
	                "maxSummarizedResults": 5,
	            }]
	        }]
	    }

	    headers = {
	        "x-api-key": config["api_key"],
	        "customer-id": config["customer_id"],
	        "Content-Type": "application/json",
	    }

	    # Without a timeout, requests can wait indefinitely on a stalled
	    # connection. The default matches the 130 s used by query_vectara.
	    try:
	        response = requests.post(
	            headers=headers,
	            url="https://api.vectara.io/v1/query",
	            data=json.dumps(data),
	            timeout=config.get("timeout", 130),
	        )
	    except requests.RequestException as exc:
	        st.error(f"Query failed ({exc})")
	        return [], ""

	    if response.status_code != 200:
	        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
	        return [], ""

	    # A 200 response may still be missing the expected fields; report that
	    # instead of crashing on the lookups.
	    try:
	        response_set = response.json()["responseSet"][0]
	        summary = response_set["summary"][0]["text"]
	        res = [[r['text'], r['score']] for r in response_set["response"]]
	    except (KeyError, IndexError, TypeError, ValueError):
	        st.error("Query succeeded but the response was missing results or a summary.")
	        return [], ""

	    return res, summary


	# Create the main app with four tabs
	tab1, tab2, tab3, tab4 = st.tabs(["Synthetic Data", "Data Query", "HHEM-Victara Query Tuner", "Model Evaluation"])

	with tab1:
	    # Static tab: a heading and a link button to a chat.openai.com URL
	    st.header("Synthetic Data")
	    st.link_button(
	        label="Create Synthetic Medical Data",
	        url="https://chat.openai.com/g/g-XyHciw52w-synthetic-clinical-data",
	    )



	with tab2:
	    # Static tab: a heading and a link button to a chat.openai.com URL
	    st.header("Data Query")
	    st.link_button(
	        label="Query & Summarize Data",
	        url="https://chat.openai.com/g/g-9tWqg4gRY-explore-summarize-medical-data",
	    )

	with tab3:

	    st.header("HHEM-Victara Query Tuner")

	    # User inputs
	    query = st.text_area("Enter your text for query tuning", "", height=75)
	    lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
	    top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10)

	    if st.button("Query Vectara"):
	        if not query.strip():
	            # Same guard as the "Model Evaluation" tab: do not call the API
	            # with empty input.
	            st.warning('Please enter a query.')
	        else:
	            config = {
	                "api_key": os.environ.get("VECTARA_API_KEY", ""),
	                "customer_id": os.environ.get("VECTARA_CUSTOMER_ID", ""),
	                "corpus_id": os.environ.get("VECTARA_CORPUS_ID", ""),
	                "lambda_val": lambda_val,
	                "top_k": top_k,
	            }

	            results, summary = vectara_query(query, config)

	            if results:
	                st.subheader("Summary")
	                st.write(summary)

	                st.subheader("Top Results")

	                # Only the first five result texts are scored (the request
	                # also sets maxSummarizedResults to 5).
	                texts = [r[0] for r in results[:5]]

	                # Compute HHEM scores of each text against the summary
	                scores = compute_hhem_scores(texts, summary)

	                # Prepare and display the dataframe
	                df = pd.DataFrame({'Fact': texts, 'HHEM Score': scores})
	                st.dataframe(df)
	            else:
	                st.write("No results found.")

	with tab4:

	    st.header("Model Evaluation")

	    # Each selectable label maps to the crew runner it triggers; dict order
	    # is the order shown in the select box.
	    process_runners = {
	        'crewai_process_gemini': crewai_process_gemini,
	        'crewai_process_mixtral_crazy': crewai_process_mixtral_crazy,
	        'crewai_process_mixtral_normal': crewai_process_mixtral_normal,
	        'crewai_process_zephyr_normal': crewai_process_zephyr_normal,
	        'crewai_process_phi2': crewai_process_phi2,
	    }

	    # User input for the research topic
	    research_topic = st.text_input('Enter your research topic:', '')

	    # Selection box for the function to execute
	    process_selection = st.selectbox('Choose the process to run:', tuple(process_runners))

	    # Button to execute the chosen function
	    if st.button('Run Process'):
	        if not research_topic:
	            st.warning('Please enter a research topic.')
	        else:
	            result = process_runners[process_selection](research_topic)
	            st.write(result)