Update app.py
app.py
CHANGED
@@ -5,7 +5,7 @@ from threading import Thread
 import os
 import json
 import uuid
-from datasets import Dataset
+from datasets import Dataset
 from huggingface_hub import HfApi, login
 import time
 
@@ -28,63 +28,6 @@ DATASET_FILENAME = "feedback.jsonl"  # Filename for feedback data
 # Ensure feedback directory exists
 os.makedirs(DATASET_PATH, exist_ok=True)
 
-# Sync existing dataset from Hub if available
-def sync_dataset_from_hub():
-    """Download existing dataset from Hub and merge with local data"""
-    try:
-        # Try to get token from environment variable
-        hf_token = os.environ.get("HF_TOKEN")
-        if hf_token:
-            login(token=hf_token)
-
-        # Check if the dataset exists on Hub
-        api = HfApi()
-        try:
-            dataset_info = api.dataset_info(DATASET_REPO)
-            # Dataset exists, download it
-            print(f"Syncing existing dataset from {DATASET_REPO}")
-            remote_dataset = load_dataset(DATASET_REPO)
-
-            # Convert to list of dictionaries
-            remote_data = [item for item in remote_dataset['train']]
-
-            # Check if local file exists
-            local_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
-            local_data = []
-
-            if os.path.exists(local_file):
-                # Read local data
-                with open(local_file, 'r') as f:
-                    for line in f:
-                        try:
-                            local_data.append(json.loads(line))
-                        except json.JSONDecodeError:
-                            continue
-
-            # Merge data (using IDs to avoid duplicates)
-            all_items = {}
-            for item in remote_data + local_data:
-                all_items[item['id']] = item
-
-            # Write back merged data
-            with open(local_file, 'w') as f:
-                for item in all_items.values():
-                    f.write(json.dumps(item) + '\n')
-
-            print(f"Synced {len(all_items)} feedback items")
-            return True
-
-        except Exception as e:
-            print(f"Dataset {DATASET_REPO} does not exist yet or could not be accessed: {e}")
-            return False
-
-    except Exception as e:
-        print(f"Error syncing dataset: {e}")
-        return False
-
-# Call sync on startup
-sync_dataset_from_hub()
-
 # Feedback storage functions
 def save_feedback_locally(conversation, satisfaction, feedback_text):
     """Save feedback to a local JSONL file"""
@@ -150,17 +93,49 @@ def push_feedback_to_hub(hf_token=None):
     print(f"Error pushing feedback data to Hub: {e}")
     return False
 
+# Modified predict function to update conversation state
+@spaces.GPU(duration=120)
+def predict(message, history, state, temperature, top_p):
+    # Update history with user message
+    history.append({"role": "user", "content": message})
+
+
+    input_text = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+
+    # Create a streamer
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+    # Set up generation parameters
+    generation_kwargs = {
+        "input_ids": inputs,
+        "max_new_tokens": 1024,
+        "temperature": float(temperature),
+        "top_p": float(top_p),
+        "do_sample": True,
+        "streamer": streamer,
+    }
+
+    # Run generation in a separate thread
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+
+    # Yield from the streamer as tokens are generated
+    partial_text = ""
+    for new_text in streamer:
+        partial_text += new_text
+        yield partial_text, state
+
+    # After full generation, update state with assistant's response
+    history.append({"role": "assistant", "content": partial_text})
+    state = history.copy()
+    return partial_text, state
+
 # Function to handle the research feedback submission
-def submit_research_feedback(conv_history, satisfaction, feedback_text):
+def submit_research_feedback(conversation_state, satisfaction, feedback_text):
     """Save user feedback both locally and to HuggingFace Hub"""
-    # Print debug information
-    print(f"Saving feedback with conversation history containing {len(conv_history)} messages")
-    if conv_history and len(conv_history) > 0:
-        print(f"First message: {conv_history[0]['role']}: {conv_history[0]['content'][:30]}...")
-        print(f"Last message: {conv_history[-1]['role']}: {conv_history[-1]['content'][:30]}...")
-
     # Save locally first
-    feedback_id = save_feedback_locally(
+    feedback_id = save_feedback_locally(conversation_state, satisfaction, feedback_text)
 
     # Get token from environment variable
     env_token = os.environ.get("HF_TOKEN")
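For context: the new predict function above uses transformers' threaded streaming pattern, where model.generate runs on a worker thread while TextIteratorStreamer yields decoded text back in the caller. A minimal self-contained sketch of that pattern, assuming a generic causal LM ("gpt2" and the prompt are placeholders, not this Space's model):

    from threading import Thread
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    # Placeholder model; the Space's actual model/tokenizer are set up elsewhere in app.py.
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    inputs = tokenizer("Hello, my name is", return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks, so it runs on a background thread; the streamer is fed
    # from that thread and iterated here as text becomes available.
    thread = Thread(target=model.generate,
                    kwargs={**inputs, "streamer": streamer, "max_new_tokens": 20})
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text  # accumulate, as predict() does before each yield
    thread.join()
    print(partial_text)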
@@ -175,114 +150,29 @@ def submit_research_feedback(conv_history, satisfaction, feedback_text):
 
     return status_msg
 
-# Initial state - set up at app start
-def initialize_state():
-    """Initialize the conversation state - this could load previous sessions or start fresh"""
-    return []  # Start with empty conversation history
-
 # Create the Gradio blocks interface
 with gr.Blocks() as demo:
-    #
-
+    # State to track conversation history
+    conversation_state = gr.State([])
 
     with gr.Row():
         with gr.Column(scale=3):
-            #
-            def enhanced_predict(message, history, temperature, top_p, state):
-
-
-            state =
-
-
-            # Copy history to state if state is empty but history exists
-            if len(state) == 0 and len(history) > 0:
-                state = history.copy()
-                print(f"Copied {len(history)} messages from history to state")
-
-            # Add user message to state
-            state.append({"role": "user", "content": message})
-
-            # Process with the model (this doesn't modify the original history)
-            input_text = tokenizer.apply_chat_template(state, tokenize=False, add_generation_prompt=True)
-            inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
-
-            # Create a streamer
-            streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-            # Set up generation parameters
-            generation_kwargs = {
-                "input_ids": inputs,
-                "max_new_tokens": 1024,
-                "temperature": float(temperature),
-                "top_p": float(top_p),
-                "do_sample": True,
-                "streamer": streamer,
-                "eos_token_id": 128009,
-            }
-
-            # Run generation in a separate thread
-            thread = Thread(target=model.generate, kwargs=generation_kwargs)
-            thread.start()
-
-            # Yield from the streamer as tokens are generated
-            response = ""
-            for new_text in streamer:
-                response += new_text
-                # For each partial response, yield the text only
-                # We'll update the state after generation is complete
-                yield response
-
-            # After generation completes, update our state with the final response
-            state.append({"role": "assistant", "content": response})
-
-            # Return the updated state
-            return state
-
-            # Create a wrapper that connects to ChatInterface but also updates our state
-            def chat_with_state(message, history, temperature, top_p):
-                # This function is what interfaces with the ChatInterface
-                nonlocal conv_state
-
-                # Access the current state
-                current_state = conv_state.value if conv_state.value else []
-
-                # Call the main function that generates responses and updates state
-                # This is a generator function, so we need to iterate through its outputs
-                response_gen = enhanced_predict(message, history, temperature, top_p, current_state)
-
-                # For each response, yield it and also update our state at the end
-                last_response = None
-                for response in response_gen:
-                    last_response = response
-                    yield response
-
-                # After generation is complete, update our state
-                if last_response is not None:
-                    # Create a full copy of the history plus the new exchange
-                    updated_state = []
-                    # Add all previous history
-                    for msg in history:
-                        updated_state.append(msg.copy())
-                    # Add new exchange
-                    updated_state.append({"role": "user", "content": message})
-                    updated_state.append({"role": "assistant", "content": last_response})
-
-                    # Store in our state
-                    conv_state.value = updated_state
-
-                    # Debug
-                    print(f"Updated conversation state with {len(updated_state)} messages")
-                    if updated_state:
-                        last_msg = updated_state[-1]
-                        print(f"Last message: {last_msg['role']}: {last_msg['content'][:30]}...")
+            # Custom chat function wrapper to update state
+            def chat_with_state(message, history, state, temperature, top_p):
+                for partial_response, updated_state in predict(message, history, state, temperature, top_p):
+                    # Update our state with each yield
+                    state = updated_state
+                    yield partial_response, state
 
             # Create ChatInterface
             chatbot = gr.ChatInterface(
                 chat_with_state,
                 additional_inputs=[
+                    conversation_state,
                     gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
                     gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
                 ],
+                additional_outputs=[conversation_state],
                 type="messages"
             )
 
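The key wiring in this change is a gr.State that flows into the chat function through additional_inputs and is written back through additional_outputs, which is what lets the feedback handler read the full conversation later. A stripped-down sketch of just that mechanism (an echo bot stands in for the model; additional_outputs on gr.ChatInterface requires a recent Gradio release):

    import gradio as gr

    def chat_with_state(message, history, state):
        # Yield (response, state) tuples; the extra value is routed back
        # into conversation_state through additional_outputs.
        state = (state or []) + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": f"Echo: {message}"},
        ]
        yield f"Echo: {message}", state

    with gr.Blocks() as demo:
        conversation_state = gr.State([])
        gr.ChatInterface(
            chat_with_state,
            additional_inputs=[conversation_state],
            additional_outputs=[conversation_state],
            type="messages",
        )

    demo.launch()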
@@ -317,10 +207,10 @@ with gr.Blocks() as demo:
         feedback_modal
     )
 
-    # Connect the submit button to the submit_research_feedback function
+    # Connect the submit button to the submit_research_feedback function with the current conversation state
    submit_button.click(
         submit_research_feedback,
-        inputs=[
+        inputs=[conversation_state, satisfaction, feedback_text],
         outputs=response_text
     )
 