Spaces:

0xEmir
/

Big_data_sentiment_analysis

Build error

App Files Files Community

0xEmir committed on Dec 8, 2024

Commit

c9652bd

verified ·

1 Parent(s): 0f7b5a7

Create app.py

Browse files

Files changed (1) hide show

app.py +88 -0

app.py ADDED Viewed

	@@ -0,0 +1,88 @@

+# Importing the required packages
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import nltk
+# Set the style sheet for plots
+plt.style.use('ggplot')
+# Read the data
+df = pd.read_csv("hf://datasets/patrickbdevaney/tripadvisor_hotel_reviews/data/tripadvisor_hotel_reviews.csv")
+df = df.reset_index().rename(columns={'index': 'Id'})
+df.head()
+df.head()
+# Check the shape of the DataFrame
+print(df.shape)
+# Count the number of reviews for each rating and plot a bar chart
+ax = df['Rating'].value_counts().sort_index() \
+    .plot(kind='bar',
+          title='Count of Reviews by Stars',
+          figsize=(10, 5))
+ax.set_xlabel('Review Stars')
+ax.set_ylabel('No. of Stars')
+plt.show()
+# Select a review for sentiment analysis
+rev250 = df['Review'][200]
+print(rev250)
+# Preprocess the review text
+tokens = nltk.word_tokenize(rev250)  # Tokenization
+tagged = nltk.pos_tag(tokens)  # Part-of-speech tagging
+entities = nltk.chunk.ne_chunk(tagged)  # Entity recognition
+entities.pprint()
+# Perform sentiment analysis using VADER
+from nltk.sentiment import SentimentIntensityAnalyzer
+sia = SentimentIntensityAnalyzer()
+# Analyze sentiment for a positive sentence
+print(sia.polarity_scores('I am so happy!'))
+#>> {'neg': 0.0, 'neu': 0.318, 'pos': 0.682, 'compound': 0.6468}
+# Analyze sentiment for a negative sentence
+print(sia.polarity_scores('I hate sweet aroma!'))
+#>> {'neg': 0.499, 'neu': 0.125, 'pos': 0.375, 'compound': -0.2481}
+# Analyze sentiment for the selected review
+print(sia.polarity_scores(rev250))
+#>> {'neg': 0.1, 'neu': 0.612, 'pos': 0.288, 'compound': 0.9556}
+# Perform sentiment analysis on the entire dataset
+from tqdm import tqdm
+res = {}  # Store the sentiment scores
+for i, row in tqdm(df.iterrows(), total=len(df)):
+    text = row['Review']
+    myid = row['Id']
+    res[myid] = sia.polarity_scores(text)
+# Create a DataFrame from the sentiment scores and merge it with the original DataFrame
+vaders = pd.DataFrame(res).T
+vaders = vaders.reset_index().rename(columns={'index': 'Id'})
+vaders = vaders.merge(df, how='left')
+vaders.head()
+# Visualize the sentiment scores
+fig, axs = plt.subplots(1, 3, figsize=(12, 3))
+sns.barplot(data=vaders, x='Rating', y='pos', ax=axs[0])
+sns.barplot(data=vaders, x='Rating', y='neu', ax=axs[1])
+sns.barplot(data=vaders, x='Rating', y='neg', ax=axs[2])
+# Set titles for the subplots
+axs[0].set_title('Positive')
+axs[1].set_title('Neutral')
+axs[2].set_title('Negative')
+# Add spacing between the subplots
+plt.tight_layout()
+plt.show()