Spaces:

0xEmir
/

Big_data_sentiment_analysis

Build error

App Files Files Community

Big_data_sentiment_analysis / app.py

0xEmir

Create app.py

c9652bd verified 12 months ago

raw

history blame contribute delete

2.55 kB

	# Importing the required packages
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import nltk
	# Set the style sheet for plots
	plt.style.use('ggplot')

	# Read the hotel-review data straight from the Hugging Face Hub.
	# NOTE(review): `hf://` paths presumably require `huggingface_hub`
	# (its fsspec backend) to be installed -- confirm it is listed in the
	# Space's requirements.
	df = pd.read_csv("hf://datasets/patrickbdevaney/tripadvisor_hotel_reviews/data/tripadvisor_hotel_reviews.csv")

	# Promote the default row index to an explicit 'Id' column; the rest of
	# the script uses 'Id' as the key when joining sentiment scores back
	# onto the reviews.
	df = df.reset_index().rename(columns={'index': 'Id'})

	# Preview the first rows. A bare `df.head()` only renders in a notebook;
	# in a script its result is discarded, so print it explicitly (once).
	print(df.head())

	# Check the shape of the DataFrame
	print(df.shape)

	# Count the number of reviews for each rating (ordered by rating value)
	# and plot the counts as a bar chart.
	rating_counts = df['Rating'].value_counts().sort_index()
	ax = rating_counts.plot(
	    kind='bar',
	    title='Count of Reviews by Stars',
	    figsize=(10, 5),
	)
	ax.set_xlabel('Review Stars')
	# Bar heights are the number of reviews per rating, not a number of stars.
	ax.set_ylabel('No. of Reviews')
	plt.show()

	# Select a review for sentiment analysis.
	# NOTE(review): despite the name, this is the row labelled 200, not 250;
	# the name is kept because later statements refer to `rev250`.
	rev250 = df['Review'][200]
	print(rev250)

	# The tokenizer, POS tagger and named-entity chunker each load an NLTK
	# data package and raise LookupError when it is missing, so fetch them
	# first. Newer NLTK releases look for the `_tab` / `_eng` variants while
	# older ones use the original names; requesting both covers either case.
	for resource in (
	    'punkt', 'punkt_tab',
	    'averaged_perceptron_tagger', 'averaged_perceptron_tagger_eng',
	    'maxent_ne_chunker', 'maxent_ne_chunker_tab',
	    'words',
	):
	    nltk.download(resource, quiet=True)

	# Preprocess the review text
	tokens = nltk.word_tokenize(rev250)  # Tokenization
	tagged = nltk.pos_tag(tokens)  # Part-of-speech tagging
	entities = nltk.chunk.ne_chunk(tagged)  # Entity recognition

	# Print the resulting chunk tree
	entities.pprint()

	# Perform sentiment analysis using VADER
	from nltk.sentiment import SentimentIntensityAnalyzer

	# The analyzer loads the 'vader_lexicon' data package when constructed
	# and raises LookupError if it has not been downloaded yet.
	nltk.download('vader_lexicon', quiet=True)
	sia = SentimentIntensityAnalyzer()

	# Analyze sentiment for a positive sentence
	print(sia.polarity_scores('I am so happy!'))
	#>> {'neg': 0.0, 'neu': 0.318, 'pos': 0.682, 'compound': 0.6468}

	# Analyze sentiment for a negative sentence
	print(sia.polarity_scores('I hate sweet aroma!'))
	#>> {'neg': 0.499, 'neu': 0.125, 'pos': 0.375, 'compound': -0.2481}

	# Analyze sentiment for the selected review
	print(sia.polarity_scores(rev250))
	#>> {'neg': 0.1, 'neu': 0.612, 'pos': 0.288, 'compound': 0.9556}

	# Perform sentiment analysis on the entire dataset
	from tqdm import tqdm

	# Map each review's 'Id' to its VADER polarity scores
	# ({'neg', 'neu', 'pos', 'compound'}). Iterating the two needed columns
	# in lockstep avoids building a full row object per review, which is
	# what iterrows() does; tqdm still reports progress over len(df) items.
	res = {
	    review_id: sia.polarity_scores(text)
	    for review_id, text in tqdm(zip(df['Id'], df['Review']), total=len(df))
	}

	# Create a DataFrame from the sentiment scores (one row per review Id,
	# one column per score) and merge it with the original DataFrame.
	vaders = pd.DataFrame.from_dict(res, orient='index')
	vaders = vaders.rename_axis('Id').reset_index()
	# Join on 'Id' explicitly rather than on whatever column names the two
	# frames happen to share.
	vaders = vaders.merge(df, how='left', on='Id')

	# A bare `vaders.head()` shows nothing outside a notebook; print it.
	print(vaders.head())

	# Visualize the sentiment scores: one bar plot per VADER score column,
	# each showing that score against the review rating.
	panels = (('pos', 'Positive'), ('neu', 'Neutral'), ('neg', 'Negative'))
	fig, axs = plt.subplots(1, 3, figsize=(12, 3))
	for axis, (score_col, panel_title) in zip(axs, panels):
	    sns.barplot(data=vaders, x='Rating', y=score_col, ax=axis)
	    axis.set_title(panel_title)

	# Add spacing between the subplots, then display the figure
	plt.tight_layout()
	plt.show()