JoshMe1 committed on
Commit
8b37e35
·
1 Parent(s): a619003

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import streamlit as st
4
+ from sklearn.linear_model import LinearRegression
5
+
6
def _parse_rating(value):
    """Convert one raw ``rating`` cell to a float, or ``None`` when unusable.

    String cells are split on whitespace and only the first token is
    considered; it must consist of digits with at most one decimal point.
    Non-string cells (already-numeric values, NaN) are returned unchanged.
    """
    if not isinstance(value, str):
        return value
    tokens = value.split()
    if not tokens:
        return None
    head = tokens[0]
    # Permit a single decimal point; letters, signs or "4.5.6" are rejected.
    if not head.replace('.', '', 1).isdigit():
        return None
    try:
        return float(head)
    except ValueError:
        # str.isdigit() accepts some Unicode digits that float() cannot parse.
        return None


def _parse_price(value):
    """Convert one raw ``Price`` cell to a float; non-strings pass through.

    Thousands separators (``,``) and the ``avg/night`` suffix are removed
    before conversion.
    """
    if isinstance(value, str):
        return float(value.replace(',', '').replace('avg/night', ''))
    return value


def predict_hotel_price(train_features_path, train_label_path, test_features_path) -> pd.DataFrame:
    """Train a linear regression on the training CSVs and predict test prices.

    The training features and labels are concatenated column-wise on the row
    index, cleaned, and used to fit a ``LinearRegression``. The test features
    are cleaned with the same rating parser and a price is predicted for every
    test row.

    Side effects: writes ``merged_data.csv`` (raw merged training data with an
    added ``ID`` column) and ``predictions.csv`` to the working directory.

    Args:
        train_features_path: CSV with the training features; must contain
            ``rating``, ``facilities`` and ``location`` columns.
        train_label_path: CSV with the training labels; must contain ``Price``.
        test_features_path: CSV with the test features; must contain
            ``rating``, ``facilities``, ``location`` and ``ID`` columns.

    Returns:
        A DataFrame with columns ``ID`` (0..n-1 in test row order) and
        ``Price`` (prediction converted to ``int``).

    Raises:
        ValueError: if no training row has both a usable rating and a price.
        KeyError: if a required column is missing from an input file.
    """
    train_features = pd.read_csv(train_features_path)
    train_label = pd.read_csv(train_label_path)

    # Join features and labels side by side on the row index.
    training = pd.concat([train_features, train_label], axis=1)

    # Add a running 'ID' column as the left-most column.
    training.insert(0, 'ID', range(len(training)))

    # Persist the merged raw data as an artifact. The frame already in memory
    # is used from here on instead of re-reading the file that was just written.
    training.to_csv('merged_data.csv', index=False)

    # Normalise the rating and price columns to numeric values.
    training['rating'] = pd.to_numeric(
        training['rating'].apply(_parse_rating), errors='coerce'
    )
    training['Price'] = training['Price'].apply(_parse_price)

    # The regression cannot be fitted on NaN, so drop rows missing either the
    # rating feature or the price target.
    training = training.dropna(subset=['rating', 'Price'])
    training = training.drop(['facilities', 'location'], axis=1)
    if training.empty:
        raise ValueError(
            "No usable training rows: every row is missing a parsable "
            "'rating' or a 'Price'."
        )

    feature_frame = training.drop(['ID', 'Price'], axis=1)

    model = LinearRegression()
    model.fit(feature_frame, training['Price'])

    # Read the test set and remove the columns that are not model features.
    test_features = pd.read_csv(test_features_path)
    test_features = test_features.drop(['facilities', 'location', 'ID'], axis=1)

    # Parse test ratings with the same rules as the training ratings.
    test_features['rating'] = pd.to_numeric(
        test_features['rating'].apply(_parse_rating), errors='coerce'
    )
    # Every test row needs a prediction, so rows cannot be dropped here;
    # missing ratings are filled with the mean training rating instead.
    test_features['rating'] = test_features['rating'].fillna(
        training['rating'].mean()
    )

    # Present the features in exactly the order the model was fitted with.
    test_features = test_features[list(feature_frame.columns)]

    predictions = model.predict(test_features)

    predictions_df = pd.DataFrame(predictions, columns=['Price'])
    predictions_df.insert(loc=0, column='ID', value=range(len(predictions_df)))

    # Convert the predicted prices to integers (truncates toward zero).
    predictions_df['Price'] = predictions_df['Price'].astype(int)

    predictions_df.to_csv('predictions.csv', index=False)
    return predictions_df
61
+
62
def main(input_dir='/content/dataset'):
    """Render the Streamlit page for choosing input files and predicting prices.

    Lists the regular files in ``input_dir``, offers them in three dropdowns
    (train features, train label, test features) and, when the "Predict"
    button is pressed, runs ``predict_hotel_price`` and displays the result.

    Args:
        input_dir: Directory holding the input CSV files. Defaults to the
            path the app originally used.
    """
    st.title("Hotel Price Prediction")
    st.write("Predict the price of hotel rooms based on the given features")

    # Collect the candidate input files; report a missing or unreadable
    # directory on the page instead of crashing the app.
    try:
        entries = sorted(os.listdir(input_dir))
    except OSError as exc:
        st.error(f"Cannot read dataset directory '{input_dir}': {exc}")
        return

    # Only regular files can be read as CSV; skip sub-directories.
    candidate_paths = [
        path
        for path in (os.path.join(input_dir, name) for name in entries)
        if os.path.isfile(path)
    ]
    if not candidate_paths:
        st.warning(f"No input files found in '{input_dir}'.")
        return

    # Offer the same file list in each dropdown.
    train_features_path = st.selectbox("Train Features", candidate_paths)
    train_label_path = st.selectbox("Train Label", candidate_paths)
    test_features_path = st.selectbox("Test Features", candidate_paths)

    # Run the prediction and show the resulting table.
    if st.button("Predict"):
        predictions_df = predict_hotel_price(
            train_features_path, train_label_path, test_features_path
        )
        st.write(predictions_df)
79
+
80
# Launch the Streamlit page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()