"""Streamlit app that predicts hotel prices with a linear regression model."""

import os

import pandas as pd
import streamlit as st
from sklearn.linear_model import LinearRegression

# Directory the UI scans for candidate input files.
INPUT_DIR = 'dataset'
# Artifacts written to the current working directory by predict_hotel_price.
MERGED_OUTPUT_PATH = 'merged_data.csv'
PREDICTIONS_OUTPUT_PATH = 'predictions.csv'
# Columns that are present in the data but never used as model features.
UNUSED_COLUMNS = ['facilities', 'location']


def _parse_rating(value):
    """Convert one raw rating cell to a number.

    For strings, only the first whitespace-separated token is considered and
    it must be a plain non-negative decimal (digits with optional dots);
    otherwise NaN is returned. Non-string values (already numeric, or
    missing) are returned unchanged.
    """
    if not isinstance(value, str):
        return value
    tokens = value.split()
    if not tokens or not tokens[0].replace('.', '').isdigit():
        return float('nan')
    try:
        return float(tokens[0])
    except ValueError:
        # A token such as '1.2.3' passes the digit check above but is not a
        # valid float; treat it as missing instead of crashing.
        return float('nan')


def _parse_price(value):
    """Convert one raw price cell to a float.

    Strings have thousands separators (',') and the 'avg/night' suffix
    removed before conversion. Non-string values are returned unchanged.

    Raises:
        ValueError: If a string cell is still not a valid number after the
            separators and suffix are removed.
    """
    if isinstance(value, str):
        return float(value.replace(',', '').replace('avg/night', ''))
    return value


def _clean_rating(column: pd.Series) -> pd.Series:
    """Return *column* as a numeric series, with unparseable cells as NaN."""
    return pd.to_numeric(column.apply(_parse_rating), errors='coerce')


def predict_hotel_price(train_features_path, train_label_path, test_features_path):
    """Train a linear regression on the training files and predict test prices.

    The training features and labels are joined column-wise by row position,
    cleaned, and used to fit a ``LinearRegression`` model. The model is then
    applied to the test features.

    Side effects: writes the raw merged training data to ``merged_data.csv``
    and the predictions to ``predictions.csv`` in the current working
    directory.

    Args:
        train_features_path: CSV with the training features. Must contain a
            'rating' column.
        train_label_path: CSV with the training labels. Must contain a
            'Price' column.
        test_features_path: CSV with the test features. Must contain every
            feature column used for training.

    Returns:
        A DataFrame with an 'ID' column (0..n-1, one per test row) and an
        integer 'Price' column holding the predicted prices.

    Raises:
        KeyError: If a required column is missing from the input files.
        ValueError: If a price string cannot be parsed, or if no usable
            training rows remain after cleaning.
    """
    train_features = pd.read_csv(train_features_path)
    train_label = pd.read_csv(train_label_path)

    # Join features and labels side by side (aligned on the row index) and
    # add a running 'ID' as the left-most column.
    merged = pd.concat([train_features, train_label], axis=1)
    merged.insert(0, 'ID', range(len(merged)))
    merged.to_csv(MERGED_OUTPUT_PATH, index=False)

    # Work on the in-memory frame; re-reading the file just written would
    # only add a redundant disk round trip.
    training = merged.drop(columns=UNUSED_COLUMNS, errors='ignore')
    training['rating'] = _clean_rating(training['rating'])
    training['Price'] = training['Price'].apply(_parse_price)

    # Rows without a rating or a price cannot be used to fit the model.
    training = training.dropna(subset=['rating', 'Price'])
    if training.empty:
        raise ValueError(
            "No usable training rows: every row is missing 'rating' or 'Price'."
        )

    feature_columns = [
        column for column in training.columns if column not in ('ID', 'Price')
    ]

    model = LinearRegression()
    model.fit(training[feature_columns], training['Price'])

    test_features = pd.read_csv(test_features_path)
    missing_columns = [
        column for column in feature_columns if column not in test_features.columns
    ]
    if missing_columns:
        raise KeyError(
            f"Test data is missing feature column(s): {missing_columns}"
        )

    # Select exactly the training features, in training order, so extra
    # columns (ID, facilities, location, ...) are ignored and the feature
    # names match what the model was fitted on.
    test_matrix = test_features[feature_columns].copy()

    # Apply the same rating parsing as for training. Missing or unparseable
    # ratings are imputed with the training median so that every test row
    # still receives a prediction.
    test_matrix['rating'] = _clean_rating(test_matrix['rating']).fillna(
        training['rating'].median()
    )

    predictions = model.predict(test_matrix)

    predictions_df = pd.DataFrame(predictions, columns=['Price'])
    predictions_df.insert(loc=0, column='ID', value=range(len(predictions_df)))
    # Prices are reported as whole numbers (fractional part is truncated).
    predictions_df['Price'] = predictions_df['Price'].astype(int)

    predictions_df.to_csv(PREDICTIONS_OUTPUT_PATH, index=False)
    return predictions_df


def main():
    """Render the Streamlit UI: pick the three input files and run the model."""
    st.title("Hotel Price Prediction With Linear Regression")
    st.write("Memprediksi Harga Hotel Berdasarkan Rating")

    input_dir = INPUT_DIR
    if not os.path.isdir(input_dir):
        st.error(f"Direktori '{input_dir}' tidak ditemukan.")
        return

    # Offer only regular files, in a stable (sorted) order; os.listdir also
    # returns sub-directories and gives no ordering guarantee.
    input_paths = sorted(
        path
        for path in (os.path.join(input_dir, name) for name in os.listdir(input_dir))
        if os.path.isfile(path)
    )
    if not input_paths:
        st.error(f"Tidak ada file di direktori '{input_dir}'.")
        return

    train_features_path = st.selectbox(
        "Train Features = 'Berisi Fitur-Fitur Dari Data Latih'", input_paths
    )
    train_label_path = st.selectbox(
        "Train Label = 'Berisi Label Dari Data Latih'", input_paths
    )
    test_features_path = st.selectbox(
        "Test Features = 'Berisi Fitur-Fitur Dari Data Uji'", input_paths
    )

    # Run the prediction on demand and show the result with a download link.
    if st.button("Prediksi Hasil Harga"):
        predictions_df = predict_hotel_price(
            train_features_path, train_label_path, test_features_path
        )
        st.write(predictions_df)
        st.download_button(
            label="Download Hasil Prediksi CSV",
            data=predictions_df.to_csv(index=False),
            file_name="predictions.csv",
            mime="text/csv",
        )


if __name__ == '__main__':
    main()