Spaces:
Sleeping
| import os | |
| import pandas as pd | |
| import streamlit as st | |
| from sklearn.linear_model import LinearRegression | |
def _parse_rating(value):
    """Return the numeric part of a ``rating`` cell, or ``None`` if unusable.

    String cells are split on whitespace and only the first token is
    considered (e.g. ``"4.5 Very Good"`` -> ``4.5``). Non-string cells
    (already numeric, or NaN for a missing cell) are returned unchanged so a
    numeric column is not discarded.
    """
    if not isinstance(value, str):
        return value
    tokens = value.split()
    # Blank cells and non-numeric leading tokens ("No rating") are unusable.
    if not tokens or not tokens[0].replace('.', '').isdigit():
        return None
    try:
        return float(tokens[0])
    except ValueError:
        # Tokens such as "4.5.1" pass the digit check but are not valid floats.
        return None


def _parse_price(value):
    """Convert a price cell such as ``"1,234 avg/night"`` to a float.

    Non-string cells are returned unchanged.
    """
    if isinstance(value, str):
        return float(value.replace(',', '').replace('avg/night', ''))
    return value


def predict_hotel_price(train_features_path, train_label_path, test_features_path):
    """Fit a linear regression on the training CSVs and predict test prices.

    Args:
        train_features_path: CSV with the training features. Must contain the
            ``rating``, ``facilities`` and ``location`` columns.
        train_label_path: CSV with the training labels. Must contain ``Price``.
        test_features_path: CSV with the test features. Must contain ``ID``,
            ``rating``, ``facilities`` and ``location``.

    Returns:
        A DataFrame with columns ``ID`` (0..n-1, one per test row) and
        ``Price`` (prediction cast to int).

    Raises:
        ValueError: if no training row has both a usable rating and a price.

    Side effects:
        Writes ``merged_data.csv`` (raw merged training data) and
        ``predictions.csv`` to the current working directory.
    """
    train_features = pd.read_csv(train_features_path)
    train_label = pd.read_csv(train_label_path)

    # Features and labels live in separate files; pair them by row index.
    training = pd.concat([train_features, train_label], axis=1)
    training.insert(0, 'ID', range(len(training)))
    # Keep the merged raw data on disk as an artifact. The in-memory frame is
    # used from here on instead of re-reading the file that was just written.
    training.to_csv('merged_data.csv', index=False)

    # The same rating parser is used for training and test data so both sets
    # are interpreted identically.
    training['rating'] = pd.to_numeric(
        training['rating'].apply(_parse_rating), errors='coerce'
    )
    training['Price'] = training['Price'].apply(_parse_price)

    # Rows without a usable rating or without a label cannot be used to fit.
    training = training.dropna(subset=['rating', 'Price'])
    training = training.drop(['facilities', 'location'], axis=1)
    if training.empty:
        raise ValueError(
            "No training rows with both a usable rating and a price were found."
        )

    model = LinearRegression()
    model.fit(training.drop(['ID', 'Price'], axis=1), training['Price'])

    test_features = pd.read_csv(test_features_path)
    test_features = test_features.drop(['facilities', 'location', 'ID'], axis=1)
    test_ratings = pd.to_numeric(
        test_features['rating'].apply(_parse_rating), errors='coerce'
    )
    # Every test row needs a prediction, so unusable ratings are replaced by
    # the mean training rating instead of being dropped or crashing predict().
    test_features['rating'] = test_ratings.fillna(training['rating'].mean())

    predictions = model.predict(test_features)

    predictions_df = pd.DataFrame(predictions, columns=['Price'])
    predictions_df.insert(loc=0, column='ID', value=range(len(predictions_df)))
    # Prices are reported as whole numbers (astype(int) truncates toward zero).
    predictions_df['Price'] = predictions_df['Price'].astype(int)
    predictions_df.to_csv('predictions.csv', index=False)
    return predictions_df
def main():
    """Render the Streamlit page for the hotel price predictor.

    Lets the user pick the training-features, training-label and test-features
    files from the ``dataset`` directory, runs ``predict_hotel_price`` when
    the button is pressed, then shows the predictions and a CSV download
    button.
    """
    st.title("Hotel Price Prediction With Linear Regression")
    st.write("Memprediksi Harga Hotel Berdasarkan Rating")

    input_dir = 'dataset'
    if not os.path.isdir(input_dir):
        st.error(f"Direktori '{input_dir}' tidak ditemukan.")
        return

    # Build the option list once. It is sorted so the dropdown order is stable
    # (os.listdir order is arbitrary) and limited to regular files because a
    # sub-directory cannot be read as a CSV.
    candidates = (
        os.path.join(input_dir, name) for name in sorted(os.listdir(input_dir))
    )
    input_paths = [path for path in candidates if os.path.isfile(path)]
    if not input_paths:
        st.error(f"Tidak ada file di dalam direktori '{input_dir}'.")
        return

    train_features_path = st.selectbox(
        "Train Features = 'Berisi Fitur-Fitur Dari Data Latih'", input_paths
    )
    train_label_path = st.selectbox(
        "Train Label = 'Berisi Label Dari Data Latih'", input_paths
    )
    test_features_path = st.selectbox(
        "Test Features = 'Berisi Fitur-Fitur Dari Data Uji'", input_paths
    )

    # Run the prediction and display the result.
    if st.button("Prediksi Hasil Harga"):
        predictions_df = predict_hotel_price(
            train_features_path, train_label_path, test_features_path
        )
        st.write(predictions_df)
        st.download_button(
            label="Download Hasil Prediksi CSV",
            data=predictions_df.to_csv(index=False),
            file_name="predictions.csv",
            mime="text/csv",
        )
# Start the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()