Spaces:
Sleeping
Sleeping
Yoon-gu Hwang
Claude
committed on
Commit
ยท
3e3dc68
0
Parent(s):
Initial commit: Concept Drift Simulator
Browse files- 4가지 드리프트 유형 시뮬레이션
(Sudden, Gradual, Incremental, Recurring)
- Plotly 기반 인터랙티브 시각화
- Gradio UI 구성
- 드리프트 분석 기능
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <[email protected]>
- .gitignore +17 -0
- README.md +56 -0
- analyzer.py +81 -0
- app.py +92 -0
- drift_simulator.py +97 -0
- requirements.txt +3 -0
- visualizer.py +110 -0
.gitignore
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.py[cod]
|
| 3 |
+
*$py.class
|
| 4 |
+
*.so
|
| 5 |
+
.Python
|
| 6 |
+
env/
|
| 7 |
+
venv/
|
| 8 |
+
ENV/
|
| 9 |
+
.venv
|
| 10 |
+
*.egg-info/
|
| 11 |
+
dist/
|
| 12 |
+
build/
|
| 13 |
+
.DS_Store
|
| 14 |
+
.idea/
|
| 15 |
+
.vscode/
|
| 16 |
+
*.log
|
| 17 |
+
flagged/
|
README.md
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Concept Drift Simulator
|
| 2 |
+
|
| 3 |
+
컨셉 드리프트(Concept Drift)의 4가지 주요 유형을 시뮬레이션하고 시각화하는 인터랙티브 웹 애플리케이션입니다.
|
| 4 |
+
|
| 5 |
+
## 주요 기능
|
| 6 |
+
|
| 7 |
+
4가지 컨셉 드리프트 유형 시뮬레이션:
|
| 8 |
+
|
| 9 |
+
1. **Sudden (급격한 드리프트)**: 특정 시점에서 데이터 분포가 갑자기 변경
|
| 10 |
+
2. **Gradual (점진적 드리프트)**: 이전 분포와 새 분포가 섞이며 천천히 전환
|
| 11 |
+
3. **Incremental (증분적 드리프트)**: 작은 단계로 변화가 발생하여 계단식 패턴 형성
|
| 12 |
+
4. **Recurring (반복적 드리프트)**: 이전 분포가 주기적으로 다시 나타남
|
| 13 |
+
|
| 14 |
+
## 기술 스택
|
| 15 |
+
|
| 16 |
+
- **Gradio**: 인터랙티브 UI 구성
|
| 17 |
+
- **Plotly**: 동적 시각화
|
| 18 |
+
- **NumPy**: 데이터 생성 및 분석
|
| 19 |
+
|
| 20 |
+
## 설치 및 실행
|
| 21 |
+
|
| 22 |
+
```bash
|
| 23 |
+
# 의존성 설치
|
| 24 |
+
pip install -r requirements.txt
|
| 25 |
+
|
| 26 |
+
# 앱 실행
|
| 27 |
+
python app.py
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
브라우저에서 `http://localhost:7860` 접속
|
| 31 |
+
|
| 32 |
+
## 사용 방법
|
| 33 |
+
|
| 34 |
+
1. 왼쪽 패널에서 드리프트 유형 선택
|
| 35 |
+
2. "시뮬레이션 실행" 버튼 클릭
|
| 36 |
+
3. 오른쪽의 인터랙티브 Plotly 차트와 분석 결과 확인
|
| 37 |
+
|
| 38 |
+
## 프로젝트 구조
|
| 39 |
+
|
| 40 |
+
```
|
| 41 |
+
concept-drift/
|
| 42 |
+
├── app.py              # Gradio 애플리케이션
|
| 43 |
+
├── drift_simulator.py  # 드리프트 데이터 생성
|
| 44 |
+
├── visualizer.py       # Plotly 시각화
|
| 45 |
+
├── analyzer.py         # 드리프트 분석
|
| 46 |
+
├── requirements.txt    # 패키지 의존성
|
| 47 |
+
└── README.md
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
## Hugging Face Spaces
|
| 51 |
+
|
| 52 |
+
배포된 앱: (배포 후 링크 추가)
|
| 53 |
+
|
| 54 |
+
## 라이선스
|
| 55 |
+
|
| 56 |
+
MIT License
|
analyzer.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from typing import Dict, Tuple
|
| 3 |
+
|
| 4 |
+
def analyze_drift(X: np.ndarray, y: np.ndarray, drift_points: np.ndarray, drift_type: str) -> Dict[str, object]:
    """Summarise a drift dataset as a whole and per segment.

    The samples are cut at every index listed in ``drift_points``; each
    resulting segment gets its own mean, standard deviation and a
    least-squares line fitted with ``np.polyfit``.

    Args:
        X: 1-D feature values.
        y: 1-D target values aligned with ``X``.
        drift_points: Sample indices at which a new segment starts. Any
            integer sequence is accepted (NumPy array, list, tuple).
        drift_type: Label stored unchanged under ``"drift_type"``.

    Returns:
        A dict with the keys ``drift_type``, ``total_samples``,
        ``num_drift_points``, ``drift_locations``, ``mean_y``, ``std_y``,
        ``min_y``, ``max_y`` and ``segments``. ``segments`` is a list of
        dicts with ``segment_id``, ``start_idx``, ``end_idx``, ``mean``,
        ``std``, ``slope`` and ``intercept``.
    """
    # Normalise to plain Python ints so lists/tuples work as well as arrays
    # (the previous ``drift_points.tolist()`` required an ndarray).
    boundaries = [int(point) for point in np.asarray(drift_points).ravel()]

    analysis = {
        "drift_type": drift_type,
        "total_samples": len(X),
        "num_drift_points": len(boundaries),
        "drift_locations": list(boundaries),
    }

    # Global statistics over the full target vector.
    analysis["mean_y"] = float(np.mean(y))
    analysis["std_y"] = float(np.std(y))
    analysis["min_y"] = float(np.min(y))
    analysis["max_y"] = float(np.max(y))

    # Per-segment statistics: consecutive edges delimit half-open ranges.
    edges = [0, *boundaries, len(X)]
    segments = []
    for segment_id, (start, end) in enumerate(zip(edges, edges[1:])):
        segment_X = X[start:end]
        segment_y = y[start:end]

        # A line fit needs at least two points; otherwise fall back to a
        # flat line through the single sample (or the origin when empty).
        if len(segment_X) > 1:
            coeffs = np.polyfit(segment_X, segment_y, 1)
            slope = float(coeffs[0])
            intercept = float(coeffs[1])
        else:
            slope = 0.0
            intercept = float(segment_y[0]) if len(segment_y) > 0 else 0.0

        # An empty segment still reports NaN, but without asking NumPy to
        # reduce an empty slice (which emits RuntimeWarnings).
        if len(segment_y) > 0:
            seg_mean = float(np.mean(segment_y))
            seg_std = float(np.std(segment_y))
        else:
            seg_mean = seg_std = float("nan")

        segments.append({
            "segment_id": segment_id,
            "start_idx": int(start),
            "end_idx": int(end),
            "mean": seg_mean,
            "std": seg_std,
            "slope": slope,
            "intercept": intercept,
        })

    analysis["segments"] = segments

    return analysis
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def format_analysis_summary(analysis: Dict) -> str:
    """Render an analysis dict as a human-readable Markdown report.

    Args:
        analysis: Mapping with the keys read below: ``drift_type``,
            ``total_samples``, ``num_drift_points``, ``mean_y``, ``std_y``,
            ``min_y``, ``max_y`` and ``segments`` (a list of dicts with
            ``segment_id``, ``start_idx``, ``end_idx``, ``mean``, ``std``,
            ``slope`` and ``intercept``).

    Returns:
        Markdown text (Korean labels) with a global section followed by one
        section per segment. Numbers are formatted with two decimals.
    """
    parts = [f"""
## 드리프트 분석 결과

**드리프트 유형:** {analysis['drift_type'].upper()}

**전체 데이터:**
- 총 샘플 수: {analysis['total_samples']}
- 드리프트 발생 횟수: {analysis['num_drift_points']}
- 평균: {analysis['mean_y']:.2f}
- 표준편차: {analysis['std_y']:.2f}
- 범위: [{analysis['min_y']:.2f}, {analysis['max_y']:.2f}]

**세그먼트별 분석:**
"""]

    for seg in analysis['segments']:
        # Print the intercept with an explicit sign so a negative value
        # reads "y = ax - b" instead of "y = ax + -b".
        intercept = seg['intercept']
        sign = "-" if intercept < 0 else "+"
        parts.append(f"""
**세그먼트 {seg['segment_id'] + 1}** (샘플 {seg['start_idx']}-{seg['end_idx']})
- 평균: {seg['mean']:.2f}
- 표준편차: {seg['std']:.2f}
- 관계식: y = {seg['slope']:.2f}x {sign} {abs(intercept):.2f}
""")

    # Join once instead of growing the string with repeated ``+=``.
    return "".join(parts)
|
app.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from drift_simulator import (
|
| 3 |
+
generate_sudden_drift,
|
| 4 |
+
generate_gradual_drift,
|
| 5 |
+
generate_incremental_drift,
|
| 6 |
+
generate_recurring_drift,
|
| 7 |
+
get_drift_description
|
| 8 |
+
)
|
| 9 |
+
from visualizer import create_drift_visualization
|
| 10 |
+
from analyzer import analyze_drift, format_analysis_summary
|
| 11 |
+
|
| 12 |
+
def simulate_and_visualize(drift_type: str):
    """Generate drift data for the selected type, then plot and analyse it.

    Args:
        drift_type: The UI label of the selected drift type, e.g.
            ``"Sudden (...)"``. Only the leading English word is used to pick
            the generator, so the localized suffix of the label does not have
            to match byte-for-byte.

    Returns:
        ``(figure, markdown)`` on success. ``(None, message)`` when the label
        does not name one of the four supported drift types.
    """
    # One generator per drift key, each with the demo's fixed parameters.
    generators = {
        "sudden": lambda: generate_sudden_drift(n_samples=1000, drift_point=500),
        "gradual": lambda: generate_gradual_drift(n_samples=1000, drift_start=300, drift_end=700),
        "incremental": lambda: generate_incremental_drift(n_samples=1000, n_steps=5),
        "recurring": lambda: generate_recurring_drift(n_samples=1000, cycle_length=250),
    }

    # "Sudden (...)" -> "sudden"; anything that is not a string maps to no key.
    label = drift_type if isinstance(drift_type, str) else ""
    drift_key = label.split(" ", 1)[0].lower()

    generate = generators.get(drift_key)
    if generate is None:
        return None, "올바른 드리프트 타입을 선택해주세요."

    X, y, drift_points = generate()

    # Visualization
    fig = create_drift_visualization(X, y, drift_points, drift_key)

    # Analysis
    analysis = analyze_drift(X, y, drift_points, drift_key)
    summary = format_analysis_summary(analysis)

    # Prepend the description of the drift type as a heading.
    description = get_drift_description(drift_key)
    full_summary = f"### {description}\n\n{summary}"

    return fig, full_summary
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# Gradio interface: drift-type selector and run button on the left,
# plot and analysis text on the right.
with gr.Blocks(title="Concept Drift Simulator", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # Concept Drift Simulator

        컨셉 드리프트(Concept Drift)의 4가지 유형을 시뮬레이션하고 시각화합니다.

        드리프트 유형을 선택하면 자동으로 데이터를 생성하고 인터랙티브 차트를 표시합니다.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            drift_type_radio = gr.Radio(
                choices=[
                    "Sudden (급격한 드리프트)",
                    "Gradual (점진적 드리프트)",
                    "Incremental (증분적 드리프트)",
                    "Recurring (반복적 드리프트)",
                ],
                label="드리프트 유형 선택",
                value="Sudden (급격한 드리프트)",
            )

            simulate_btn = gr.Button("시뮬레이션 실행", variant="primary")

        with gr.Column(scale=2):
            plot_output = gr.Plot(label="드리프트 시각화")

    analysis_output = gr.Markdown(label="분석 결과")

    # Event handlers.
    # NOTE(review): the intro text says a chart appears when a type is
    # selected, but only the button click and the initial page load are wired
    # up here - confirm whether a radio change handler is wanted.
    simulate_btn.click(
        fn=simulate_and_visualize,
        inputs=drift_type_radio,
        outputs=[plot_output, analysis_output],
    )

    # Initial load: run once with the radio's default value.
    demo.load(
        fn=simulate_and_visualize,
        inputs=drift_type_radio,
        outputs=[plot_output, analysis_output],
    )

if __name__ == "__main__":
    demo.launch()
|
drift_simulator.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from typing import Tuple
|
| 3 |
+
|
| 4 |
+
def generate_sudden_drift(n_samples: int = 1000, drift_point: int = 500) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Sudden drift: the X -> y relation switches abruptly at one index.

    Samples before ``drift_point`` follow ``y = 2*X + noise``; samples from
    ``drift_point`` onward follow ``y = -X + 5 + noise``. Noise is drawn from
    ``np.random.normal(0, 1, ...)``, first for the leading part and then for
    the trailing part.

    Returns:
        ``(X, y, drift_points)`` where ``X`` is ``np.linspace(0, 10,
        n_samples)`` and ``drift_points`` is a one-element array holding
        ``drift_point``.
    """
    X = np.linspace(0, 10, n_samples)
    tail_length = n_samples - drift_point

    # Old concept first, then the new one, so the noise draws happen in the
    # same order as the two slices appear in the output.
    old_concept = 2 * X[:drift_point] + np.random.normal(0, 1, drift_point)
    new_concept = -X[drift_point:] + 5 + np.random.normal(0, 1, tail_length)

    y = np.concatenate((old_concept, new_concept))
    return X, y, np.array([drift_point])
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def generate_gradual_drift(n_samples: int = 1000, drift_start: int = 300, drift_end: int = 700) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Gradual drift: the old relation fades into the new one over a window.

    Before ``drift_start`` samples follow ``y = 2*X + noise``; from
    ``drift_end`` onward they follow ``y = -X + 5 + noise``. Inside the
    window each sample is a weighted average of one noisy draw from each
    relation, with the weight of the new relation rising linearly from 0.

    Returns:
        ``(X, y, drift_points)`` with ``drift_points`` equal to
        ``[drift_start, drift_end]``.
    """
    X = np.linspace(0, 10, n_samples)
    y = np.zeros(n_samples)

    # Leading part: old relation only.
    y[:drift_start] = 2 * X[:drift_start] + np.random.normal(0, 1, drift_start)

    # Transition window.
    # NOTE(review): this averages the two relations per sample rather than
    # picking one of them at random - confirm this is the intended meaning
    # of "gradual" here.
    window = drift_end - drift_start
    for offset, idx in enumerate(range(drift_start, drift_end)):
        new_share = offset / window
        from_old = 2 * X[idx] + np.random.normal(0, 1)
        from_new = -X[idx] + 5 + np.random.normal(0, 1)
        y[idx] = (1 - new_share) * from_old + new_share * from_new

    # Trailing part: new relation only.
    y[drift_end:] = -X[drift_end:] + 5 + np.random.normal(0, 1, n_samples - drift_end)

    return X, y, np.array([drift_start, drift_end])
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def generate_incremental_drift(n_samples: int = 1000, n_steps: int = 5) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Incremental drift: the relation changes in ``n_steps`` small jumps.

    The samples are split into ``n_steps + 1`` consecutive blocks. Block
    ``k`` follows ``y = slope_k * X + intercept_k + noise`` where the slope
    moves linearly from 2 to -1 and the intercept from 0 to 5 as ``k`` goes
    from 0 to ``n_steps``. Noise is ``np.random.normal(0, 1, ...)``.

    Args:
        n_samples: Number of samples to generate.
        n_steps: Number of jumps. ``0`` yields a single block with the
            initial relation and no drift points.

    Returns:
        ``(X, y, drift_points)`` where ``drift_points`` is an integer array
        with the start index of every block except the first.

    Raises:
        ValueError: If ``n_steps`` is negative.
    """
    if n_steps < 0:
        raise ValueError(f"n_steps must be non-negative, got {n_steps}")

    X = np.linspace(0, 10, n_samples)
    y = np.zeros(n_samples)

    step_size = n_samples // (n_steps + 1)
    drift_points = []

    for step in range(n_steps + 1):
        start_idx = step * step_size
        # The last block absorbs the remainder of the integer division.
        end_idx = (step + 1) * step_size if step < n_steps else n_samples

        # Fraction of the way from the first relation to the last one.
        # With n_steps == 0 there is nothing to move towards, so stay at 0
        # instead of dividing by zero.
        progress = step / n_steps if n_steps else 0.0
        slope = 2 - progress * 3  # 2 -> -1
        intercept = progress * 5  # 0 -> 5

        y[start_idx:end_idx] = slope * X[start_idx:end_idx] + intercept + np.random.normal(0, 1, end_idx - start_idx)

        if step > 0:
            drift_points.append(start_idx)

    # Explicit integer dtype keeps an empty result usable as indices.
    return X, y, np.array(drift_points, dtype=int)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def generate_recurring_drift(n_samples: int = 1000, cycle_length: int = 250) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Recurring drift: two relations alternate with a fixed period.

    Within each cycle of ``cycle_length`` samples, positions below
    ``cycle_length // 2`` follow concept A (``y = 2*X + noise``) and the
    remaining positions follow concept B (``y = -X + 5 + noise``). Noise is
    ``np.random.normal(0, 1, n_samples)``.

    Args:
        n_samples: Number of samples to generate.
        cycle_length: Length of one A-then-B cycle, in samples.

    Returns:
        ``(X, y, drift_points)``. ``drift_points`` holds every index whose
        concept differs from the previous sample's, i.e. both the A -> B
        switches and the B -> A reappearances. Earlier versions reported only
        A -> B, which left each analysed segment spanning two concepts.

    Raises:
        ValueError: If ``cycle_length`` is not positive.
    """
    if cycle_length <= 0:
        raise ValueError(f"cycle_length must be positive, got {cycle_length}")

    X = np.linspace(0, 10, n_samples)

    # True where the sample belongs to concept A, False for concept B.
    cycle_pos = np.arange(n_samples) % cycle_length
    in_concept_a = cycle_pos < cycle_length // 2

    noise = np.random.normal(0, 1, n_samples)
    y = np.where(in_concept_a, 2 * X, -X + 5) + noise

    # A drift point is any index whose concept differs from its predecessor.
    drift_points = np.flatnonzero(in_concept_a[1:] != in_concept_a[:-1]) + 1

    return X, y, drift_points
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def get_drift_description(drift_type: str) -> str:
    """Return the one-line Korean description for a drift type key.

    Args:
        drift_type: One of ``"sudden"``, ``"gradual"``, ``"incremental"``
            or ``"recurring"``.

    Returns:
        The matching description, or a generic "unknown drift type" message
        for any other key.
    """
    descriptions = {
        "sudden": "급격한 드리프트: 특정 시점에서 데이터 분포가 갑자기 변경됩니다. 예: 팬데믹, 정책 변경 등",
        "gradual": "점진적 드리프트: 이전 분포와 새 분포가 섞이며 천천히 전환됩니다. 전환 기간 동안 두 컨셉이 공존합니다.",
        "incremental": "증분적 드리프트: 작은 단계로 변화가 발생하여 계단식 패턴을 형성합니다.",
        "recurring": "반복적 드리프트: 이전 분포가 주기적으로 다시 나타납니다. 계절성이나 주기적 패턴에서 발생합니다.",
    }
    return descriptions.get(drift_type, "알 수 없는 드리프트 유형")
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
plotly
|
| 3 |
+
numpy
|
visualizer.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import plotly.graph_objects as go
|
| 2 |
+
import numpy as np
|
| 3 |
+
from typing import Tuple
|
| 4 |
+
|
| 5 |
+
def create_drift_visualization(X: np.ndarray, y: np.ndarray, drift_points: np.ndarray, drift_type: str) -> go.Figure:
    """Plot drift data as a Plotly scatter with the drift points marked.

    Args:
        X: Feature values, used as x coordinates.
        y: Target values, used as y coordinates.
        drift_points: Sample indices; a dashed red vertical line labelled
            ``"Drift k"`` is drawn at ``X[index]`` for each of them.
        drift_type: Key selecting the figure title (``"sudden"``,
            ``"gradual"``, ``"incremental"``, ``"recurring"``). Other values
            fall back to ``"Concept Drift"``.

    Returns:
        The configured ``go.Figure``.
    """
    fig = go.Figure()

    # Main scatter: marker colour encodes the sample index ("Time").
    fig.add_trace(go.Scatter(
        x=X,
        y=y,
        mode='markers',
        name='Data Points',
        marker=dict(
            size=4,
            color=np.arange(len(X)),
            colorscale='Viridis',
            showscale=True,
            colorbar=dict(title="Time")
        ),
        hovertemplate='X: %{x:.2f}<br>y: %{y:.2f}<extra></extra>'
    ))

    # Mark each drift point. The unused y-range locals that used to be
    # computed here were removed.
    for i, drift_point in enumerate(drift_points):
        fig.add_vline(
            x=X[drift_point],
            line_dash="dash",
            line_color="red",
            annotation_text=f"Drift {i+1}",
            annotation_position="top"
        )

    # Layout
    title_map = {
        "sudden": "Sudden (Abrupt) Drift - 급격한 드리프트",
        "gradual": "Gradual Drift - 점진적 드리프트",
        "incremental": "Incremental Drift - 증분적 드리프트",
        "recurring": "Recurring Drift - 반복적 드리프트"
    }

    fig.update_layout(
        title=dict(
            text=title_map.get(drift_type, "Concept Drift"),
            x=0.5,
            xanchor='center'
        ),
        xaxis_title="Feature (X)",
        yaxis_title="Target (y)",
        hovermode='closest',
        template='plotly_white',
        height=600,
        showlegend=True,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01
        )
    )

    return fig
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def create_comparison_visualization(drift_data_dict: dict) -> go.Figure:
    """Draw the drift types side by side in a 2x2 grid of scatter plots.

    Args:
        drift_data_dict: Maps a drift key (``"sudden"``, ``"gradual"``,
            ``"incremental"``, ``"recurring"``) to an ``(X, y, drift_points)``
            triple. Keys that are absent leave their panel empty.

    Returns:
        A ``go.Figure`` with one panel per drift type and a dashed red
        vertical line at ``X[index]`` for every drift point.
    """
    from plotly.subplots import make_subplots

    # Panels are filled row by row in this order.
    panel_order = ("sudden", "gradual", "incremental", "recurring")

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=("Sudden Drift", "Gradual Drift", "Incremental Drift", "Recurring Drift")
    )

    for slot, key in enumerate(panel_order):
        if key not in drift_data_dict:
            continue

        # slot 0..3 -> (1, 1), (1, 2), (2, 1), (2, 2)
        grid_row = slot // 2 + 1
        grid_col = slot % 2 + 1
        X, y, drift_points = drift_data_dict[key]

        points = go.Scatter(
            x=X,
            y=y,
            mode='markers',
            marker=dict(size=3, color=np.arange(len(X)), colorscale='Viridis'),
            showlegend=False
        )
        fig.add_trace(points, row=grid_row, col=grid_col)

        # Drift markers for this panel.
        for boundary in drift_points:
            fig.add_vline(
                x=X[boundary],
                line_dash="dash",
                line_color="red",
                row=grid_row, col=grid_col
            )

    fig.update_xaxes(title_text="X")
    fig.update_yaxes(title_text="y")
    fig.update_layout(height=800, title_text="Concept Drift Types Comparison", showlegend=False)

    return fig
|