Spaces:

yoon-gu
/

concept-drift-simulator

Sleeping

Yoon-gu Hwang Claude committed 16 days ago

Commit

3e3dc68

0 Parent(s):

Initial commit: Concept Drift Simulator

- 4가지 드리프트 유형 시뮬레이션 (Sudden, Gradual, Incremental, Recurring)
- Plotly 기반 인터랙티브 시각화
- Gradio UI 구성
- 드리프트 분석 기능

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (7) hide show

.gitignore +17 -0
README.md +56 -0
analyzer.py +81 -0
app.py +92 -0
drift_simulator.py +97 -0
requirements.txt +3 -0
visualizer.py +110 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,17 @@

+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+venv/
+ENV/
+.venv
+*.egg-info/
+dist/
+build/
+.DS_Store
+.idea/
+.vscode/
+*.log
+flagged/

README.md ADDED Viewed

	@@ -0,0 +1,56 @@

+# Concept Drift Simulator
+컨셉 드리프트(Concept Drift)의 4가지 주요 유형을 시뮬레이션하고 시각화하는 인터랙티브 웹 애플리케이션입니다.
+## 주요 기능
+4가지 컨셉 드리프트 유형 시뮬레이션:
+1. **Sudden (급격한 드리프트)**: 특정 시점에서 데이터 분포가 갑자기 변경
+2. **Gradual (점진적 드리프트)**: 이전 분포와 새 분포가 섞이며 천천히 전환
+3. **Incremental (증분적 드리프트)**: 작은 단계로 변화가 발생하여 계단식 패턴 형성
+4. **Recurring (반복적 드리프트)**: 이전 분포가 주기적으로 다시 나타남
+## 기술 스택
+- **Gradio**: 인터랙티브 UI 구성
+- **Plotly**: 동적 시각화
+- **NumPy**: 데이터 생성 및 분석
+## 설치 및 실행
+```bash
+# 의존성 설치
+pip install -r requirements.txt
+# 앱 실행
+python app.py
+```
+브라우저에서 `http://localhost:7860` 접속
+## 사용 방법
+1. 왼쪽 패널에서 드리프트 유형 선택
+2. "시뮬레이션 실행" 버튼 클릭
+3. 오른쪽에 인터랙티브 Plotly 차트와 분석 결과 확인
+## 프로젝트 구조
+```
+concept-drift/
+├── app.py              # Gradio 애플리케이션
+├── drift_simulator.py  # 드리프트 데이터 생성
+├── visualizer.py       # Plotly 시각화
+├── analyzer.py         # 드리프트 분석
+├── requirements.txt    # 패키지 의존성
+└── README.md
+```
+## Hugging Face Spaces
+배포된 앱: (배포 후 링크 추가)
+## 라이선스
+MIT License

analyzer.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import numpy as np
+from typing import Dict, Tuple
+def analyze_drift(X: np.ndarray, y: np.ndarray, drift_points: np.ndarray, drift_type: str) -> Dict[str, any]:
+    """드리프트 데이터 분석"""
+    analysis = {
+        "drift_type": drift_type,
+        "total_samples": len(X),
+        "num_drift_points": len(drift_points),
+        "drift_locations": drift_points.tolist() if len(drift_points) > 0 else [],
+    }
+    # 전체 통계
+    analysis["mean_y"] = float(np.mean(y))
+    analysis["std_y"] = float(np.std(y))
+    analysis["min_y"] = float(np.min(y))
+    analysis["max_y"] = float(np.max(y))
+    # 세그먼트별 분석
+    segments = []
+    segment_boundaries = [0] + drift_points.tolist() + [len(X)]
+    for i in range(len(segment_boundaries) - 1):
+        start = segment_boundaries[i]
+        end = segment_boundaries[i + 1]
+        segment_y = y[start:end]
+        segment_X = X[start:end]
+        # 선형 회귀 계수 계산 (기울기)
+        if len(segment_X) > 1:
+            coeffs = np.polyfit(segment_X, segment_y, 1)
+            slope = float(coeffs[0])
+            intercept = float(coeffs[1])
+        else:
+            slope = 0.0
+            intercept = float(segment_y[0]) if len(segment_y) > 0 else 0.0
+        segments.append({
+            "segment_id": i,
+            "start_idx": int(start),
+            "end_idx": int(end),
+            "mean": float(np.mean(segment_y)),
+            "std": float(np.std(segment_y)),
+            "slope": slope,
+            "intercept": intercept
+        })
+    analysis["segments"] = segments
+    return analysis
+def format_analysis_summary(analysis: Dict) -> str:
+    """분석 결과를 사람이 읽기 쉬운 형식으로 포맷"""
+    summary = f"""
+## 드리프트 분석 결과
+**드리프트 유형:** {analysis['drift_type'].upper()}
+**전체 데이터:**
+- 총 샘플 수: {analysis['total_samples']}
+- 드리프트 발생 횟수: {analysis['num_drift_points']}
+- 평균: {analysis['mean_y']:.2f}
+- 표준편차: {analysis['std_y']:.2f}
+- 범위: [{analysis['min_y']:.2f}, {analysis['max_y']:.2f}]
+**세그먼트별 분석:**
+"""
+    for seg in analysis['segments']:
+        summary += f"""
+**세그먼트 {seg['segment_id'] + 1}** (샘플 {seg['start_idx']}-{seg['end_idx']})
+- 평균: {seg['mean']:.2f}
+- 표준편차: {seg['std']:.2f}
+- 관계식: y = {seg['slope']:.2f}x + {seg['intercept']:.2f}
+"""
+    return summary

app.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import gradio as gr
+from drift_simulator import (
+    generate_sudden_drift,
+    generate_gradual_drift,
+    generate_incremental_drift,
+    generate_recurring_drift,
+    get_drift_description
+)
+from visualizer import create_drift_visualization
+from analyzer import analyze_drift, format_analysis_summary
+def simulate_and_visualize(drift_type: str):
+    """드리프트 시뮬레이션 + 시각화 + 분석"""
+    # 드리프트 타입에 따라 데이터 생성
+    if drift_type == "Sudden (급격한 드리프트)":
+        X, y, drift_points = generate_sudden_drift(n_samples=1000, drift_point=500)
+        drift_key = "sudden"
+    elif drift_type == "Gradual (점진적 드리프트)":
+        X, y, drift_points = generate_gradual_drift(n_samples=1000, drift_start=300, drift_end=700)
+        drift_key = "gradual"
+    elif drift_type == "Incremental (증분적 드리프트)":
+        X, y, drift_points = generate_incremental_drift(n_samples=1000, n_steps=5)
+        drift_key = "incremental"
+    elif drift_type == "Recurring (반복적 드리프트)":
+        X, y, drift_points = generate_recurring_drift(n_samples=1000, cycle_length=250)
+        drift_key = "recurring"
+    else:
+        return None, "올바른 드리프트 타입을 선택해주세요."
+    # 시각화
+    fig = create_drift_visualization(X, y, drift_points, drift_key)
+    # 분석
+    analysis = analyze_drift(X, y, drift_points, drift_key)
+    summary = format_analysis_summary(analysis)
+    # 설명 추가
+    description = get_drift_description(drift_key)
+    full_summary = f"### {description}\n\n{summary}"
+    return fig, full_summary
+# Gradio 인터페이스
+with gr.Blocks(title="Concept Drift Simulator", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # Concept Drift Simulator
+        컨셉 드리프트(Concept Drift)의 4가지 유형을 시뮬레이션하고 시각화합니다.
+        드리프트 유형을 선택하면 자동으로 데이터를 생성하고 인터랙티브 차트를 표시합니다.
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            drift_type_radio = gr.Radio(
+                choices=[
+                    "Sudden (급격한 드리프트)",
+                    "Gradual (점진적 드리프트)",
+                    "Incremental (증분적 드리프트)",
+                    "Recurring (반복적 드리프트)"
+                ],
+                label="드리프트 유형 선택",
+                value="Sudden (급격한 드리프트)"
+            )
+            simulate_btn = gr.Button("시뮬레이션 실행", variant="primary")
+        with gr.Column(scale=2):
+            plot_output = gr.Plot(label="드리프트 시각화")
+    analysis_output = gr.Markdown(label="분석 결과")
+    # 이벤트 핸들러
+    simulate_btn.click(
+        fn=simulate_and_visualize,
+        inputs=drift_type_radio,
+        outputs=[plot_output, analysis_output]
+    )
+    # 초기 로드
+    demo.load(
+        fn=simulate_and_visualize,
+        inputs=drift_type_radio,
+        outputs=[plot_output, analysis_output]
+    )
+if __name__ == "__main__":
+    demo.launch()

drift_simulator.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import numpy as np
+from typing import Tuple
+def generate_sudden_drift(n_samples: int = 1000, drift_point: int = 500) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """급격한 드리프트: t 시점에서 갑자기 데이터 분포 변경"""
+    X = np.linspace(0, 10, n_samples)
+    y = np.zeros(n_samples)
+    # Before drift: y = 2*X + noise
+    y[:drift_point] = 2 * X[:drift_point] + np.random.normal(0, 1, drift_point)
+    # After drift: y = -X + 5 + noise (완전히 다른 관계)
+    y[drift_point:] = -X[drift_point:] + 5 + np.random.normal(0, 1, n_samples - drift_point)
+    drift_points = np.array([drift_point])
+    return X, y, drift_points
+def generate_gradual_drift(n_samples: int = 1000, drift_start: int = 300, drift_end: int = 700) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """점진적 드리프트: 두 분포가 섞이며 천천히 전환"""
+    X = np.linspace(0, 10, n_samples)
+    y = np.zeros(n_samples)
+    # Before drift: y = 2*X
+    y[:drift_start] = 2 * X[:drift_start] + np.random.normal(0, 1, drift_start)
+    # Gradual transition: mixture of old and new concepts
+    transition_length = drift_end - drift_start
+    for i in range(drift_start, drift_end):
+        weight = (i - drift_start) / transition_length
+        old_concept = 2 * X[i] + np.random.normal(0, 1)
+        new_concept = -X[i] + 5 + np.random.normal(0, 1)
+        y[i] = (1 - weight) * old_concept + weight * new_concept
+    # After drift: y = -X + 5
+    y[drift_end:] = -X[drift_end:] + 5 + np.random.normal(0, 1, n_samples - drift_end)
+    drift_points = np.array([drift_start, drift_end])
+    return X, y, drift_points
+def generate_incremental_drift(n_samples: int = 1000, n_steps: int = 5) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """증분적 드리프트: 계단식으로 작은 변화가 누적"""
+    X = np.linspace(0, 10, n_samples)
+    y = np.zeros(n_samples)
+    step_size = n_samples // (n_steps + 1)
+    drift_points = []
+    for step in range(n_steps + 1):
+        start_idx = step * step_size
+        end_idx = (step + 1) * step_size if step < n_steps else n_samples
+        # 각 단계마다 기울기가 조금씩 변화
+        slope = 2 - (step / n_steps) * 3  # 2에서 -1로 점진적 변화
+        intercept = (step / n_steps) * 5  # 0에서 5로 점진적 변화
+        y[start_idx:end_idx] = slope * X[start_idx:end_idx] + intercept + np.random.normal(0, 1, end_idx - start_idx)
+        if step > 0:
+            drift_points.append(start_idx)
+    return X, y, np.array(drift_points)
+def generate_recurring_drift(n_samples: int = 1000, cycle_length: int = 250) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """반복적 드리프트: 이전 분포가 주기적으로 재등장"""
+    X = np.linspace(0, 10, n_samples)
+    y = np.zeros(n_samples)
+    drift_points = []
+    for i in range(n_samples):
+        cycle_pos = i % cycle_length
+        if cycle_pos < cycle_length // 2:
+            # Concept A: y = 2*X
+            y[i] = 2 * X[i] + np.random.normal(0, 1)
+        else:
+            # Concept B: y = -X + 5
+            y[i] = -X[i] + 5 + np.random.normal(0, 1)
+        if cycle_pos == cycle_length // 2:
+            drift_points.append(i)
+    return X, y, np.array(drift_points)
+def get_drift_description(drift_type: str) -> str:
+    """드리프트 유형별 설명 반환"""
+    descriptions = {
+        "sudden": "급격한 드리프트: 특정 시점에서 데이터 분포가 갑자기 변경됩니다. 예: 팬데믹, 정책 변경 등",
+        "gradual": "점진적 드리프트: 이전 분포와 새 분포가 섞이며 천천히 전환됩니다. 전환 기간 동안 두 컨셉이 공존합니다.",
+        "incremental": "증분적 드리프트: 작은 단계로 변화가 발생하여 계단식 패턴을 형성합니다.",
+        "recurring": "반복적 드리프트: 이전 분포가 주기적으로 다시 나타납니다. 계절성이나 주기적 패턴에서 발생합니다."
+    }
+    return descriptions.get(drift_type, "알 수 없는 드리프트 유형")

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+plotly
+numpy

visualizer.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import plotly.graph_objects as go
+import numpy as np
+from typing import Tuple
+def create_drift_visualization(X: np.ndarray, y: np.ndarray, drift_points: np.ndarray, drift_type: str) -> go.Figure:
+    """드리프트 데이터를 Plotly로 시각화"""
+    fig = go.Figure()
+    # 메인 데이터 scatter plot
+    fig.add_trace(go.Scatter(
+        x=X,
+        y=y,
+        mode='markers',
+        name='Data Points',
+        marker=dict(
+            size=4,
+            color=np.arange(len(X)),
+            colorscale='Viridis',
+            showscale=True,
+            colorbar=dict(title="Time")
+        ),
+        hovertemplate='X: %{x:.2f}<br>y: %{y:.2f}<extra></extra>'
+    ))
+    # 드리프트 발생 지점 표시
+    y_min, y_max = y.min(), y.max()
+    y_range = y_max - y_min
+    for i, drift_point in enumerate(drift_points):
+        fig.add_vline(
+            x=X[drift_point],
+            line_dash="dash",
+            line_color="red",
+            annotation_text=f"Drift {i+1}",
+            annotation_position="top"
+        )
+    # 레이아웃 설정
+    title_map = {
+        "sudden": "Sudden (Abrupt) Drift - 급격한 드리프트",
+        "gradual": "Gradual Drift - 점진적 드리프트",
+        "incremental": "Incremental Drift - 증분적 드리프트",
+        "recurring": "Recurring Drift - 반복적 드리프트"
+    }
+    fig.update_layout(
+        title=dict(
+            text=title_map.get(drift_type, "Concept Drift"),
+            x=0.5,
+            xanchor='center'
+        ),
+        xaxis_title="Feature (X)",
+        yaxis_title="Target (y)",
+        hovermode='closest',
+        template='plotly_white',
+        height=600,
+        showlegend=True,
+        legend=dict(
+            yanchor="top",
+            y=0.99,
+            xanchor="left",
+            x=0.01
+        )
+    )
+    return fig
+def create_comparison_visualization(drift_data_dict: dict) -> go.Figure:
+    """여러 드리프트 유형을 한 번에 비교"""
+    from plotly.subplots import make_subplots
+    fig = make_subplots(
+        rows=2, cols=2,
+        subplot_titles=("Sudden Drift", "Gradual Drift", "Incremental Drift", "Recurring Drift")
+    )
+    positions = [(1, 1), (1, 2), (2, 1), (2, 2)]
+    drift_types = ["sudden", "gradual", "incremental", "recurring"]
+    for (row, col), drift_type in zip(positions, drift_types):
+        if drift_type in drift_data_dict:
+            X, y, drift_points = drift_data_dict[drift_type]
+            fig.add_trace(
+                go.Scatter(
+                    x=X,
+                    y=y,
+                    mode='markers',
+                    marker=dict(size=3, color=np.arange(len(X)), colorscale='Viridis'),
+                    showlegend=False
+                ),
+                row=row, col=col
+            )
+            # 드리프트 지점 표시
+            for drift_point in drift_points:
+                fig.add_vline(
+                    x=X[drift_point],
+                    line_dash="dash",
+                    line_color="red",
+                    row=row, col=col
+                )
+    fig.update_xaxes(title_text="X")
+    fig.update_yaxes(title_text="y")
+    fig.update_layout(height=800, title_text="Concept Drift Types Comparison", showlegend=False)
+    return fig