Spaces:
Sleeping
Sleeping
Commit
·
1e0d7f9
1
Parent(s):
82e47b6
Set all analysis default TRUE and cleanup unused files
Browse files
- .gitignore +9 -0
- app/api/routes.py +4 -4
- backup_old_files/REDIS_CONFIG_NOTES.md +0 -312
- tempo.py +0 -154
- upload_model_to_hf.py +0 -175
.gitignore
CHANGED
|
@@ -63,3 +63,12 @@ Thumbs.db
|
|
| 63 |
# Jupyter
|
| 64 |
.ipynb_checkpoints/
|
| 65 |
*.ipynb
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
# Jupyter
|
| 64 |
.ipynb_checkpoints/
|
| 65 |
*.ipynb
|
| 66 |
+
|
| 67 |
+
# Local testing files only
|
| 68 |
+
local_only/
|
| 69 |
+
test_*.py
|
| 70 |
+
*.bat
|
| 71 |
+
ffmpeg.exe
|
| 72 |
+
tempo.py
|
| 73 |
+
upload_model_to_hf.py
|
| 74 |
+
backup_old_files/
|
app/api/routes.py
CHANGED
|
@@ -27,8 +27,8 @@ async def analyze_audio(
|
|
| 27 |
analyze_tempo: bool = Form(True),
|
| 28 |
analyze_articulation: bool = Form(True),
|
| 29 |
analyze_structure: bool = Form(True),
|
| 30 |
-
analyze_keywords: bool = Form(False),
|
| 31 |
-
analyze_profanity: bool = Form(False)
|
| 32 |
):
|
| 33 |
"""
|
| 34 |
Submit audio file untuk analisis
|
|
@@ -42,8 +42,8 @@ async def analyze_audio(
|
|
| 42 |
- analyze_tempo: Analisis tempo (default: true)
|
| 43 |
- analyze_articulation: Analisis artikulasi (default: true)
|
| 44 |
- analyze_structure: Analisis struktur (default: true)
|
| 45 |
-
- analyze_keywords: Analisis kata kunci (default: false)
|
| 46 |
-
- analyze_profanity: Deteksi kata tidak senonoh (default: false)
|
| 47 |
|
| 48 |
Returns task_id yang bisa digunakan untuk check status
|
| 49 |
"""
|
|
|
|
| 27 |
analyze_tempo: bool = Form(True),
|
| 28 |
analyze_articulation: bool = Form(True),
|
| 29 |
analyze_structure: bool = Form(True),
|
| 30 |
+
analyze_keywords: bool = Form(True), # Default TRUE
|
| 31 |
+
analyze_profanity: bool = Form(True) # Default TRUE
|
| 32 |
):
|
| 33 |
"""
|
| 34 |
Submit audio file untuk analisis
|
|
|
|
| 42 |
- analyze_tempo: Analisis tempo (default: true)
|
| 43 |
- analyze_articulation: Analisis artikulasi (default: true)
|
| 44 |
- analyze_structure: Analisis struktur (default: true)
|
| 45 |
+
- analyze_keywords: Analisis kata kunci (default: true) - otomatis skip jika tidak ada topic_id/custom_keywords
|
| 46 |
+
- analyze_profanity: Deteksi kata tidak senonoh (default: true)
|
| 47 |
|
| 48 |
Returns task_id yang bisa digunakan untuk check status
|
| 49 |
"""
|
backup_old_files/REDIS_CONFIG_NOTES.md
DELETED
|
@@ -1,312 +0,0 @@
|
|
| 1 |
-
# 🔴 Redis Configuration - Technical Notes
|
| 2 |
-
|
| 3 |
-
## ✅ Configuration Summary
|
| 4 |
-
|
| 5 |
-
Konfigurasi Redis untuk Swara API sudah **BENAR** dan siap untuk deployment ke Hugging Face Spaces.
|
| 6 |
-
|
| 7 |
-
---
|
| 8 |
-
|
| 9 |
-
## 📋 Redis Settings
|
| 10 |
-
|
| 11 |
-
### 1. **Configuration File** (`app/config.py`)
|
| 12 |
-
|
| 13 |
-
```python
|
| 14 |
-
REDIS_HOST: str = os.getenv("REDIS_HOST", "localhost")
|
| 15 |
-
REDIS_PORT: int = int(os.getenv("REDIS_PORT", "6379"))
|
| 16 |
-
REDIS_DB: int = int(os.getenv("REDIS_DB", "0"))
|
| 17 |
-
REDIS_PASSWORD: str = os.getenv("REDIS_PASSWORD", "")
|
| 18 |
-
```
|
| 19 |
-
|
| 20 |
-
✅ **Correct**: Defaults ke `localhost:6379` untuk single-container deployment
|
| 21 |
-
|
| 22 |
-
---
|
| 23 |
-
|
| 24 |
-
### 2. **Redis Client** (`app/core/redis_client.py`)
|
| 25 |
-
|
| 26 |
-
**FIXED Issues:**
|
| 27 |
-
|
| 28 |
-
- ❌ **Before**: `decode_responses=True` → Caused RQ errors
|
| 29 |
-
- ✅ **After**: Removed `decode_responses` → RQ compatible
|
| 30 |
-
|
| 31 |
-
**Current Configuration:**
|
| 32 |
-
|
| 33 |
-
```python
|
| 34 |
-
def get_redis_connection():
|
| 35 |
-
redis_kwargs = {
|
| 36 |
-
'host': settings.REDIS_HOST,
|
| 37 |
-
'port': settings.REDIS_PORT,
|
| 38 |
-
'db': settings.REDIS_DB,
|
| 39 |
-
}
|
| 40 |
-
|
| 41 |
-
if settings.REDIS_PASSWORD:
|
| 42 |
-
redis_kwargs['password'] = settings.REDIS_PASSWORD
|
| 43 |
-
|
| 44 |
-
return redis.Redis(**redis_kwargs) # No decode_responses!
|
| 45 |
-
```
|
| 46 |
-
|
| 47 |
-
✅ **Benefits:**
|
| 48 |
-
|
| 49 |
-
- Compatible with RQ (Redis Queue)
|
| 50 |
-
- Proper bytes handling
|
| 51 |
-
- Password support (optional)
|
| 52 |
-
- Clean connection management
|
| 53 |
-
|
| 54 |
-
**New Functions:**
|
| 55 |
-
|
| 56 |
-
```python
|
| 57 |
-
def check_redis_connection():
|
| 58 |
-
"""Health check function"""
|
| 59 |
-
try:
|
| 60 |
-
conn = get_redis_connection()
|
| 61 |
-
conn.ping()
|
| 62 |
-
return True, None
|
| 63 |
-
except Exception as e:
|
| 64 |
-
return False, str(e)
|
| 65 |
-
```
|
| 66 |
-
|
| 67 |
-
✅ **Use case**: Health checks & startup validation
|
| 68 |
-
|
| 69 |
-
---
|
| 70 |
-
|
| 71 |
-
### 3. **Startup Script** (`start.sh`)
|
| 72 |
-
|
| 73 |
-
**Improvements Made:**
|
| 74 |
-
|
| 75 |
-
**Before:**
|
| 76 |
-
|
| 77 |
-
```bash
|
| 78 |
-
redis-server --daemonize yes
|
| 79 |
-
until redis-cli ping; do
|
| 80 |
-
echo "Waiting for Redis..."
|
| 81 |
-
sleep 1
|
| 82 |
-
done
|
| 83 |
-
```
|
| 84 |
-
|
| 85 |
-
**After:**
|
| 86 |
-
|
| 87 |
-
```bash
|
| 88 |
-
# Set environment variables
|
| 89 |
-
export REDIS_HOST=localhost
|
| 90 |
-
export REDIS_PORT=6379
|
| 91 |
-
export REDIS_DB=0
|
| 92 |
-
|
| 93 |
-
# Start Redis with specific binding
|
| 94 |
-
redis-server --daemonize yes --bind 127.0.0.1 --port 6379
|
| 95 |
-
|
| 96 |
-
# Wait with timeout
|
| 97 |
-
REDIS_TIMEOUT=30
|
| 98 |
-
until redis-cli -h localhost -p 6379 ping 2>/dev/null | grep -q PONG; do
|
| 99 |
-
if [ $ELAPSED -ge $REDIS_TIMEOUT ]; then
|
| 100 |
-
echo "ERROR: Redis failed to start"
|
| 101 |
-
exit 1
|
| 102 |
-
fi
|
| 103 |
-
sleep 2
|
| 104 |
-
done
|
| 105 |
-
```
|
| 106 |
-
|
| 107 |
-
✅ **Improvements:**
|
| 108 |
-
|
| 109 |
-
- Environment variables explicitly set
|
| 110 |
-
- Timeout protection (30s max)
|
| 111 |
-
- Specific binding to localhost
|
| 112 |
-
- Better error handling
|
| 113 |
-
- Clearer logging
|
| 114 |
-
|
| 115 |
-
---
|
| 116 |
-
|
| 117 |
-
### 4. **Worker** (`app/worker.py`)
|
| 118 |
-
|
| 119 |
-
**Added Retry Logic:**
|
| 120 |
-
|
| 121 |
-
```python
|
| 122 |
-
def run_worker():
|
| 123 |
-
# Wait for Redis with retries
|
| 124 |
-
max_retries = 30
|
| 125 |
-
for attempt in range(1, max_retries + 1):
|
| 126 |
-
is_connected, error_msg = check_redis_connection()
|
| 127 |
-
if is_connected:
|
| 128 |
-
break
|
| 129 |
-
time.sleep(2)
|
| 130 |
-
|
| 131 |
-
# Then start worker
|
| 132 |
-
worker = Worker([queue], connection=redis_conn)
|
| 133 |
-
worker.work()
|
| 134 |
-
```
|
| 135 |
-
|
| 136 |
-
✅ **Benefits:**
|
| 137 |
-
|
| 138 |
-
- Graceful startup
|
| 139 |
-
- Handles Redis not ready yet
|
| 140 |
-
- Clear error messages
|
| 141 |
-
- Auto-retry mechanism
|
| 142 |
-
|
| 143 |
-
---
|
| 144 |
-
|
| 145 |
-
### 5. **Health Check** (`app/api/routes.py`)
|
| 146 |
-
|
| 147 |
-
**Improved Endpoint:**
|
| 148 |
-
|
| 149 |
-
```python
|
| 150 |
-
@router.get("/health")
|
| 151 |
-
async def health_check():
|
| 152 |
-
is_connected, error_msg = check_redis_connection()
|
| 153 |
-
|
| 154 |
-
return {
|
| 155 |
-
"status": "healthy" if is_connected else "degraded",
|
| 156 |
-
"redis": "healthy" if is_connected else f"unhealthy: {error_msg}",
|
| 157 |
-
"version": settings.VERSION
|
| 158 |
-
}
|
| 159 |
-
```
|
| 160 |
-
|
| 161 |
-
✅ **Benefits:**
|
| 162 |
-
|
| 163 |
-
- Real-time Redis status
|
| 164 |
-
- Degraded state detection
|
| 165 |
-
- Useful for monitoring
|
| 166 |
-
|
| 167 |
-
---
|
| 168 |
-
|
| 169 |
-
## 🏗️ Architecture for HF Spaces
|
| 170 |
-
|
| 171 |
-
```
|
| 172 |
-
┌─────────────────────────────────────────┐
|
| 173 |
-
│ Hugging Face Space (Single Container) │
|
| 174 |
-
│ │
|
| 175 |
-
│ ┌──────────────────────────────────┐ │
|
| 176 |
-
│ │ Redis Server (localhost:6379) │ │
|
| 177 |
-
│ │ - In-memory data store │ │
|
| 178 |
-
│ │ - Task queue │ │
|
| 179 |
-
│ │ - Result storage (24h TTL) │ │
|
| 180 |
-
│ └─────────┬────────────────────────┘ │
|
| 181 |
-
│ │ │
|
| 182 |
-
│ ┌─────────▼───────────┐ │
|
| 183 |
-
│ │ RQ Worker │ │
|
| 184 |
-
│ │ - Process tasks │ │
|
| 185 |
-
│ │ - Run AI models │ │
|
| 186 |
-
│ └─────────┬───────────┘ │
|
| 187 |
-
│ │ │
|
| 188 |
-
│ ┌─────────▼───────────┐ │
|
| 189 |
-
│ │ FastAPI App │ │
|
| 190 |
-
│ │ - REST API │ │
|
| 191 |
-
│ │ - Port 7860 │ │
|
| 192 |
-
│ └─────────────────────┘ │
|
| 193 |
-
│ │
|
| 194 |
-
└──────────────────────────────────────────┘
|
| 195 |
-
▲
|
| 196 |
-
│ HTTP Requests
|
| 197 |
-
│
|
| 198 |
-
┌────┴─────┐
|
| 199 |
-
│ Client │
|
| 200 |
-
└──────────┘
|
| 201 |
-
```
|
| 202 |
-
|
| 203 |
-
---
|
| 204 |
-
|
| 205 |
-
## 🔍 Configuration Validation
|
| 206 |
-
|
| 207 |
-
### Check 1: Environment Variables
|
| 208 |
-
|
| 209 |
-
```bash
|
| 210 |
-
# In HF Spaces, these are auto-set by start.sh:
|
| 211 |
-
REDIS_HOST=localhost
|
| 212 |
-
REDIS_PORT=6379
|
| 213 |
-
REDIS_DB=0
|
| 214 |
-
```
|
| 215 |
-
|
| 216 |
-
✅ **Status**: Configured in `start.sh`
|
| 217 |
-
|
| 218 |
-
### Check 2: Redis Connection
|
| 219 |
-
|
| 220 |
-
```python
|
| 221 |
-
# Test connection
|
| 222 |
-
from app.core.redis_client import check_redis_connection
|
| 223 |
-
is_connected, error = check_redis_connection()
|
| 224 |
-
print(f"Connected: {is_connected}")
|
| 225 |
-
```
|
| 226 |
-
|
| 227 |
-
✅ **Status**: Function available
|
| 228 |
-
|
| 229 |
-
### Check 3: Queue Setup
|
| 230 |
-
|
| 231 |
-
```python
|
| 232 |
-
# Test queue
|
| 233 |
-
from app.core.redis_client import get_queue
|
| 234 |
-
queue = get_queue()
|
| 235 |
-
print(f"Queue: {queue.name}")
|
| 236 |
-
```
|
| 237 |
-
|
| 238 |
-
✅ **Status**: Queue name: `audio_analysis`
|
| 239 |
-
|
| 240 |
-
---
|
| 241 |
-
|
| 242 |
-
## 🚨 Common Issues & Solutions
|
| 243 |
-
|
| 244 |
-
### Issue 1: "Connection refused"
|
| 245 |
-
|
| 246 |
-
**Cause**: Redis not started yet
|
| 247 |
-
**Solution**: ✅ Fixed with retry logic in worker
|
| 248 |
-
|
| 249 |
-
### Issue 2: "decode_responses error"
|
| 250 |
-
|
| 251 |
-
**Cause**: RQ doesn't support `decode_responses=True`
|
| 252 |
-
**Solution**: ✅ Fixed by removing from connection
|
| 253 |
-
|
| 254 |
-
### Issue 3: Worker timeout
|
| 255 |
-
|
| 256 |
-
**Cause**: Long-running tasks
|
| 257 |
-
**Solution**: ✅ Set `JOB_TIMEOUT=3600` (1 hour)
|
| 258 |
-
|
| 259 |
-
### Issue 4: Results disappear
|
| 260 |
-
|
| 261 |
-
**Cause**: Default TTL too short
|
| 262 |
-
**Solution**: ✅ Set `RESULT_TTL=86400` (24 hours)
|
| 263 |
-
|
| 264 |
-
---
|
| 265 |
-
|
| 266 |
-
## 📊 Redis Performance Settings
|
| 267 |
-
|
| 268 |
-
### Current Settings:
|
| 269 |
-
|
| 270 |
-
```python
|
| 271 |
-
QUEUE_NAME: str = "audio_analysis"
|
| 272 |
-
JOB_TIMEOUT: int = 3600 # 1 hour
|
| 273 |
-
RESULT_TTL: int = 86400 # 24 hours
|
| 274 |
-
```
|
| 275 |
-
|
| 276 |
-
### Recommended for Production:
|
| 277 |
-
|
| 278 |
-
```python
|
| 279 |
-
# For high traffic:
|
| 280 |
-
RESULT_TTL: int = 3600 # 1 hour (save memory)
|
| 281 |
-
|
| 282 |
-
# For long audio:
|
| 283 |
-
JOB_TIMEOUT: int = 7200 # 2 hours
|
| 284 |
-
```
|
| 285 |
-
|
| 286 |
-
---
|
| 287 |
-
|
| 288 |
-
## ✅ Final Checklist
|
| 289 |
-
|
| 290 |
-
- [x] Redis connection without `decode_responses`
|
| 291 |
-
- [x] Environment variables in `start.sh`
|
| 292 |
-
- [x] Retry logic in worker
|
| 293 |
-
- [x] Health check endpoint
|
| 294 |
-
- [x] Timeout protection
|
| 295 |
-
- [x] Error handling
|
| 296 |
-
- [x] Graceful startup sequence
|
| 297 |
-
- [x] Proper binding to localhost
|
| 298 |
-
- [x] TTL configuration
|
| 299 |
-
|
| 300 |
-
---
|
| 301 |
-
|
| 302 |
-
## 🎯 Status: READY FOR DEPLOYMENT
|
| 303 |
-
|
| 304 |
-
Semua konfigurasi Redis sudah **BENAR** dan **OPTIMAL** untuk:
|
| 305 |
-
|
| 306 |
-
- ✅ Hugging Face Spaces (single container)
|
| 307 |
-
- ✅ Local development
|
| 308 |
-
- ✅ Production deployment
|
| 309 |
-
- ✅ High availability
|
| 310 |
-
- ✅ Error recovery
|
| 311 |
-
|
| 312 |
-
**No further Redis configuration needed!** 🚀
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tempo.py
DELETED
|
@@ -1,154 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
tempo.py
|
| 3 |
-
Analisis Tempo dan Jeda Bicara menggunakan Silero VAD
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
-
import torch
|
| 7 |
-
import pandas as pd
|
| 8 |
-
from typing import Dict, List
|
| 9 |
-
import warnings
|
| 10 |
-
warnings.filterwarnings('ignore')
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
class TempoAnalyzer:
|
| 14 |
-
"""Analisis tempo dan jeda bicara"""
|
| 15 |
-
|
| 16 |
-
def __init__(self):
|
| 17 |
-
"""Initialize Silero VAD model"""
|
| 18 |
-
print("🔄 Loading Silero VAD model...")
|
| 19 |
-
torch.set_num_threads(1)
|
| 20 |
-
self.model, utils = torch.hub.load(
|
| 21 |
-
repo_or_dir='snakers4/silero-vad',
|
| 22 |
-
model='silero_vad',
|
| 23 |
-
force_reload=False
|
| 24 |
-
)
|
| 25 |
-
(self.get_speech_timestamps,
|
| 26 |
-
self.save_audio,
|
| 27 |
-
self.read_audio,
|
| 28 |
-
self.VADIterator,
|
| 29 |
-
self.collect_chunks) = utils
|
| 30 |
-
print("✅ Silero VAD model loaded!\n")
|
| 31 |
-
|
| 32 |
-
def analyze_tempo(self, audio_path: str, sampling_rate: int = 16000) -> Dict:
|
| 33 |
-
"""
|
| 34 |
-
Analisis tempo dan jeda dari file audio
|
| 35 |
-
|
| 36 |
-
Args:
|
| 37 |
-
audio_path: Path ke file audio
|
| 38 |
-
sampling_rate: Sample rate audio (default: 16000)
|
| 39 |
-
|
| 40 |
-
Returns:
|
| 41 |
-
Dict berisi hasil analisis lengkap
|
| 42 |
-
"""
|
| 43 |
-
print(f"🎧 Analyzing tempo: {audio_path}")
|
| 44 |
-
|
| 45 |
-
# Load audio
|
| 46 |
-
wav = self.read_audio(audio_path)
|
| 47 |
-
|
| 48 |
-
# Deteksi segmen bicara
|
| 49 |
-
speech_timestamps = self.get_speech_timestamps(
|
| 50 |
-
wav, self.model, sampling_rate=sampling_rate
|
| 51 |
-
)
|
| 52 |
-
|
| 53 |
-
# Buat daftar data analisis
|
| 54 |
-
data = []
|
| 55 |
-
total_pause = 0
|
| 56 |
-
total_score = 0
|
| 57 |
-
num_pauses = 0
|
| 58 |
-
|
| 59 |
-
for i, seg in enumerate(speech_timestamps):
|
| 60 |
-
start_time = seg['start'] / sampling_rate
|
| 61 |
-
end_time = seg['end'] / sampling_rate
|
| 62 |
-
duration = end_time - start_time
|
| 63 |
-
|
| 64 |
-
if i == 0:
|
| 65 |
-
pause_before = start_time # jeda awal sebelum bicara pertama
|
| 66 |
-
else:
|
| 67 |
-
pause_before = start_time - (speech_timestamps[i - 1]['end'] / sampling_rate)
|
| 68 |
-
|
| 69 |
-
# Hitung skor jeda (0 atau 1)
|
| 70 |
-
# Jika jeda <= 3 detik → 1, jika > 3 detik → 0
|
| 71 |
-
skor = 1 if pause_before <= 3.0 else 0
|
| 72 |
-
|
| 73 |
-
total_pause += pause_before
|
| 74 |
-
total_score += skor
|
| 75 |
-
num_pauses += 1
|
| 76 |
-
|
| 77 |
-
data.append({
|
| 78 |
-
'Segmen': i + 1,
|
| 79 |
-
'Mulai (detik)': round(start_time, 2),
|
| 80 |
-
'Selesai (detik)': round(end_time, 2),
|
| 81 |
-
'Durasi Bicara (detik)': round(duration, 2),
|
| 82 |
-
'Jeda Sebelum (detik)': round(pause_before, 2),
|
| 83 |
-
'Skor Jeda': skor
|
| 84 |
-
})
|
| 85 |
-
|
| 86 |
-
# Hitung rata-rata jeda dan skor
|
| 87 |
-
rata_jeda = total_pause / num_pauses if num_pauses > 0 else 0
|
| 88 |
-
rata_skor = total_score / num_pauses if num_pauses > 0 else 0
|
| 89 |
-
|
| 90 |
-
# Tentukan kategori
|
| 91 |
-
if rata_skor >= 0.9:
|
| 92 |
-
kategori = "Sangat Baik"
|
| 93 |
-
poin = 5
|
| 94 |
-
elif rata_skor >= 0.7:
|
| 95 |
-
kategori = "Baik"
|
| 96 |
-
poin = 4
|
| 97 |
-
elif rata_skor >= 0.5:
|
| 98 |
-
kategori = "Cukup"
|
| 99 |
-
poin = 3
|
| 100 |
-
elif rata_skor >= 0.3:
|
| 101 |
-
kategori = "Buruk"
|
| 102 |
-
poin = 2
|
| 103 |
-
else:
|
| 104 |
-
kategori = "Perlu Ditingkatkan"
|
| 105 |
-
poin = 1
|
| 106 |
-
|
| 107 |
-
print("✅ Tempo analysis complete!\n")
|
| 108 |
-
|
| 109 |
-
return {
|
| 110 |
-
'segments': data,
|
| 111 |
-
'total_segments': len(speech_timestamps),
|
| 112 |
-
'rata_rata_jeda': round(rata_jeda, 2),
|
| 113 |
-
'rata_rata_skor': round(rata_skor, 2),
|
| 114 |
-
'kategori': kategori,
|
| 115 |
-
'poin': poin,
|
| 116 |
-
'summary': {
|
| 117 |
-
'score': poin,
|
| 118 |
-
'category': kategori,
|
| 119 |
-
'avg_pause': round(rata_jeda, 2),
|
| 120 |
-
'avg_score': round(rata_skor, 2),
|
| 121 |
-
'total_segments': len(speech_timestamps)
|
| 122 |
-
}
|
| 123 |
-
}
|
| 124 |
-
|
| 125 |
-
def print_report(self, result: Dict):
|
| 126 |
-
"""Print detailed report"""
|
| 127 |
-
df = pd.DataFrame(result['segments'])
|
| 128 |
-
|
| 129 |
-
print("\n" + "="*70)
|
| 130 |
-
print("📊 ANALISIS TEMPO DAN JEDA BICARA")
|
| 131 |
-
print("="*70)
|
| 132 |
-
print(df.to_string(index=False))
|
| 133 |
-
print("\n" + "="*70)
|
| 134 |
-
print(f"Total Segmen Bicara : {result['total_segments']}")
|
| 135 |
-
print(f"Rata-rata Jeda (detik) : {result['rata_rata_jeda']}")
|
| 136 |
-
print(f"Rata-rata Skor Jeda : {result['rata_rata_skor']}/1")
|
| 137 |
-
print(f"Kategori : {result['kategori']}")
|
| 138 |
-
print(f"Poin : {result['poin']}/5")
|
| 139 |
-
print("="*70 + "\n")
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
# ========== DEMO ==========
|
| 143 |
-
|
| 144 |
-
def demo():
|
| 145 |
-
"""Demo function"""
|
| 146 |
-
analyzer = TempoAnalyzer()
|
| 147 |
-
|
| 148 |
-
audio_path = "./bad.wav"
|
| 149 |
-
result = analyzer.analyze_tempo(audio_path)
|
| 150 |
-
analyzer.print_report(result)
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
if __name__ == "__main__":
|
| 154 |
-
demo()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
upload_model_to_hf.py
DELETED
|
@@ -1,175 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Script untuk upload best_model ke Hugging Face Hub
|
| 3 |
-
Run sekali saja untuk upload model
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
-
from huggingface_hub import HfApi, create_repo, login
|
| 7 |
-
import os
|
| 8 |
-
|
| 9 |
-
# Konfigurasi
|
| 10 |
-
MODEL_PATH = "./best_model" # Path ke model lokal
|
| 11 |
-
REPO_NAME = "Cyberlace/swara-structure-model" # Nama repository di HF Hub
|
| 12 |
-
|
| 13 |
-
def upload_model():
|
| 14 |
-
"""Upload model ke Hugging Face Hub"""
|
| 15 |
-
|
| 16 |
-
print("=" * 70)
|
| 17 |
-
print("📦 Uploading Structure Model to Hugging Face Hub")
|
| 18 |
-
print("=" * 70)
|
| 19 |
-
|
| 20 |
-
# Step 1: Check if already logged in
|
| 21 |
-
print("\n🔐 Step 1: Checking Hugging Face authentication")
|
| 22 |
-
|
| 23 |
-
from huggingface_hub import HfFolder
|
| 24 |
-
token = HfFolder.get_token()
|
| 25 |
-
|
| 26 |
-
if token is None:
|
| 27 |
-
print("❌ Not logged in!")
|
| 28 |
-
print("\n💡 Please login first:")
|
| 29 |
-
print(" Run: huggingface-cli login")
|
| 30 |
-
return
|
| 31 |
-
|
| 32 |
-
print("✅ Already logged in!")
|
| 33 |
-
|
| 34 |
-
# Step 2: Buat repository (jika belum ada)
|
| 35 |
-
print(f"\n📁 Step 2: Creating repository: {REPO_NAME}")
|
| 36 |
-
try:
|
| 37 |
-
create_repo(
|
| 38 |
-
repo_id=REPO_NAME,
|
| 39 |
-
repo_type="model",
|
| 40 |
-
exist_ok=True # Skip jika sudah ada
|
| 41 |
-
)
|
| 42 |
-
print("✅ Repository ready!")
|
| 43 |
-
except Exception as e:
|
| 44 |
-
print(f"⚠️ Repository might already exist: {e}")
|
| 45 |
-
|
| 46 |
-
# Step 3: Upload semua files di best_model
|
| 47 |
-
print(f"\n📤 Step 3: Uploading model files from {MODEL_PATH}")
|
| 48 |
-
|
| 49 |
-
api = HfApi()
|
| 50 |
-
|
| 51 |
-
# List semua files di best_model
|
| 52 |
-
files_to_upload = []
|
| 53 |
-
for root, dirs, files in os.walk(MODEL_PATH):
|
| 54 |
-
for file in files:
|
| 55 |
-
file_path = os.path.join(root, file)
|
| 56 |
-
# Relative path untuk upload
|
| 57 |
-
path_in_repo = os.path.relpath(file_path, MODEL_PATH)
|
| 58 |
-
files_to_upload.append((file_path, path_in_repo))
|
| 59 |
-
|
| 60 |
-
print(f" Found {len(files_to_upload)} files to upload:")
|
| 61 |
-
for file_path, path_in_repo in files_to_upload:
|
| 62 |
-
file_size = os.path.getsize(file_path) / (1024 * 1024) # MB
|
| 63 |
-
print(f" - {path_in_repo} ({file_size:.2f} MB)")
|
| 64 |
-
|
| 65 |
-
# Upload files
|
| 66 |
-
print("\n⏳ Uploading files...")
|
| 67 |
-
try:
|
| 68 |
-
for file_path, path_in_repo in files_to_upload:
|
| 69 |
-
print(f" Uploading {path_in_repo}...", end=" ")
|
| 70 |
-
api.upload_file(
|
| 71 |
-
path_or_fileobj=file_path,
|
| 72 |
-
path_in_repo=path_in_repo,
|
| 73 |
-
repo_id=REPO_NAME,
|
| 74 |
-
repo_type="model"
|
| 75 |
-
)
|
| 76 |
-
print("✅")
|
| 77 |
-
|
| 78 |
-
print("\n🎉 Upload complete!")
|
| 79 |
-
print(f"📍 Model URL: https://huggingface.co/{REPO_NAME}")
|
| 80 |
-
|
| 81 |
-
except Exception as e:
|
| 82 |
-
print(f"\n❌ Upload failed: {e}")
|
| 83 |
-
return
|
| 84 |
-
|
| 85 |
-
# Step 4: Create README
|
| 86 |
-
print("\n📝 Step 4: Creating README.md")
|
| 87 |
-
readme_content = f"""---
|
| 88 |
-
language:
|
| 89 |
-
- id
|
| 90 |
-
license: apache-2.0
|
| 91 |
-
tags:
|
| 92 |
-
- text-classification
|
| 93 |
-
- indonesian
|
| 94 |
-
- speech-structure
|
| 95 |
-
- bert
|
| 96 |
-
datasets:
|
| 97 |
-
- custom
|
| 98 |
-
---
|
| 99 |
-
|
| 100 |
-
# Swara Structure Analysis Model
|
| 101 |
-
|
| 102 |
-
BERT model untuk analisis struktur berbicara (opening, content, closing) dalam Bahasa Indonesia.
|
| 103 |
-
|
| 104 |
-
## Model Description
|
| 105 |
-
|
| 106 |
-
Model ini dilatih untuk mengklasifikasikan kalimat dalam pidato/presentasi menjadi 3 kategori:
|
| 107 |
-
- **Opening**: Pembukaan (salam, perkenalan, pengantar)
|
| 108 |
-
- **Content**: Isi utama (poin-poin, argumen, penjelasan)
|
| 109 |
-
- **Closing**: Penutup (kesimpulan, ucapan terima kasih)
|
| 110 |
-
|
| 111 |
-
## Usage
|
| 112 |
-
|
| 113 |
-
```python
|
| 114 |
-
from transformers import BertTokenizer, BertForSequenceClassification
|
| 115 |
-
import torch
|
| 116 |
-
|
| 117 |
-
# Load model
|
| 118 |
-
model_name = "{REPO_NAME}"
|
| 119 |
-
tokenizer = BertTokenizer.from_pretrained(model_name)
|
| 120 |
-
model = BertForSequenceClassification.from_pretrained(model_name)
|
| 121 |
-
|
| 122 |
-
# Predict
|
| 123 |
-
text = "Selamat pagi hadirin sekalian"
|
| 124 |
-
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
|
| 125 |
-
|
| 126 |
-
with torch.no_grad():
|
| 127 |
-
outputs = model(**inputs)
|
| 128 |
-
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
|
| 129 |
-
predicted_class = torch.argmax(probs, dim=1).item()
|
| 130 |
-
|
| 131 |
-
labels = {{0: "opening", 1: "content", 2: "closing"}}
|
| 132 |
-
print(f"Predicted: {{labels[predicted_class]}}")
|
| 133 |
-
```
|
| 134 |
-
|
| 135 |
-
## Training Data
|
| 136 |
-
|
| 137 |
-
Model dilatih dengan dataset pidato dan presentasi dalam Bahasa Indonesia.
|
| 138 |
-
|
| 139 |
-
## Intended Use
|
| 140 |
-
|
| 141 |
-
Model ini digunakan dalam sistem analisis public speaking untuk:
|
| 142 |
-
- Evaluasi struktur presentasi
|
| 143 |
-
- Feedback otomatis untuk pembicara
|
| 144 |
-
- Training public speaking
|
| 145 |
-
"""
|
| 146 |
-
|
| 147 |
-
try:
|
| 148 |
-
api.upload_file(
|
| 149 |
-
path_or_fileobj=readme_content.encode('utf-8'),
|
| 150 |
-
path_in_repo="README.md",
|
| 151 |
-
repo_id=REPO_NAME,
|
| 152 |
-
repo_type="model"
|
| 153 |
-
)
|
| 154 |
-
print("✅ README created!")
|
| 155 |
-
except Exception as e:
|
| 156 |
-
print(f"⚠️ README creation failed: {e}")
|
| 157 |
-
|
| 158 |
-
print("\n" + "=" * 70)
|
| 159 |
-
print("✅ ALL DONE!")
|
| 160 |
-
print("=" * 70)
|
| 161 |
-
print(f"\n📍 Model Repository: https://huggingface.co/{REPO_NAME}")
|
| 162 |
-
print("\n💡 Next steps:")
|
| 163 |
-
print(" 1. Update app/services/structure.py to use this model")
|
| 164 |
-
print(" 2. Remove best_model/ from your Space repository")
|
| 165 |
-
print(" 3. Deploy and test")
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
if __name__ == "__main__":
|
| 169 |
-
# Check if best_model exists
|
| 170 |
-
if not os.path.exists(MODEL_PATH):
|
| 171 |
-
print(f"❌ Error: Model path not found: {MODEL_PATH}")
|
| 172 |
-
print(" Please make sure best_model/ directory exists")
|
| 173 |
-
exit(1)
|
| 174 |
-
|
| 175 |
-
upload_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|