Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Test script to verify environment variables and cache directory permissions. | |
| This should be run before the main application to ensure everything is set up correctly. | |
| """ | |
| import os | |
| import tempfile | |
| import sys | |
def test_environment_setup() -> bool:
    """Report whether every required environment variable is set.

    Prints one status line per variable. A variable that is missing or set
    to an empty string is reported as ``NOT SET``.

    Returns:
        True if all required variables have a non-empty value, False
        otherwise.
    """
    print("=" * 60)
    print("Testing Environment Setup")
    print("=" * 60)

    # Check critical environment variables
    critical_vars = [
        'HF_HOME',
        'HF_CACHE_HOME',
        'HF_HUB_CACHE',
        'TRANSFORMERS_CACHE',
        'HF_DATASETS_CACHE',
        'TEMP_DIR',
        'HOME',
        'TMPDIR',
    ]

    all_good = True
    for var in critical_vars:
        value = os.environ.get(var)
        if value:
            print(f"✅ {var}: {value}")
        else:
            # Distinct marker so failures stand out from successes at a glance.
            print(f"❌ {var}: NOT SET")
            all_good = False
    return all_good
def test_cache_directories() -> bool:
    """Check that every cache directory can be created and written to.

    Each directory comes from its environment variable, falling back to a
    fixed path under ``/tmp/docling_temp`` when the variable is unset. The
    directory is created if necessary and a throw-away file is written
    inside it. One status line is printed per directory.

    Returns:
        True if every directory could be created and written to, False if
        any of them failed.
    """
    print("\n" + "=" * 60)
    print("Testing Cache Directory Access")
    print("=" * 60)

    cache_dirs = [
        os.environ.get('HF_HOME', '/tmp/docling_temp/huggingface'),
        os.environ.get('HF_CACHE_HOME', '/tmp/docling_temp/huggingface_cache'),
        os.environ.get('HF_HUB_CACHE', '/tmp/docling_temp/huggingface_cache'),
        os.environ.get('TRANSFORMERS_CACHE', '/tmp/docling_temp/transformers_cache'),
        os.environ.get('HF_DATASETS_CACHE', '/tmp/docling_temp/datasets_cache'),
        os.environ.get('TORCH_HOME', '/tmp/docling_temp/torch'),
        os.environ.get('TENSORFLOW_HOME', '/tmp/docling_temp/tensorflow'),
        os.environ.get('KERAS_HOME', '/tmp/docling_temp/keras'),
    ]

    all_good = True
    for cache_dir in cache_dirs:
        try:
            os.makedirs(cache_dir, exist_ok=True)
            # A uniquely named probe file cannot overwrite (and then delete)
            # a real file that happens to live in the cache directory, and it
            # is removed on exit from the ``with`` block even if the write
            # itself fails.
            with tempfile.NamedTemporaryFile(
                mode='w', dir=cache_dir, prefix='write_test_', suffix='.txt'
            ) as probe:
                probe.write('test')
                probe.flush()  # force the data out so write errors surface here
            print(f"✅ {cache_dir}: WRITABLE")
        except (OSError, ValueError) as e:
            # OSError: permission / filesystem problems.
            # ValueError: malformed path (e.g. embedded NUL byte).
            print(f"❌ {cache_dir}: ERROR - {e}")
            all_good = False
    return all_good
def _missing_path_components(path):
    """Return the directories of *path* that do not exist yet, deepest first."""
    missing = []
    current = os.path.normpath(path)
    while current and not os.path.lexists(current):
        missing.append(current)
        parent = os.path.dirname(current)
        if parent == current:
            break
        current = parent
    return missing


def test_root_filesystem_access(critical_root_paths=None) -> bool:
    """Check that critical root-level paths cannot be written by this process.

    For each path the function tries to create the directory. A
    ``PermissionError`` is the desired outcome; any other error is reported
    as a warning and does not count as a failure. If creation succeeds the
    check fails, and every directory created by the probe is removed again
    so the check leaves nothing behind. A directory that already exists is
    judged by whether the process may write to it.

    Finally the temp directory (``TEMP_DIR``, default ``/tmp/docling_temp``)
    is created if needed and probed for write access.

    Args:
        critical_root_paths: Optional iterable of paths to probe. Defaults
            to the root-level cache/config locations listed in the body.

    Returns:
        True if no probed path was writable and the temp directory is
        writable, False otherwise.
    """
    print("\n" + "=" * 60)
    print("Testing Critical Root Filesystem Access Prevention")
    print("=" * 60)

    # Only test critical paths that the application might try to access
    if critical_root_paths is None:
        critical_root_paths = [
            '/.cache',  # This is the main one that causes issues
            '/.config',
            '/.local',
            '/.huggingface',
            '/.cache/huggingface',
            '/.cache/transformers',
        ]

    all_good = True
    for path in critical_root_paths:
        if os.path.isdir(path):
            # makedirs(exist_ok=True) succeeds on an existing directory
            # without creating anything, so it cannot tell us whether the
            # location is protected; check write permission instead.
            if os.access(path, os.W_OK):
                print(f"❌ {path}: ALREADY EXISTS AND IS WRITABLE (SHOULD FAIL)")
                all_good = False
            else:
                print(f"✅ {path}: EXISTS BUT NOT WRITABLE (GOOD)")
            continue

        # Remember what the probe is about to create so it can be undone.
        probe_dirs = _missing_path_components(path)
        try:
            os.makedirs(path, exist_ok=True)
        except PermissionError:
            print(f"✅ {path}: PERMISSION DENIED (GOOD)")
        except Exception as e:
            print(f"⚠️ {path}: OTHER ERROR - {e}")
        else:
            print(f"❌ {path}: SUCCESSFULLY CREATED (SHOULD FAIL)")
            all_good = False

        # Remove whatever the probe created (also after a partial makedirs).
        # rmdir refuses non-empty directories, so foreign content is safe.
        for created in probe_dirs:
            if os.path.isdir(created):
                try:
                    os.rmdir(created)
                except OSError as e:
                    print(f"⚠️ {created}: COULD NOT REMOVE PROBE DIRECTORY - {e}")

    # Test that we can access our temp directory (this is what matters)
    temp_dir = os.environ.get('TEMP_DIR', '/tmp/docling_temp')
    print(f"\nTesting temp directory access: {temp_dir}")
    try:
        # Create the directory here so the check does not depend on another
        # test having run first.
        os.makedirs(temp_dir, exist_ok=True)
        with tempfile.NamedTemporaryFile(
            mode='w', dir=temp_dir, prefix='access_test_', suffix='.txt'
        ) as probe:
            probe.write('test')
            probe.flush()
        print("✅ Temp directory is writable (CRITICAL)")
    except (OSError, ValueError) as e:
        print(f"❌ Temp directory not writable: {e}")
        all_good = False
    return all_good
def test_temp_directory() -> bool:
    """Check that the temp directory can be created and written to.

    The directory is read from ``TEMP_DIR`` and defaults to
    ``/tmp/docling_temp``. It is created if missing, then a throw-away file
    is written inside it.

    Returns:
        True if the directory exists (or was created) and is writable,
        False otherwise.
    """
    print("\n" + "=" * 60)
    print("Testing Temp Directory Access")
    print("=" * 60)

    temp_dir = os.environ.get('TEMP_DIR', '/tmp/docling_temp')
    try:
        os.makedirs(temp_dir, exist_ok=True)
        # A uniquely named probe file cannot clobber an existing file in the
        # temp directory and is removed automatically, even on write failure.
        with tempfile.NamedTemporaryFile(
            mode='w', dir=temp_dir, prefix='temp_test_', suffix='.txt'
        ) as probe:
            probe.write('temp test')
            probe.flush()  # make sure the write really reaches the filesystem
    except (OSError, ValueError) as e:
        print(f"❌ {temp_dir}: ERROR - {e}")
        return False
    print(f"✅ {temp_dir}: WRITABLE")
    return True
def main():
    """Run every environment check, print a summary, and exit.

    If ``TEMP_DIR`` is unset or empty, the home, temp and cache-related
    environment variables are all pointed at locations below
    ``<system temp dir>/docling_temp`` before the checks run.

    The process exits with status 0 when the environment-variable,
    cache-directory and temp-directory checks all pass, and with status 1
    otherwise. The root-access check is reported but does not influence the
    exit status.
    """
    print("Docling Environment and Permission Test")
    print("This script tests that the environment is set up correctly for Hugging Face Spaces")

    # Set environment variables if not already set
    if not os.environ.get('TEMP_DIR'):
        temp_dir = os.path.join(tempfile.gettempdir(), "docling_temp")
        os.environ.update({
            'TEMP_DIR': temp_dir,
            'HOME': temp_dir,
            'USERPROFILE': temp_dir,
            'TMPDIR': temp_dir,
            'TEMP': temp_dir,
            'TMP': temp_dir,
            'HF_HOME': os.path.join(temp_dir, 'huggingface'),
            'HF_CACHE_HOME': os.path.join(temp_dir, 'huggingface_cache'),
            'HF_HUB_CACHE': os.path.join(temp_dir, 'huggingface_cache'),
            'TRANSFORMERS_CACHE': os.path.join(temp_dir, 'transformers_cache'),
            'HF_DATASETS_CACHE': os.path.join(temp_dir, 'datasets_cache'),
            'DIFFUSERS_CACHE': os.path.join(temp_dir, 'diffusers_cache'),
            'ACCELERATE_CACHE': os.path.join(temp_dir, 'accelerate_cache'),
            'TORCH_HOME': os.path.join(temp_dir, 'torch'),
            'TENSORFLOW_HOME': os.path.join(temp_dir, 'tensorflow'),
            'KERAS_HOME': os.path.join(temp_dir, 'keras'),
            'XDG_CACHE_HOME': os.path.join(temp_dir, 'cache'),
            'XDG_CONFIG_HOME': os.path.join(temp_dir, 'config'),
            'XDG_DATA_HOME': os.path.join(temp_dir, 'data'),
        })

    # Run tests. The temp-directory check runs before the root check, which
    # also probes the temp directory.
    env_ok = test_environment_setup()
    cache_ok = test_cache_directories()
    temp_ok = test_temp_directory()
    # Only test critical root paths, not all root access
    root_ok = test_root_filesystem_access()

    # Summary - focus on what's critical for the application
    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)
    print(f"Environment Variables: {'✅ PASS' if env_ok else '❌ FAIL'}")
    print(f"Cache Directories: {'✅ PASS' if cache_ok else '❌ FAIL'}")
    print(f"Temp Directory: {'✅ PASS' if temp_ok else '❌ FAIL'}")
    print(f"Critical Root Access Prevention: {'✅ PASS' if root_ok else '❌ FAIL'}")

    # The application will work if cache directories and temp directory are working
    critical_success = env_ok and cache_ok and temp_ok
    overall_success = critical_success and root_ok
    print(f"\nCritical for Application: {'✅ PASS' if critical_success else '❌ FAIL'}")
    print(f"Overall Result: {'✅ ALL TESTS PASSED' if overall_success else '⚠️ SOME TESTS FAILED'}")

    # Exit with success if critical tests pass, even if root access test fails
    if critical_success:
        print("\n🎉 Critical tests passed! The environment is ready for Docling.")
        if not root_ok:
            # Only mention root-access failures when there actually were any.
            print("Note: Some root access tests failed, but this doesn't affect the application.")
        sys.exit(0)

    print("\n❌ Critical tests failed. Please check the environment setup.")
    sys.exit(1)
# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()