Source code for torch_sla.backends

"""
Backend management for torch-sla

This module provides a unified interface for different sparse linear algebra backends:

Backends:
- 'scipy': SciPy backend (CPU only) - Uses SuperLU for direct solvers
- 'eigen': Eigen backend (CPU only) - Iterative solvers (CG, BiCGStab)
- 'pytorch': PyTorch-native (CPU & CUDA) - Iterative solvers with Jacobi preconditioning
- 'cusolver': NVIDIA cuSOLVER (CUDA only) - Direct solvers (not recommended, use cudss)
- 'cudss': NVIDIA cuDSS (CUDA only) - Direct solvers (LU, Cholesky, LDLT)

Methods (solver algorithms):
- 'superlu': SuperLU direct solver (scipy backend)
- 'umfpack': UMFPACK direct solver (scipy backend, requires scikit-umfpack)
- 'cg': Conjugate Gradient (iterative, for SPD matrices)
- 'bicgstab': BiCGStab (iterative, general matrices)
- 'gmres': GMRES (iterative, general matrices)
- 'qr': QR decomposition (direct)
- 'lu': LU decomposition (direct)
- 'cholesky': Cholesky decomposition (direct, SPD matrices)
- 'ldlt': LDLT decomposition (direct, symmetric matrices)

Recommended Backends (based on benchmarks):
==========================================

float64 (double precision):
---------------------------
- CPU, DOF < 100K: scipy+superlu (best balance of speed and accuracy)
- CPU, DOF >= 100K: scipy+superlu (still fast, machine precision ~1e-15)
- CUDA, DOF < 100K: cudss+cholesky (for SPD) or cudss+lu (general)
- CUDA, DOF >= 100K: cudss+cholesky (direct, ~1e-14 precision)
- CUDA, DOF > 2M: pytorch+cg (iterative, ~1e-6 precision, memory efficient)

float32 (single precision):
---------------------------
- CPU: scipy+superlu (precision ~1e-6)
- CUDA: cudss+cholesky/ldlt (precision ~1e-6)
- Note: cuSOLVER does NOT support float32!
- Note: Iterative methods may not converge well with float32

Usage:
    # Auto-select backend based on device and problem size
    x = spsolve(A, b)  # Uses scipy for CPU, cudss for CUDA (small), pytorch for CUDA (large)
    
    # Specify backend and method
    x = spsolve(A, b, backend='scipy', method='superlu')
    x = spsolve(A, b, backend='cudss', method='cholesky')
    x = spsolve(A, b, backend='pytorch', method='cg')  # GPU iterative
"""

from typing import Optional, List, Dict, Literal
import torch

# Type aliases
BackendType = Literal['scipy', 'eigen', 'pytorch', 'cusolver', 'cudss', 'auto']
MethodType = Literal[
    'auto',
    # Direct methods
    'superlu', 'umfpack', 'lu', 'qr', 'cholesky', 'ldlt',
    # Iterative methods
    'cg', 'bicgstab', 'gmres', 'lgmres', 'minres', 'qmr'
]

# Backend -> supported methods mapping
BACKEND_METHODS: Dict[str, List[str]] = {
    'scipy': ['superlu', 'umfpack', 'cg', 'bicgstab', 'gmres', 'lgmres', 'minres', 'qmr'],
    'eigen': ['cg', 'bicgstab'],
    'pytorch': ['cg', 'bicgstab'],  # PyTorch-native iterative with Jacobi preconditioning
    'cusolver': ['qr', 'cholesky', 'lu'],  # Note: does not support float32!
    'cudss': ['lu', 'cholesky', 'ldlt'],
}

# Default methods for each backend (based on benchmarks)
DEFAULT_METHODS: Dict[str, str] = {
    'scipy': 'superlu',      # Best for CPU: fast + machine precision
    'eigen': 'bicgstab',
    'pytorch': 'cg',         # Use CG for SPD (most common), with Jacobi preconditioning
    'cusolver': 'lu',        # Not recommended, use cudss instead
    'cudss': 'cholesky',     # Best for CUDA: fastest + high precision
}

# Threshold for switching from direct to iterative on CUDA (DOF)
# Based on benchmark: direct solvers (cudss) work well up to ~2M DOF
CUDA_ITERATIVE_THRESHOLD = 2_000_000

# Backend availability flags
_scipy_available: Optional[bool] = None
_cusolver_available: Optional[bool] = None
_cudss_available: Optional[bool] = None
_eigen_available: Optional[bool] = None


def _check_cuda() -> bool:
    """Check if CUDA is available"""
    return torch.cuda.is_available()


[docs] def is_scipy_available() -> bool: """Check if SciPy backend is available""" global _scipy_available if _scipy_available is None: try: import scipy.sparse.linalg _scipy_available = True except ImportError: _scipy_available = False return _scipy_available
[docs] def is_eigen_available() -> bool: """Check if Eigen backend (C++ extension) is available""" global _eigen_available if _eigen_available is None: try: _load_eigen_backend() _eigen_available = True except Exception: _eigen_available = False return _eigen_available
def is_pytorch_available() -> bool: """Check if PyTorch-native backend is available (always True)""" return True
[docs] def is_cusolver_available() -> bool: """Check if cuSOLVER backend is available""" global _cusolver_available if _cusolver_available is None: if not _check_cuda(): _cusolver_available = False else: try: _load_cusolver_backend() _cusolver_available = True except Exception: _cusolver_available = False return _cusolver_available
[docs] def is_cudss_available() -> bool: """Check if cuDSS backend is available""" global _cudss_available if _cudss_available is None: if not _check_cuda(): _cudss_available = False else: try: _load_cudss_backend() _cudss_available = True except Exception: _cudss_available = False return _cudss_available
[docs] def get_available_backends() -> List[str]: """Get list of available backends""" backends = [] if is_scipy_available(): backends.append('scipy') if is_eigen_available(): backends.append('eigen') backends.append('pytorch') # Always available if is_cusolver_available(): backends.append('cusolver') if is_cudss_available(): backends.append('cudss') return backends
[docs] def get_backend_methods(backend: str) -> List[str]: """Get list of methods supported by a backend""" return BACKEND_METHODS.get(backend, [])
[docs] def get_default_method(backend: str) -> str: """Get default method for a backend""" return DEFAULT_METHODS.get(backend, 'auto')
[docs] def select_backend( device: torch.device, n: Optional[int] = None, dtype: Optional[torch.dtype] = None, prefer_direct: bool = True ) -> str: """ Auto-select the best backend based on device, problem size, and dtype. Recommendations based on benchmark results: - CPU: scipy+superlu (all sizes, fast + machine precision) - CUDA (DOF < 2M): cudss+cholesky (fast + high precision) - CUDA (DOF >= 2M): pytorch+cg (memory efficient, ~1e-6 precision) Parameters ---------- device : torch.device Target device (cpu or cuda) n : int, optional Problem size (DOF). If > CUDA_ITERATIVE_THRESHOLD, prefer iterative. dtype : torch.dtype, optional Data type. Note: cuSOLVER does not support float32! prefer_direct : bool If True, prefer direct solvers over iterative (when applicable) Returns ------- str Backend name ('scipy', 'eigen', 'pytorch', 'cusolver', or 'cudss') """ if device.type == 'cpu': # CPU: scipy is best (SuperLU: fast + machine precision) if is_scipy_available(): return 'scipy' if is_eigen_available(): return 'eigen' return 'pytorch' # Fallback to PyTorch-native elif device.type == 'cuda': # Check dtype - cuSOLVER does NOT support float32! is_float32 = dtype == torch.float32 if dtype is not None else False # Large problem: use iterative (PyTorch-native on GPU with Jacobi preconditioning) if n is not None and n > CUDA_ITERATIVE_THRESHOLD: return 'pytorch' # Small/medium problem: prefer direct solvers if prefer_direct: # cuDSS is best for CUDA (supports both float32 and float64) if is_cudss_available(): return 'cudss' # cuSOLVER only for float64 if is_cusolver_available() and not is_float32: return 'cusolver' # Fallback to iterative return 'pytorch' else: raise ValueError(f"Unsupported device type: {device.type}")
[docs] def select_method( backend: str, is_symmetric: bool = False, is_spd: bool = False, prefer_direct: bool = True ) -> str: """ Auto-select the best method for a given backend and matrix properties. Recommendations based on benchmark results: - scipy: superlu (direct, best precision) or cg (iterative, for SPD) - cudss: cholesky (SPD, fastest) > ldlt (symmetric) > lu (general) - pytorch: cg (SPD) or bicgstab (general), both with Jacobi preconditioning Parameters ---------- backend : str Backend name is_symmetric : bool Whether the matrix is symmetric is_spd : bool Whether the matrix is symmetric positive definite prefer_direct : bool If True, prefer direct solvers Returns ------- str Method name """ methods = BACKEND_METHODS.get(backend, []) if backend == 'scipy': if prefer_direct: return 'superlu' # Best: fast + machine precision elif is_spd: return 'cg' else: return 'bicgstab' elif backend == 'eigen': return 'cg' if is_spd else 'bicgstab' elif backend == 'pytorch': # Iterative with Jacobi preconditioning return 'cg' if is_spd else 'bicgstab' elif backend == 'cusolver': # Note: cuSOLVER does not support float32! if is_spd and 'cholesky' in methods: return 'cholesky' return 'lu' if 'lu' in methods else 'qr' elif backend == 'cudss': # Recommendation: cholesky > ldlt > lu (based on benchmarks) if is_spd and 'cholesky' in methods: return 'cholesky' # Fastest for SPD elif is_symmetric and 'ldlt' in methods: return 'ldlt' return 'lu' return DEFAULT_METHODS.get(backend, methods[0] if methods else 'auto')
def _load_eigen_backend(): """Load Eigen backend (C++ iterative solvers)""" global _eigen_module if _eigen_module is not None: return _eigen_module import os from torch.utils.cpp_extension import load try: _eigen_module = load( name="spsolve", sources=[os.path.abspath(os.path.join( os.path.dirname(__file__), "..", "..", "csrc", "spsolve", "spsolve.cpp" ))], verbose=False ) return _eigen_module except Exception as e: raise RuntimeError(f"Failed to load Eigen backend: {e}") def _load_cusolver_backend(): """Load cuSOLVER backend""" global _cusolver_module if _cusolver_module is not None: return _cusolver_module if not _check_cuda(): raise RuntimeError("CUDA is not available") import os from torch.utils.cpp_extension import load try: _cusolver_module = load( name="cusolver_spsolve", sources=[os.path.abspath(os.path.join( os.path.dirname(__file__), "..", "..", "csrc", "cusolver", "cusolver_spsolve.cu" ))], extra_ldflags=['-lcusolver', '-lcusparse'], verbose=False ) return _cusolver_module except Exception as e: raise RuntimeError(f"Failed to load cuSOLVER backend: {e}") def _find_cudss_paths(): """Find cuDSS include and library paths from pip package""" import site import os # Search paths search_paths = site.getsitepackages() + [site.getusersitepackages()] for base in search_paths: # nvidia-cudss-cu12 installs to nvidia/cu12/ include_path = os.path.join(base, 'nvidia', 'cu12', 'include') lib_path = os.path.join(base, 'nvidia', 'cu12', 'lib') if os.path.exists(os.path.join(include_path, 'cudss.h')): return include_path, lib_path return None, None def _load_cudss_backend(): """Load cuDSS backend""" global _cudss_module if _cudss_module is not None: return _cudss_module if not _check_cuda(): raise RuntimeError("CUDA is not available") import os from torch.utils.cpp_extension import load # Find cuDSS paths cudss_include, cudss_lib = _find_cudss_paths() if cudss_include is None: raise RuntimeError("cuDSS not found. Install with: pip install nvidia-cudss-cu12") try: _cudss_module = load( name="cudss_spsolve", sources=[os.path.abspath(os.path.join( os.path.dirname(__file__), "..", "..", "csrc", "cudss", "cudss_spsolve.cu" ))], extra_include_paths=[cudss_include], extra_ldflags=[f'-L{cudss_lib}', '-lcudss', '-lcusparse', f'-Wl,-rpath,{cudss_lib}'], verbose=False ) return _cudss_module except Exception as e: raise RuntimeError(f"Failed to load cuDSS backend: {e}") # Lazy-loaded modules _eigen_module = None _cusolver_module = None _cudss_module = None # Convenience functions for getting modules def get_cpu_module(): """Get Eigen backend module (legacy name for compatibility)""" return get_eigen_module() def get_eigen_module(): """Get Eigen backend module""" return _load_eigen_backend() def get_cusolver_module(): """Get cuSOLVER backend module""" return _load_cusolver_backend() def get_cudss_module(): """Get cuDSS backend module""" return _load_cudss_backend()