Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Building a Validation SDK

This guide walks through building a Python SDK that validates a CML AMP project against the Validation Rules Reference. The SDK can be used standalone or integrated into a CI/CD pipeline.

Data Model

from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path


class Severity(Enum):
    """Severity level attached to a validation issue."""

    # An ERROR issue makes ValidationResult.passed evaluate to False.
    ERROR = "error"
    # A WARNING issue is reported but does not affect ValidationResult.passed.
    WARNING = "warning"


@dataclass
class ValidationIssue:
    """One finding reported by a validation rule."""

    rule: str              # identifier of the rule that fired, e.g. "S-001"
    severity: Severity     # ERROR or WARNING
    message: str           # human-readable description of the finding
    path: str | None = None  # file path that triggered the issue; None when no path is attached


@dataclass
class ValidationResult:
    """Collects the issues appended by the validation steps."""

    issues: list[ValidationIssue] = field(default_factory=list)

    def _with_severity(self, level: Severity) -> list[ValidationIssue]:
        """Return the recorded issues whose severity equals *level*, in order."""
        return [entry for entry in self.issues if entry.severity == level]

    @property
    def passed(self) -> bool:
        """True when no recorded issue has ERROR severity."""
        for entry in self.issues:
            if entry.severity == Severity.ERROR:
                return False
        return True

    @property
    def errors(self) -> list[ValidationIssue]:
        """All recorded issues with ERROR severity."""
        return self._with_severity(Severity.ERROR)

    @property
    def warnings(self) -> list[ValidationIssue]:
        """All recorded issues with WARNING severity."""
        return self._with_severity(Severity.WARNING)

Validation Pipeline

The SDK runs five validation steps in sequence. Each step appends issues to the shared ValidationResult.

Step 1: Check Repository Structure (S-rules)

def validate_structure(root: Path, result: ValidationResult) -> None:
    """Append an ERROR issue for each required project file missing under *root*.

    Args:
        root: Project root directory that the relative paths are resolved against.
        result: Shared result object; issues are appended to ``result.issues``.
    """
    # (rule id, path relative to the project root), in reporting order.
    expected = (
        ("S-001", ".project-metadata.yaml"),
        ("S-003", "requirements.txt"),
        ("S-004", "scripts/predict_fraud.py"),
        ("S-006", "cdsw-build.sh"),
        ("S-007", "utils/dask_utils.py"),
        ("S-008", "setup.py"),
    )
    result.issues.extend(
        ValidationIssue(
            rule=rule_id,
            severity=Severity.ERROR,
            message=f"Required file missing: {relative}",
            path=relative,
        )
        for rule_id, relative in expected
        if not (root / relative).exists()
    )

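Rule S-005 (model/best-xgboost-model) should only be checked after training, because the model file does not exist before then. Add a boolean check_model flag to control this, and run the following check only when the flag is set: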

    # S-005: the trained model is only required when the caller sets
    # `check_model`. This fragment expects `check_model`, `root` and `result`
    # to be in scope where it is placed.
    if check_model and not (root / "model" / "best-xgboost-model").exists():
        result.issues.append(ValidationIssue(
            rule="S-005",
            severity=Severity.ERROR,
            message="Trained model missing: model/best-xgboost-model",
            path="model/best-xgboost-model",
        ))

Step 2: Parse AMP Configuration (A-rules)

import yaml

def validate_amp_config(root: Path, result: ValidationResult) -> None:
    """Validate the runtime definition in ``.project-metadata.yaml``.

    Checks performed, in order:

    * S-002: the file parses as YAML.
    * A-001: a non-empty ``runtimes`` list exists; only its first entry is
      inspected by the remaining checks.
    * A-002: the runtime ``kernel`` names Python 3.9 or newer.
    * A-003: the runtime ``editor`` is ``JupyterLab``.

    Args:
        root: Project root directory.
        result: Shared result object; issues are appended to ``result.issues``.
    """
    config_path = root / ".project-metadata.yaml"
    if not config_path.exists():
        return  # already caught by S-001

    try:
        config = yaml.safe_load(config_path.read_text())
    except yaml.YAMLError:
        result.issues.append(ValidationIssue(
            rule="S-002", severity=Severity.ERROR,
            message=".project-metadata.yaml is not valid YAML",
            path=".project-metadata.yaml",
        ))
        return

    # An empty document loads as None, and the top level may also be a list or
    # scalar. None of these can define runtimes, so let them fall through to
    # A-001 instead of crashing on `.get`.
    if not isinstance(config, dict):
        config = {}

    # A-001: runtimes must exist
    runtimes = config.get("runtimes")
    if (
        not isinstance(runtimes, list)
        or not runtimes
        or not isinstance(runtimes[0], dict)
    ):
        result.issues.append(ValidationIssue(
            rule="A-001", severity=Severity.ERROR,
            message="No runtimes defined in .project-metadata.yaml",
        ))
        return

    runtime = runtimes[0]

    # A-002: Python 3.9+
    kernel = runtime.get("kernel", "")
    if not isinstance(kernel, str):
        # e.g. an unquoted `kernel: 3.9` loads as a float
        kernel = str(kernel)

    if "Python" not in kernel:
        result.issues.append(ValidationIssue(
            rule="A-002", severity=Severity.ERROR,
            message=f"Runtime kernel must be Python 3.9+, got: {kernel}",
        ))
    else:
        version_text = kernel.split("Python", 1)[1].strip()
        try:
            # Compare component-wise: float("3.10") == 3.1 would wrongly
            # sort Python 3.10 below 3.9.
            version = tuple(int(part) for part in version_text.split("."))
        except ValueError:
            # Version text that is not purely dotted integers is left
            # unflagged (best effort).
            version = None
        if version is not None and version < (3, 9):
            result.issues.append(ValidationIssue(
                rule="A-002", severity=Severity.ERROR,
                message=f"Python version must be >= 3.9, got: {version_text}",
            ))

    # A-003: JupyterLab
    editor = runtime.get("editor")
    if editor != "JupyterLab":
        result.issues.append(ValidationIssue(
            rule="A-003", severity=Severity.ERROR,
            message=f"Runtime editor must be JupyterLab, got: {editor}",
        ))

Step 3: Validate Dependencies (D-rules)

def validate_dependencies(root: Path, result: ValidationResult) -> None:
    """Check that ``requirements.txt`` mentions the required packages (D-rules).

    The check is a case-insensitive substring search over the file with
    comments removed, so a commented-out requirement does not satisfy a rule.
    It is otherwise deliberately lenient: a package counts as present whenever
    its name appears anywhere in a non-comment part of the file.

    Args:
        root: Project root directory.
        result: Shared result object; issues are appended to ``result.issues``.
    """
    import re

    req_path = root / "requirements.txt"
    if not req_path.exists():
        return  # already caught by S-003

    # A '#' starts a comment only at the beginning of a line or after
    # whitespace, so URL fragments such as `...#egg=name` are kept.
    comment = re.compile(r"(^|\s+)#.*$")
    content = "\n".join(
        comment.sub("", line)
        for line in req_path.read_text().lower().splitlines()
    )

    checks = {
        "D-001": "xgboost",
        "D-002": "dask",
        "D-003": "scikit-learn",
        "D-005": "numpy",
    }
    for rule, pkg in checks.items():
        if pkg not in content:
            result.issues.append(ValidationIssue(
                rule=rule, severity=Severity.ERROR,
                message=f"Required package missing from requirements.txt: {pkg}",
                path="requirements.txt",
            ))

    if "-e ." not in content and "-e." not in content:
        result.issues.append(ValidationIssue(
            rule="D-004", severity=Severity.ERROR,
            message="Local package install (-e .) missing from requirements.txt",
            path="requirements.txt",
        ))

Step 4: Validate Endpoint Contract (E-rules)

import ast

def validate_endpoint(root: Path, result: ValidationResult) -> None:
    """Validate the contract of ``scripts/predict_fraud.py`` (E-rules).

    Checks performed, in order:

    * E-001: the script parses as Python. Later checks are skipped otherwise.
    * E-002: a function named ``predict_fraud`` is defined somewhere in the
      module. Later checks are skipped otherwise.
    * E-003: that function declares exactly one named parameter.
    * E-004: the text ``best-xgboost-model`` appears in the source.
    * E-005: the text ``threshold`` appears in the source.

    E-004 and E-005 are plain substring searches over the whole file.

    Args:
        root: Project root directory.
        result: Shared result object; issues are appended to ``result.issues``.
    """
    script_path = root / "scripts" / "predict_fraud.py"
    if not script_path.exists():
        return  # already caught by S-004

    source = script_path.read_text()

    # E-001: valid Python
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError) as e:
        # ValueError covers source containing null bytes, which the compiler
        # rejects without raising SyntaxError on some Python versions.
        result.issues.append(ValidationIssue(
            rule="E-001", severity=Severity.ERROR,
            message=f"Syntax error in predict_fraud.py: {e}",
            path="scripts/predict_fraud.py",
        ))
        return

    # E-002 & E-003: function exists with correct signature
    func = next(
        (
            node for node in ast.walk(tree)
            if isinstance(node, ast.FunctionDef) and node.name == "predict_fraud"
        ),
        None,
    )

    if func is None:
        result.issues.append(ValidationIssue(
            rule="E-002", severity=Severity.ERROR,
            message="Function 'predict_fraud' not found in predict_fraud.py",
            path="scripts/predict_fraud.py",
        ))
        return

    # Count every named parameter: `args.args` alone misses positional-only
    # (`def f(x, /)`) and keyword-only (`def f(x, *, y)`) parameters.
    # Catch-all *args / **kwargs are not counted.
    signature = func.args
    param_count = (
        len(signature.posonlyargs)
        + len(signature.args)
        + len(signature.kwonlyargs)
    )
    if param_count != 1:
        result.issues.append(ValidationIssue(
            rule="E-003", severity=Severity.ERROR,
            message=f"predict_fraud must accept exactly 1 parameter, found {param_count}",
            path="scripts/predict_fraud.py",
        ))

    # E-004 & E-005: check for model path and threshold in source
    if "best-xgboost-model" not in source:
        result.issues.append(ValidationIssue(
            rule="E-004", severity=Severity.ERROR,
            message="Model path '/home/cdsw/model/best-xgboost-model' not found",
            path="scripts/predict_fraud.py",
        ))

    if "threshold" not in source:
        result.issues.append(ValidationIssue(
            rule="E-005", severity=Severity.ERROR,
            message="'threshold' variable not found in predict_fraud.py",
            path="scripts/predict_fraud.py",
        ))

Step 5: Validate Cluster Utilities (C-rules)

def validate_cluster_utils(root: Path, result: ValidationResult) -> None:
    """Append C-rule warnings for ``utils/dask_utils.py``.

    Emits C-W01 when no function named ``run_dask_cluster`` is defined and
    C-W04 when the text ``8786`` does not occur in the file. Nothing is
    reported when the file is missing or cannot be parsed.

    Args:
        root: Project root directory.
        result: Shared result object; issues are appended to ``result.issues``.
    """
    relative = "utils/dask_utils.py"
    module_file = root / "utils" / "dask_utils.py"
    if not module_file.exists():
        return  # already caught by S-007

    text = module_file.read_text()
    try:
        parsed = ast.parse(text)
    except SyntaxError:
        # Unparseable file: both C-rule checks are skipped.
        return

    defined = set()
    for node in ast.walk(parsed):
        if isinstance(node, ast.FunctionDef):
            defined.add(node.name)

    # Collect (rule id, message) pairs first, then emit them in rule order.
    findings = []
    if "run_dask_cluster" not in defined:
        findings.append(
            ("C-W01", "Function 'run_dask_cluster' not found in dask_utils.py")
        )
    if "8786" not in text:
        findings.append(
            ("C-W04", "Scheduler port 8786 not referenced in dask_utils.py")
        )

    for rule_id, description in findings:
        result.issues.append(ValidationIssue(
            rule=rule_id,
            severity=Severity.WARNING,
            message=description,
            path=relative,
        ))

Running the SDK

from pathlib import Path

def validate(project_root: str, check_model: bool = False) -> ValidationResult:
    """Run every validation step against a project and return the findings.

    Args:
        project_root: Path to the project directory to validate.
        check_model: When true, also require the trained model file
            ``model/best-xgboost-model`` (rule S-005). Leave false before
            training has produced the model.

    Returns:
        A ValidationResult holding the issues from all steps, in step order.
    """
    root = Path(project_root)
    result = ValidationResult()

    validate_structure(root, result)

    # S-005 is applied here, next to the other S-rules, because
    # validate_structure takes no flag; without this the check_model
    # argument would have no effect.
    if check_model and not (root / "model" / "best-xgboost-model").exists():
        result.issues.append(ValidationIssue(
            rule="S-005",
            severity=Severity.ERROR,
            message="Trained model missing: model/best-xgboost-model",
            path="model/best-xgboost-model",
        ))

    validate_amp_config(root, result)
    validate_dependencies(root, result)
    validate_endpoint(root, result)
    validate_cluster_utils(root, result)

    return result


if __name__ == "__main__":
    # Validate the current directory, print one line per issue, and exit
    # with status 1 when any ERROR-level issue was found.
    result = validate(".")
    for finding in result.issues:
        level = finding.severity.value.upper()
        print(f"[{level}] {finding.rule}: {finding.message}")
    if not result.passed:
        print(f"\nValidation failed with {len(result.errors)} error(s).")
        raise SystemExit(1)
    print("\nValidation passed.")

Optional: Model Smoke Test

If the trained model is available, you can add a smoke test that loads it and runs inference with sample data:

def smoke_test_model(root: Path, result: ValidationResult) -> None:
    """Load the trained model and run one prediction on a fixed sample row.

    Fails with AssertionError when the first prediction lies outside
    [0.0, 1.0].

    NOTE(review): `result` is accepted but never written to; failures surface
    as assertions rather than recorded issues, and assertions are removed
    when Python runs with -O.

    Args:
        root: Project root directory containing ``model/best-xgboost-model``.
        result: Shared result object (currently unused by this function).
    """
    import numpy as np
    import xgboost as xgb

    booster = xgb.Booster(model_file=str(root / "model" / "best-xgboost-model"))

    # One sample row of 29 feature values.
    features = [
        -1.35980713, -0.0727811733, 2.53634674, 1.37815522,
        -0.33832077, 0.462387778, 0.239598554, 0.0986979013,
        0.36378697, 0.090794172, -0.551599533, -0.617800856,
        -0.991389847, -0.311169354, 1.46817697, -0.470400525,
        0.207971242, 0.0257905802, 0.40399296, 0.251412098,
        -0.0183067779, 0.277837576, -0.11047391, 0.0669280749,
        0.128539358, -0.189114844, 0.133558377, -0.0210530535,
        149.62,
    ]
    sample = np.array([features])

    score = booster.inplace_predict(sample)[0]
    assert 0.0 <= score <= 1.0, f"Prediction out of range: {score}"

    # Presumably 0.35 mirrors the endpoint's decision threshold; confirm
    # against scripts/predict_fraud.py.
    if score <= 0.35:
        binary = 0
    else:
        binary = 1
    assert binary in (0, 1)