Serialization

Overview

Strands Evals provides JSON serialization for experiments and reports, enabling you to save, load, version, and share evaluation work.
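
At its simplest, serialization is a round trip: write an experiment to disk, then reconstruct an equivalent one later. A minimal sketch, assuming an experiment object already exists:

experiment.to_file("baseline.json")
restored = Experiment.from_file("baseline.json")
assert len(restored.cases) == len(experiment.cases)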

Saving Experiments

from strands_evals import Experiment

# Save to file
experiment.to_file("my_experiment.json")
experiment.to_file("my_experiment")  # .json added automatically

# Relative path
experiment.to_file("experiments/baseline.json")

# Absolute path
experiment.to_file("/path/to/experiments/baseline.json")

Loading Experiments

# Load from file
experiment = Experiment.from_file("my_experiment.json")

print(f"Loaded {len(experiment.cases)} cases")
print(f"Evaluators: {[e.get_type_name() for e in experiment.evaluators]}")

Custom Evaluators

Pass custom evaluator classes when loading:

from strands_evals.evaluators import Evaluator
# EvaluationOutput is the return type for evaluators; adjust this
# import if your installed version exposes it under a different path.
from strands_evals import EvaluationOutput

class CustomEvaluator(Evaluator):
    def evaluate(self, evaluation_case):
        # Custom scoring logic goes here
        return EvaluationOutput(score=1.0, test_pass=True, reason="...")

# Save with custom evaluator
experiment = Experiment(
    cases=cases,
    evaluators=[CustomEvaluator()]
)
experiment.to_file("custom.json")

# Load with custom evaluator class
loaded = Experiment.from_file(
    "custom.json",
    custom_evaluators=[CustomEvaluator]
)

Dictionary Conversion

# To dictionary
experiment_dict = experiment.to_dict()

# From dictionary
experiment = Experiment.from_dict(experiment_dict)

# With custom evaluators
experiment = Experiment.from_dict(
    experiment_dict,
    custom_evaluators=[CustomEvaluator]
)
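
Because to_dict returns plain, JSON-serializable data, the dictionary can travel over any JSON channel, not just files. A small sketch round-tripping through a string:

import json

# Serialize to a JSON string (e.g., to store in a database or send over HTTP)
payload = json.dumps(experiment.to_dict())

# Reconstruct the experiment on the other side
experiment = Experiment.from_dict(json.loads(payload))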

Saving Reports

import json

# Run evaluation (task_function executes your agent/task for each case)
reports = experiment.run_evaluations(task_function)

# Save reports
for i, report in enumerate(reports):
    report_data = {
        "evaluator": experiment.evaluators[i].get_type_name(),
        "overall_score": report.overall_score,
        "scores": report.scores,
        "test_passes": report.test_passes,
        "reasons": report.reasons
    }

    with open(f"report_{i}.json", "w") as f:
        json.dump(report_data, f, indent=2)
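
To compare runs later, read the saved report files back with plain json. A minimal sketch, assuming the report_<i>.json files written above:

import json
from pathlib import Path

# Load every saved report and print its headline score
for path in sorted(Path(".").glob("report_*.json")):
    with open(path) as f:
        report_data = json.load(f)
    print(f"{report_data['evaluator']}: {report_data['overall_score']}")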

Versioning Strategies

Timestamp Versioning

from datetime import datetime

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
experiment.to_file(f"experiment_{timestamp}.json")

Semantic Versioning

experiment.to_file("experiment_v1.json")
experiment.to_file("experiment_v2.json")

Organizing Files

Directory Structure

experiments/
├── baseline/
│   ├── experiment.json
│   └── reports/
├── iteration_1/
│   ├── experiment.json
│   └── reports/
└── final/
    ├── experiment.json
    └── reports/

Organized Saving

from pathlib import Path

base_dir = Path("experiments/iteration_1")
base_dir.mkdir(parents=True, exist_ok=True)

# Save experiment
experiment.to_file(base_dir / "experiment.json")

# Create a directory for report files (written as shown in the next section)
reports_dir = base_dir / "reports"
reports_dir.mkdir(exist_ok=True)

Saving Experiments with Reports

from pathlib import Path
import json

def save_with_reports(experiment, reports, base_name):
    base_path = Path(f"evaluations/{base_name}")
    base_path.mkdir(parents=True, exist_ok=True)

    # Save experiment
    experiment.to_file(base_path / "experiment.json")

    # Save reports
    for i, report in enumerate(reports):
        evaluator_name = experiment.evaluators[i].get_type_name()
        report_data = {
            "evaluator": evaluator_name,
            "overall_score": report.overall_score,
            "pass_rate": sum(report.test_passes) / len(report.test_passes),
            "scores": report.scores
        }

        with open(base_path / f"report_{evaluator_name}.json", "w") as f:
            json.dump(report_data, f, indent=2)

# Usage
reports = experiment.run_evaluations(task_function)
save_with_reports(experiment, reports, "baseline_20250115")

Error Handling

from pathlib import Path

def safe_load(path, custom_evaluators=None):
    try:
        file_path = Path(path)

        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        if file_path.suffix != ".json":
            raise ValueError(f"Expected .json file, got: {file_path.suffix}")

        experiment = Experiment.from_file(path, custom_evaluators=custom_evaluators)
        print(f"✓ Loaded {len(experiment.cases)} cases")
        return experiment

    except Exception as e:
        print(f"✗ Failed to load: {e}")
        return None
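
Usage then degrades gracefully instead of raising:

experiment = safe_load("experiments/baseline.json")
if experiment is not None:
    reports = experiment.run_evaluations(task_function)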

Best Practices

1. Use Consistent Naming

# Good
experiment.to_file("customer_service_baseline_v1.json")

# Less helpful
experiment.to_file("test.json")

2. Validate After Loading

experiment = Experiment.from_file("experiment.json")

assert len(experiment.cases) > 0, "No cases loaded"
assert len(experiment.evaluators) > 0, "No evaluators loaded"

3. Include Metadata

import json
from datetime import datetime

experiment_data = experiment.to_dict()
experiment_data["metadata"] = {
    "created_date": datetime.now().isoformat(),
    "description": "Baseline evaluation",
    "version": "1.0"
}

with open("experiment.json", "w") as f:
    json.dump(experiment_data, f, indent=2)
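
When loading a file written this way, strip the extra key first, since from_dict may not accept fields outside the experiment schema (whether it tolerates them can depend on your version):

import json

with open("experiment.json") as f:
    experiment_data = json.load(f)

# Separate the metadata we added from the serialized experiment itself
metadata = experiment_data.pop("metadata", None)
experiment = Experiment.from_dict(experiment_data)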