Custom Providers
QCOS is built around a plugin architecture — any hardware can be integrated by implementing the NodeProvider ABC. This page covers three approaches:
- Subclass `NodeProvider` — for custom hardware or clusters
- Use `CustomRESTProvider` — for any server with an HTTP endpoint
- Deploy a REST bridge on LUMI — to run real Aer on LUMI GPUs
Approach 1 — Subclass NodeProvider
Minimal implementation
from typing import List
from network.node_providers import NodeProvider, NodeProviderRegistry
from network.distributed_qvm import QPUNodeSpec, NodeType
class MyGPUProvider(NodeProvider):
    """On-premise NVIDIA DGX cluster."""

    DISPLAY_NAME = "My DGX Cluster"

    def build_nodes(self, **kwargs) -> List[QPUNodeSpec]:
        """Return one GPU-Aer node spec per requested node."""
        count = kwargs.get("n_nodes", 1)
        specs = []
        for idx in range(count):
            spec = QPUNodeSpec(
                node_id=f"dgx-node-{idx}",
                node_type=NodeType.GPU_AER,
                max_qubits=30,
                tags=["dgx", "on-premise"],
                credentials={"host": kwargs.get("host", "dgx-001.internal")},
            )
            specs.append(spec)
        return specs


# Register in the global registry
NodeProviderRegistry.register("my_dgx", MyGPUProvider)
Then use it like any built-in provider:
from network.node_providers import ClusterBuilder

# Same fluent API as the built-in providers, written step by step.
builder = ClusterBuilder()
builder = builder.add("my_dgx", n_nodes=4, host="dgx-001.internal")
builder = builder.add("local_cpu", n_nodes=2)
registry = builder.build()
Adding custom execution logic
Override execute() to control how circuits run on your hardware:
from typing import Dict, Optional
from qiskit import QuantumCircuit
class MyGPUProvider(NodeProvider):
    """On-premise NVIDIA DGX cluster with custom execution."""

    DISPLAY_NAME = "My DGX Cluster"

    def build_nodes(self, **kwargs) -> List[QPUNodeSpec]:
        ...  # as above

    def execute(
        self,
        circuit: QuantumCircuit,
        shots: int,
        node_spec: QPUNodeSpec,
    ) -> Optional[Dict[str, int]]:
        """Custom execution — return counts dict or None to fall through."""
        host = node_spec.credentials.get("host")
        if not host:
            # No host configured — let QCOS fall back to the built-in executor.
            return None
        # Run your custom GPU job here...
        import subprocess
        result = subprocess.run(
            ["dgx-runner", "--shots", str(shots), "--host", host],
            capture_output=True, text=True,
        )
        if result.returncode != 0:
            # Job failed — don't parse garbage stdout; fall through instead.
            return None
        counts = parse_result(result.stdout)  # your stdout -> counts parser
        return counts  # e.g. {"0000": 512, "1111": 512}
If execute() raises NotImplementedError (the default) or returns None, QCOS falls back to the built-in Aer executor.
Adding a health check
def health_check(self) -> bool:
    """Return False if the cluster is unreachable over SSH (port 22)."""
    import socket
    # _last_host is only populated after nodes have been built — fall back
    # to the default host so the check never raises AttributeError.
    host = getattr(self, "_last_host", None) or "dgx-001.internal"
    try:
        # `with` closes the probe socket instead of leaking the connection.
        with socket.create_connection((host, 22), timeout=3):
            return True
    except OSError:
        return False
Approach 2 — CustomRESTProvider
The custom_rest built-in provider connects to any HTTP server that accepts a standard POST /run-circuit request. No custom Python class needed.
Endpoint contract
POST /run-circuit
Content-Type: application/json
Authorization: Bearer <api_key> (if api_key provided)
Body:
{
"qasm": "<OpenQASM 3.0 string>",
"shots": 1024
}
Response (200 OK):
{
"counts": {
"0000": 512,
"1111": 512
}
}
Registration
# One REST worker per endpoint URL.
rest_endpoints = [
    "http://dgx-001.internal:8888",
    "http://dgx-002.internal:8888",
    "http://dgx-003.internal:8888",
]
registry = (
    ClusterBuilder()
    .add(
        "custom_rest",
        endpoints=rest_endpoints,
        max_qubits=30,
        api_key="my-secret-key",  # optional
        timeout_s=60,
    )
    .build()
)
Each endpoint becomes one QCOS node (custom-rest-0, custom-rest-1, …).
Approach 3 — LUMI REST Bridge
Deploy a FastAPI bridge on a LUMI login node so QCOS can submit real Aer (ROCm) circuits via HTTP.
Bridge server (qcos_lumi_bridge.py)
"""
Minimal QCOS circuit bridge for LUMI.
Deploy on LUMI login node, then SSH-tunnel to localhost.
"""
from fastapi import FastAPI, HTTPException, Header
from pydantic import BaseModel
from typing import Optional, Dict
import asyncio, subprocess, json, os, tempfile
app = FastAPI(title="QCOS LUMI Bridge")
API_KEY = os.environ.get("BRIDGE_API_KEY", "")
VENV = "/flash/project_465002463/venv_qcos/bin/python"
class CircuitRequest(BaseModel):
qasm: str
shots: int = 1024
@app.post("/run-circuit")
async def run_circuit(
req: CircuitRequest,
authorization: Optional[str] = Header(None),
) -> Dict[str, dict]:
if API_KEY and authorization != f"Bearer {API_KEY}":
raise HTTPException(status_code=401, detail="Unauthorized")
# Write circuit to temp file
with tempfile.NamedTemporaryFile(suffix=".qasm", delete=False, mode="w") as f:
f.write(req.qasm)
qasm_path = f.name
try:
# Run Aer statevector on GPU via subprocess
script = f"""
import sys, json
from qiskit import qasm3
from qiskit_aer import AerSimulator
with open('{qasm_path}') as fh:
qc = qasm3.loads(fh.read())
backend = AerSimulator(method='statevector', device='GPU')
job = backend.run(qc, shots={req.shots})
counts = job.result().get_counts()
print(json.dumps(dict(counts)))
"""
proc = await asyncio.create_subprocess_exec(
VENV, "-c", script,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=300)
if proc.returncode != 0:
raise HTTPException(status_code=500, detail=stderr.decode())
counts = json.loads(stdout.decode().strip())
return {"counts": counts}
finally:
os.unlink(qasm_path)
@app.get("/health")
async def health():
return {"status": "ok"}
Deploy on LUMI
# 1. Upload bridge to LUMI
rsync qcos_lumi_bridge.py lumi:/flash/project_465002463/
# 2. Install FastAPI on LUMI venv
ssh lumi "source /flash/project_465002463/venv_qcos/bin/activate && \
pip install fastapi uvicorn"
# 3. Start the bridge (keep alive via tmux or nohup)
# NOTE: the log directory must already exist — create it first if needed
ssh lumi "cd /flash/project_465002463 && \
source venv_qcos/bin/activate && \
BRIDGE_API_KEY=my-secret \
nohup uvicorn qcos_lumi_bridge:app --host 0.0.0.0 --port 8888 \
> /scratch/project_465002463/logs/bridge.log 2>&1 &"
Connect from your local machine
# Open SSH tunnel (runs in background)
# -N: no remote command; -L: forward local 18888 -> remote 8888
ssh -N -L 18888:localhost:8888 lumi &
# Test
curl http://localhost:18888/health
# {"status": "ok"}
# Register the LUMI bridge as custom_rest nodes
bridge_url = "http://localhost:18888"
builder = ClusterBuilder()
builder = builder.add(
    "custom_rest",
    endpoints=[bridge_url] * 8,  # 8 virtual LUMI workers
    max_qubits=34,
    api_key="my-secret",
    timeout_s=120,
)
builder = builder.add("local_cpu", n_nodes=2)  # local fallback
registry = builder.build()

qvm = DistributedQVM(registry, mode=QVMMode.EMULATED, shots=2048)
result = qvm.run(my_circuit)
Registering providers globally
To make your provider available by name in all future ClusterBuilder calls, register it at import time:
# In your project's __init__.py or entrypoint
from network.node_providers import NodeProviderRegistry
from myproject.providers import MyGPUProvider, MyFPGAProvider
# Register once at import time, before any ClusterBuilder().build() call.
NodeProviderRegistry.register("my_dgx", MyGPUProvider)
NodeProviderRegistry.register("my_fpga", MyFPGAProvider)
Then anywhere in the codebase:
ClusterBuilder().add("my_dgx", n_nodes=4).build()
ClusterBuilder().add("my_fpga", n_nodes=2).build()
Provider checklist
| Item | Required | Notes |
|---|---|---|
| DISPLAY_NAME class attribute | ✅ | Shown in registry list |
| build_nodes(**kwargs) → List[QPUNodeSpec] | ✅ | Must return at least 1 node |
| execute(circuit, shots, node_spec) | ❌ | Override for custom execution |
| health_check() → bool | ❌ | Override for liveness checks |
| NodeProviderRegistry.register(name, cls) | ✅ | Must register to use by name |
NodeProviderRegistry.register() is not thread-safe. Register all providers at startup, before calling ClusterBuilder().build().
Use cases by provider type
| Use case | Provider approach |
|---|---|
| On-premise GPU server (Aer) | Subclass + override execute() with subprocess |
| On-premise QPU hardware | Subclass + override execute() with vendor SDK |
| Any server with HTTP endpoint | custom_rest (no code needed) |
| LUMI real GPU simulation | custom_rest + LUMI REST bridge |
| Mock / test provider | Subclass with hardcoded counts |
| Multi-tenant GPU pool | Subclass with load balancing in build_nodes() |