RunPod · 3mo ago
Aqua

How to Deploy vLLM Serverless Using a Programming Language

Hello, how can we deploy Serverless vLLM instances using an API rather than going through the UI?
1 Reply
yhlong00000 · 3mo ago
You can try our graphql api, here is an example:
url: https://api.runpod.io/graphql?api_key=api_key_here

mutation saveEndpoint($input: EndpointInput!) {
saveEndpoint(input: $input) {
gpuIds
id
idleTimeout
locations
name
networkVolumeId
scalerType
scalerValue
templateId
userId
workersMax
workersMin
gpuCount
}
}
// input
{
"gpuIds": "AMPERE_16",
"gpuCount": 1,
"allowedCudaVersions": "",
"idleTimeout": 5,
"locations": null,
"name": "broken_violet_tarantula -fb",
"networkVolumeId": null,
"scalerType": "QUEUE_DELAY",
"scalerValue": 4,
"workersMax": 3,
"workersMin": 0,
"executionTimeoutMs": 600000,
"template": {
"containerDiskInGb": 5,
"containerRegistryAuthId": "",
"dockerArgs": "",
"env": [],
"imageName": "test/test",
"startScript": "",
"ports": "",
"name": "broken_violet_tarantula__template__6oknzg"
}
}
url: https://api.runpod.io/graphql?api_key=api_key_here

mutation saveEndpoint($input: EndpointInput!) {
saveEndpoint(input: $input) {
gpuIds
id
idleTimeout
locations
name
networkVolumeId
scalerType
scalerValue
templateId
userId
workersMax
workersMin
gpuCount
}
}
// input
{
"gpuIds": "AMPERE_16",
"gpuCount": 1,
"allowedCudaVersions": "",
"idleTimeout": 5,
"locations": null,
"name": "broken_violet_tarantula -fb",
"networkVolumeId": null,
"scalerType": "QUEUE_DELAY",
"scalerValue": 4,
"workersMax": 3,
"workersMin": 0,
"executionTimeoutMs": 600000,
"template": {
"containerDiskInGb": 5,
"containerRegistryAuthId": "",
"dockerArgs": "",
"env": [],
"imageName": "test/test",
"startScript": "",
"ports": "",
"name": "broken_violet_tarantula__template__6oknzg"
}
}

Did you find this page helpful?