Deployment Details（Expert）

更新时间：2025-07-22 17:32:25

Base model service details.

GET

https://api.alayanew.com/api/serverless-infer/v1/deployment/{serviceId}

Authorizations

Authorizations：String, Header, Required

用户可使用已获取的 Open API Key 进行验证，例如：plain Credential=[YOUR_AK],Signature=[YOUR_SK]。

Path Parameters

serviceId：String, Required

Service ID.

Response

状态码：

200

application/json

serviceUrl：String

Service URL.

apiKey：String

API Key.

status：String

Status.

instance：List<Map<String, Object>>

Instance information.

instance.instanceId：String

Instance ID.

instance.loraModels：List<String>

List of associated LoRA models.

instance.baseModelReady：boolean

Whether the base model is ready.

serviceId：String

Service ID.

vksId：String

Vital Kubernetes Service (VKS) Cluster ID.

namespace：String

Vital Kubernetes Service (VKS) Namespace.

name：String

Service name.

servedName：List<String>

Internal model identifier.

modelId：String

Model ID.

mode：String

Startup mode, e.g., quickStart/expert.

quickStart：Object

quickStart.backend：String

quickStart.backendVersion：String

quickStart.backenArgs：Array[String]

quickStart.resource：Object

cURL

Python

JavaScript

Java

# Fetch the details of one deployment; replace YOUR_AK / YOUR_SK with your Open API credentials.
curl --location \
     --request GET 'https://api.alayanew.com/api/serverless-infer/v1/deployment/38fbfc3d-6a88-4c35-b8b6-9efc83949d47' \
     --header 'Authorization:plain Credential=YOUR_AK,Signature=YOUR_SK' \
     --header 'Content-Type: application/json'

200

400

401

403

404

500

{
    "code": 0,
    "data": {
        "serviceUrl":"string",
        "apiKey":"String",
        "status":"String, model deployment status: starting, running, stopping, stopped, failed",
        "instance": [{
            "instanceId": "0",
            "loraModels": ["lora1","lora2"],
            "baseModelReady": true
        }],
        "vksId":"",
        "namespace":"",
        "name":"string, service display name customized by user",
        "servedName": ["string, internal model identifier"],
        "modelId": "String, model ID",
        "mode":"quickStart",
        "quickStart":{
            "backend":"vllm/sglang",
            "backendVersion":"0.8.4...",
            "backenArgs":[],
            "resource": {
                "workers": "optional, int, number of workers",
                "cpu": "required,int",
                "mem": "required,int",
                "gpu": {
                    "gpuType": "required, string, gpu type name",
                    "count": "required, int, number of gpu to be used"
                }
            }
        },
        "expert":{...}
    }
}

服务状态

服务状态（status）流转详情如下图所示。

服务调用

用户在部署模型服务后，可通过指定模型的参数来调用该服务。调用服务的代码示例如下所示。

cURL
Python
JavaScript
Go
Java

# Send one non-streaming chat request to the deployed service.
# Replace [serviceUrl], [apiKey] and [servedName] with your deployment's values.
# The JSON body is single-quoted: backticks would make the shell try to run it
# as a command instead of passing it to curl.
curl --location --request POST '[serviceUrl]/v1/chat/completions' \
     --header 'apiKey: [apiKey]' \
     --header 'Content-Type: application/json' \
     --data-raw '{ "stream":false,
     "messages": [{"role":"user", "content":"你是谁，能干嘛"}],
     "model":"[servedName]"}'

import requests

# Chat completions endpoint of the deployed service. Replace the [serviceUrl],
# [apiKey] and [servedName] placeholders with your deployment's values.
url = "[serviceUrl]/v1/chat/completions"
headers = {
    "apiKey": "[apiKey]"
}
data = {
    "stream": False,
    "messages": [
        {
            "role": "user",
            "content": "你是谁，能干嘛"
        }
    ],
    "model": "[servedName]"
}

# (connect, read) timeouts in seconds; without a timeout the call can block
# indefinitely if the service never answers.
response = requests.post(url, headers=headers, json=data, timeout=(10, 600))

# Report HTTP errors with the server's message instead of attempting to parse
# an error payload as a successful completion.
if not response.ok:
    raise SystemExit(f"Request failed with HTTP {response.status_code}: {response.text}")

print(response.json())

const axios = require('axios');

// Chat completions endpoint of the deployed service. Replace the [serviceUrl],
// [apiKey] and [servedName] placeholders with your deployment's values.
const url = '[serviceUrl]/v1/chat/completions';
const headers = {
    'apiKey': '[apiKey]'
};
const data = {
    stream: false,
    messages: [
        {
            role: 'user',
            content: '你是谁，能干嘛'
        }
    ],
    model: '[servedName]'
};

axios.post(url, data, { headers })
    .then((response) => {
        // Print only the parsed response body. The full axios response object
        // also carries the request config, which includes the apiKey header.
        console.log(response.data);
    })
    .catch((error) => {
        // error.response is present when the server replied with an error
        // status; otherwise (network failure, etc.) fall back to the message.
        const detail = error.response ? error.response.data : error.message;
        console.error('请求失败:', detail);
    });

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
)

// main sends a single non-streaming chat request to the deployed service and
// prints the decoded JSON response. Replace the [serviceUrl], [apiKey] and
// [servedName] placeholders with your deployment's values.
func main() {
    url := "[serviceUrl]/v1/chat/completions"

    // Message is one chat turn in the request payload.
    type Message struct {
        Role    string `json:"role"`
        Content string `json:"content"`
    }

    // RequestBody is the JSON payload posted to the chat completions endpoint.
    type RequestBody struct {
        Stream   bool      `json:"stream"`
        Messages []Message `json:"messages"`
        Model    string    `json:"model"`
    }

    body := RequestBody{
        Stream: false,
        Messages: []Message{
            {Role: "user", Content: "你是谁，能干嘛"},
        },
        Model: "[servedName]",
    }

    jsonData, err := json.Marshal(body)
    if err != nil {
        panic(err)
    }

    // Check the error before touching req: on failure req is nil and
    // req.Header.Set would dereference a nil pointer.
    req, err := http.NewRequest(http.MethodPost, url, bytes.NewBuffer(jsonData))
    if err != nil {
        panic(err)
    }
    req.Header.Set("apiKey", "[apiKey]")
    req.Header.Set("Content-Type", "application/json")

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    // Fail loudly on a body that is not valid JSON instead of printing an
    // empty map.
    var result map[string]interface{}
    if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
        panic(err)
    }
    fmt.Println(result)
}

import okhttp3.*;

import java.io.IOException;
import java.util.concurrent.TimeUnit;

/**
 * Minimal example that sends one non-streaming chat request to the deployed
 * service with OkHttp and prints the raw response body.
 *
 * Replace the [apiKey], [serviceUrl] and [servedName] placeholders with your
 * deployment's values before running.
 */
public class Test {

    public static void main(String[] args) {
        String apiKey = "[apiKey]";
        String endpoint = "[serviceUrl]/v1/chat/completions";
        String model = "[servedName]";

        // Long read/write timeouts because a completion can take a long time.
        OkHttpClient client = new OkHttpClient().newBuilder()
                .connectTimeout(10, TimeUnit.SECONDS)
                .readTimeout(10, TimeUnit.MINUTES)
                .writeTimeout(10, TimeUnit.MINUTES)
                .build();

        String jsonInputString = "{"
                + "\"stream\":false,"
                + "\"messages\":[{\"role\":\"user\",\"content\":\"你是谁，能干嘛\"}],"
                + "\"model\":\"" + model + "\""
                + "}";

        // Echo the payload so the request being sent is visible.
        System.out.println(jsonInputString);

        MediaType mediaType = MediaType.parse("application/json");
        RequestBody body = RequestBody.create(mediaType, jsonInputString);
        Request request = new Request.Builder()
                .url(endpoint)
                .method("POST", body)
                .addHeader("Content-Type", "application/json")
                .addHeader("Accept", "application/json")
                .addHeader("apiKey", apiKey)
                .build();

        // try-with-resources closes the response on every path, including
        // when reading the body throws.
        try (Response response = client.newCall(request).execute()) {
            System.out.println(response.body().string());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

服务状态​

服务调用​

服务状态

服务调用