lostdev
RunPod
Created by nimishchug on 9/27/2024 in #⚡|serverless
My output is restricted to no of tokens
Even the "max_tokens" placement in the examples here is wrong.
4 replies
RunPod
Created by nimishchug on 9/27/2024 in #⚡|serverless
My output is restricted to no of tokens
The documentation is bad and doesn't tell you why this is. Here is why: https://discord.com/channels/912829806415085598/1279829584749138109
4 replies
RunPod
Created by Encyrption on 8/27/2024 in #⚡|serverless
v1 API definitions?
great thank you
7 replies
RunPod
Created by Encyrption on 8/27/2024 in #⚡|serverless
v1 API definitions?
Is this code something simple you could share? I just started and have logging communication coming up on my TODO. No worries if you'd rather not.
7 replies
RunPod
Created by lostdev on 9/1/2024 in #⚡|serverless
Response is always 16 tokens.
For the curious, it was the max_tokens parameter, which I suspected but didn't know how to remedy. It turns out the proper place to set max_tokens in the JSON body of the request is inside a sampling_params dictionary, not as a sibling of prompt. So instead of
{
  "input": {
    "prompt": "Tell me a story about two bananas who solve the case of the missing hamburger.",
    "max_tokens": 5000
  }
}
it needs to be
{
  "input": {
    "prompt": "Tell me a story about two bananas who solve the case of the missing hamburger.",
    "sampling_params": {
      "max_tokens": 5000
    }
  }
}
which I couldn't figure out until I found the JobInput class:
class JobInput:
    def __init__(self, job):
        self.llm_input = job.get("messages", job.get("prompt"))
        self.stream = job.get("stream", False)
        self.max_batch_size = job.get("max_batch_size")
        self.apply_chat_template = job.get("apply_chat_template", False)
        self.use_openai_format = job.get("use_openai_format", False)
        # max_tokens and the other sampling options are read from "sampling_params"
        self.sampling_params = SamplingParams(**job.get("sampling_params", {}))
        self.request_id = random_uuid()
        batch_size_growth_factor = job.get("batch_size_growth_factor")
        self.batch_size_growth_factor = float(batch_size_growth_factor) if batch_size_growth_factor else None
        min_batch_size = job.get("min_batch_size")
        self.min_batch_size = int(min_batch_size) if min_batch_size else None
        self.openai_route = job.get("openai_route")
        self.openai_input = job.get("openai_input")
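For anyone following along, here's a minimal sketch of the corrected request from Python. The endpoint ID, API key, and use of the requests library are placeholders/assumptions on my part; the /runsync route and Bearer auth follow RunPod's standard serverless API.
import requests

# Placeholders - fill in your own serverless endpoint ID and API key
ENDPOINT_ID = "your-endpoint-id"
API_KEY = "your-api-key"

payload = {
    "input": {
        "prompt": "Tell me a story about two bananas who solve the case of the missing hamburger.",
        "sampling_params": {
            # any field vLLM's SamplingParams accepts (max_tokens, temperature, top_p, ...) goes here
            "max_tokens": 5000
        }
    }
}

resp = requests.post(
    f"https://api.runpod.ai/v2/{ENDPOINT_ID}/runsync",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json=payload,
)
print(resp.json())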
2 replies