Anindya
Anindya
Explore posts from servers
MModular
Created by Anindya on 6/14/2024 in #questions
How to declare a function type in a trait?
Yes, I am thinking of not adding a try block — but then you are saying that just using a raise would work with an if?
10 replies
MModular
Created by Anindya on 6/14/2024 in #questions
How to declare a function type in a trait?
Hi, so there was one more problem I am facing
from python import Python


struct Module:
    # Handle to the Python module loaded from `func.py`.
    var func_module: PythonObject

    fn __init__(inout self, module_path: String):
        # `Python.add_to_path` / `Python.import_module` can raise, so the
        # calls must sit in a try block. Every path through __init__ must
        # still assign `self.func_module`, however — the original left it
        # unassigned in the except branch, which is exactly what produces
        # "'self.func_module' is uninitialized at the implicit return".
        try:
            Python.add_to_path(module_path)
            self.func_module = Python.import_module("func")
        except Exception:
            # Fall back to Python's None so the field is initialized on
            # the error path too.
            self.func_module = PythonObject(None)
from python import Python


struct Module:
    # Handle to the Python module loaded from `func.py`.
    var func_module: PythonObject

    fn __init__(inout self, module_path: String):
        # `Python.add_to_path` / `Python.import_module` can raise, so the
        # calls must sit in a try block. Every path through __init__ must
        # still assign `self.func_module`, however — the original left it
        # unassigned in the except branch, which is exactly what produces
        # "'self.func_module' is uninitialized at the implicit return".
        try:
            Python.add_to_path(module_path)
            self.func_module = Python.import_module("func")
        except Exception:
            # Fall back to Python's None so the field is initialized on
            # the error path too.
            self.func_module = PythonObject(None)
So, anyhow, I was not able to initialize my func_module, which is taken from the func.py file. I had to put it under a try block, otherwise it gave me an error, and once that was done, I got this kind of error:
/Users/anindyadeepsannigrahi/workspace/Mojo/autograd.mojo:7:8: error: 'self.func_module' is uninitialized at the implicit return from this function
fn __init__(inout self, module_path: String):
^
/Users/anindyadeepsannigrahi/workspace/Mojo/autograd.mojo:7:23: note: 'self' declared here
fn __init__(inout self, module_path: String):
^
mojo: error: failed to run the pass manager
/Users/anindyadeepsannigrahi/workspace/Mojo/autograd.mojo:7:8: error: 'self.func_module' is uninitialized at the implicit return from this function
fn __init__(inout self, module_path: String):
^
/Users/anindyadeepsannigrahi/workspace/Mojo/autograd.mojo:7:23: note: 'self' declared here
fn __init__(inout self, module_path: String):
^
mojo: error: failed to run the pass manager
10 replies
MModular
Created by Anindya on 6/14/2024 in #questions
How to declare a function type in a trait?
I see, but still it is a good start, thanks — and now I am starting to see the cons of strong typing... ugh
10 replies
MModular
Created by Anindya on 6/14/2024 in #questions
How to declare a function type in a trait?
Thanks for the answer — so is it not possible to be more flexible with this approach in any way?
10 replies
RRunPod
Created by Anindya on 3/14/2024 in #⚡|serverless
Streaming is not quite working
And this is how the handler code looks:
import runpod
from engine import HFEngine
from constants import DEFAULT_MAX_CONCURRENCY


class JobInput:
    """Thin view over a raw job dict, exposing the fields the handler needs."""

    def __init__(self, job):
        self.llm_input = job.get("messages")
        self.stream = job.get("stream", False)
        # Conservative generation defaults used when the caller sends none.
        default_params = {
            "temperature": 0.1,
            "top_p": 0.7,
            "max_new_tokens": 512,
        }
        self.sampling_params = job.get("sampling_params", default_params)


async def handler(job):
    """Async generator handler: relays each delta produced by the engine."""
    engine = HFEngine()
    parsed = JobInput(job["input"])

    token_stream = engine.stream(
        chat_input=parsed.llm_input,
        generation_parameters=parsed.sampling_params,
    )
    async for delta in token_stream:
        yield delta


runpod.serverless.start(
    {
        "handler": handler,
        # Always report the configured maximum, regardless of current load.
        "concurrency_modifier": lambda x: DEFAULT_MAX_CONCURRENCY,
    }
)
import runpod
from engine import HFEngine
from constants import DEFAULT_MAX_CONCURRENCY


class JobInput:
    """Thin view over a raw job dict, exposing the fields the handler needs."""

    def __init__(self, job):
        self.llm_input = job.get("messages")
        self.stream = job.get("stream", False)
        # Conservative generation defaults used when the caller sends none.
        default_params = {
            "temperature": 0.1,
            "top_p": 0.7,
            "max_new_tokens": 512,
        }
        self.sampling_params = job.get("sampling_params", default_params)


async def handler(job):
    """Async generator handler: relays each delta produced by the engine."""
    engine = HFEngine()
    parsed = JobInput(job["input"])

    token_stream = engine.stream(
        chat_input=parsed.llm_input,
        generation_parameters=parsed.sampling_params,
    )
    async for delta in token_stream:
        yield delta


runpod.serverless.start(
    {
        "handler": handler,
        # Always report the configured maximum, regardless of current load.
        "concurrency_modifier": lambda x: DEFAULT_MAX_CONCURRENCY,
    }
)
I even tried it with "return_aggregate_stream": True, and here is how my client code looks — it is a simple POST request:
# Simple blocking client that POSTs a job and prints the response lines.
url = "http://localhost:8000/runsync"

headers = {
    "Content-Type": "application/json"
}

data = {
    "input": {
        "messages": "Your prompt",
        "stream": True
    }
}

# NOTE(review): /runsync returns only once the whole job has finished, so
# even with stream=True on this request the client sees all output at the
# end. For incremental output, submit via /run and poll the job's /stream
# endpoint instead — confirm against the RunPod serverless docs.
response = requests.post(url, headers=headers, data=json.dumps(data), timeout=600, stream=True)

if response.status_code == 200:
    for res in response.iter_lines():
        print(res)
else:
    # Bug fix: non-200 responses were silently dropped; surface them.
    print(f"Request failed: {response.status_code} {response.text}")
# Simple blocking client that POSTs a job and prints the response lines.
url = "http://localhost:8000/runsync"

headers = {
    "Content-Type": "application/json"
}

data = {
    "input": {
        "messages": "Your prompt",
        "stream": True
    }
}

# NOTE(review): /runsync returns only once the whole job has finished, so
# even with stream=True on this request the client sees all output at the
# end. For incremental output, submit via /run and poll the job's /stream
# endpoint instead — confirm against the RunPod serverless docs.
response = requests.post(url, headers=headers, data=json.dumps(data), timeout=600, stream=True)

if response.status_code == 200:
    for res in response.iter_lines():
        print(res)
else:
    # Bug fix: non-200 responses were silently dropped; surface them.
    print(f"Request failed: {response.status_code} {response.text}")
What is happening is: on the server side, I can see that it is streaming, but on the client side, I only get the responses after the streaming finishes.
4 replies
RRunPod
Created by Anindya on 3/14/2024 in #⚡|serverless
Streaming is not quite working
Here is how my inference code looks:
class HFEngine:
    """Hugging Face text-generation engine that streams token deltas.

    Model, tokenizer and streamer are produced by ``self._initialize_llm``,
    which is defined elsewhere in this class (not shown in this excerpt).
    """

    def __init__(self) -> None:
        load_dotenv()
        self.model, self.tokenizer, self.streamer = self._initialize_llm(
            model_name_or_path=os.environ.get("HF_MODEL_NAME"),
            tokenizer_name_or_path=os.environ.get("HF_TOKENIZER_NAME"),
            device=os.environ.get("DEVICE") or "cpu"
        )
        # NOTE(review): unlike the call above, there is no "cpu" fallback
        # here, so self.device may be None when DEVICE is unset — confirm
        # whether that is intentional.
        self.device = os.environ.get("DEVICE")

    async def stream(self, chat_input: Union[str, List[Dict[str, str]]], generation_parameters: Dict[str, Any]):
        """Public streaming entry point.

        Relays deltas from ``_stream`` and converts any exception into an
        in-band ``{"error": ...}`` delta so the consumer's loop never breaks.
        """
        try:
            async for output in self._stream(chat_input=chat_input, generation_parameters=generation_parameters):
                yield output
        except Exception as e:
            yield {"error": str(e)}

    async def _stream(self, chat_input: Union[str, List[Dict[str, str]]], generation_parameters: Dict[str, Any]):
        """Run generation on a background thread and yield token deltas."""
        # Normalize a bare prompt string into the chat-message shape.
        if isinstance(chat_input, str):
            chat_input = [{"user": chat_input}]

        input_ids = self.tokenizer.apply_chat_template(
            conversation=chat_input, tokenize=True, return_tensors="pt"
        ).to(self.device)

        generation_kwargs = dict(
            input_ids=input_ids,
            streamer=self.streamer,
            **generation_parameters
        )
        # model.generate blocks, so it runs on its own thread while the
        # streamer feeds tokens back to this coroutine.
        thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
        thread.start()

        # Bug fix: the original wrapped the yield in `try: ... except Empty:
        # await asyncio.sleep(...)`, but nothing in that loop body raises
        # queue.Empty (the streamer manages its queue internally), so the
        # except branch was dead code — removed.
        # NOTE(review): iterating the streamer here is synchronous, which
        # blocks the event loop between tokens; this is a plausible cause of
        # consumers only seeing output when generation finishes. Consider
        # asyncio.to_thread / run_in_executor — confirm before changing.
        for next_token in self.streamer:
            if next_token is not None:
                yield {"status": 200, "delta": next_token}

        # Bug fix: the generation thread was never joined; join it once the
        # streamer is exhausted so the worker terminates deterministically.
        thread.join()
class HFEngine:
    """Hugging Face text-generation engine that streams token deltas.

    Model, tokenizer and streamer are produced by ``self._initialize_llm``,
    which is defined elsewhere in this class (not shown in this excerpt).
    """

    def __init__(self) -> None:
        load_dotenv()
        self.model, self.tokenizer, self.streamer = self._initialize_llm(
            model_name_or_path=os.environ.get("HF_MODEL_NAME"),
            tokenizer_name_or_path=os.environ.get("HF_TOKENIZER_NAME"),
            device=os.environ.get("DEVICE") or "cpu"
        )
        # NOTE(review): unlike the call above, there is no "cpu" fallback
        # here, so self.device may be None when DEVICE is unset — confirm
        # whether that is intentional.
        self.device = os.environ.get("DEVICE")

    async def stream(self, chat_input: Union[str, List[Dict[str, str]]], generation_parameters: Dict[str, Any]):
        """Public streaming entry point.

        Relays deltas from ``_stream`` and converts any exception into an
        in-band ``{"error": ...}`` delta so the consumer's loop never breaks.
        """
        try:
            async for output in self._stream(chat_input=chat_input, generation_parameters=generation_parameters):
                yield output
        except Exception as e:
            yield {"error": str(e)}

    async def _stream(self, chat_input: Union[str, List[Dict[str, str]]], generation_parameters: Dict[str, Any]):
        """Run generation on a background thread and yield token deltas."""
        # Normalize a bare prompt string into the chat-message shape.
        if isinstance(chat_input, str):
            chat_input = [{"user": chat_input}]

        input_ids = self.tokenizer.apply_chat_template(
            conversation=chat_input, tokenize=True, return_tensors="pt"
        ).to(self.device)

        generation_kwargs = dict(
            input_ids=input_ids,
            streamer=self.streamer,
            **generation_parameters
        )
        # model.generate blocks, so it runs on its own thread while the
        # streamer feeds tokens back to this coroutine.
        thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
        thread.start()

        # Bug fix: the original wrapped the yield in `try: ... except Empty:
        # await asyncio.sleep(...)`, but nothing in that loop body raises
        # queue.Empty (the streamer manages its queue internally), so the
        # except branch was dead code — removed.
        # NOTE(review): iterating the streamer here is synchronous, which
        # blocks the event loop between tokens; this is a plausible cause of
        # consumers only seeing output when generation finishes. Consider
        # asyncio.to_thread / run_in_executor — confirm before changing.
        for next_token in self.streamer:
            if next_token is not None:
                yield {"status": 200, "delta": next_token}

        # Bug fix: the generation thread was never joined; join it once the
        # streamer is exhausted so the worker terminates deterministically.
        thread.join()
4 replies