from llama_stack_client import LlamaStackClient
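# Connect to a Llama Stack server assumed to be running locally
# (8321 is the default Llama Stack server port)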
LLAMA_STACK_PORT = 8321
client = LlamaStackClient(base_url=f"http://localhost:{LLAMA_STACK_PORT}")
# List all available models
models = client.models.list()
print("--- Available models: ---")
for m in models:
print(f"- {m.identifier}")
print()
# Choose a model from the list
model = "sambanova/sambanova/Meta-Llama-3.3-70B-Instruct"
# Run chat completion
response = client.inference.chat_completion(
    messages=[
        {"role": "system", "content": "You are a friendly assistant."},
        {"role": "user", "content": "Write a two-sentence poem about llamas."},
    ],
    model_id=model,
)
print(response.completion_message.content)
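
# Optional: stream tokens as they are generated. This is a minimal sketch,
# assuming a llama-stack-client version where chat_completion accepts
# stream=True and each yielded chunk exposes the incremental text at
# chunk.event.delta.text; check your installed client version's docs.
stream = client.inference.chat_completion(
    messages=[{"role": "user", "content": "Write a two-sentence poem about llamas."}],
    model_id=model,
    stream=True,
)
for chunk in stream:
    # Print each text delta as it arrives (assumed attribute path)
    print(chunk.event.delta.text, end="", flush=True)
print()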