# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Example Python client for OpenAI Chat Completion using vLLM API server

NOTE: start a supported chat completion model server with `vllm serve`, e.g.
vllm serve meta-llama/Llama-2-7b-chat-hf
"""

import argparse

from openai import OpenAI

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

# A short multi-turn conversation sent as the prompt for the example request.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who won the world series in 2020?"},
    {
        "role": "assistant",
        "content": "The Los Angeles Dodgers won the World Series in 2020.",
    },
    {"role": "user", "content": "Where was it played?"},
]


def parse_args(argv=None):
    """Parse command-line arguments for the example client.

    Args:
        argv: Optional list of argument strings. Defaults to ``None``, in
            which case argparse reads ``sys.argv[1:]`` as before; passing an
            explicit list makes the parser usable programmatically.

    Returns:
        argparse.Namespace with a boolean ``stream`` attribute.
    """
    parser = argparse.ArgumentParser(description="Client for vLLM API server")
    parser.add_argument(
        "--stream", action="store_true", help="Enable streaming response"
    )
    return parser.parse_args(argv)


def main(args):
    """Send the example conversation to the vLLM server and print the reply.

    Args:
        args: Parsed CLI namespace; ``args.stream`` selects streaming mode.
    """
    client = OpenAI(
        # defaults to os.environ.get("OPENAI_API_KEY")
        api_key=openai_api_key,
        base_url=openai_api_base,
    )

    # Use the first model served by this vLLM instance (typically the only one).
    models = client.models.list()
    model = models.data[0].id

    # Chat Completion API
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
        stream=args.stream,
    )

    print("-" * 50)
    print("Chat completion results:")
    if args.stream:
        # In streaming mode the response is an iterator of chunks.
        for c in chat_completion:
            print(c)
    else:
        print(chat_completion)
    print("-" * 50)


if __name__ == "__main__":
    args = parse_args()
    main(args)