# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Start an LLM with dummy (random) weights, then reload real weights in place.

The first generation pass produces nonsense because the model was loaded with
``load_format="dummy"``; after switching the load config to ``auto`` and
issuing a ``reload_weights`` RPC, the second pass produces sensible text.
"""
from vllm import LLM, RequestOutput, SamplingParams

# Prompts reused for both generation passes.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
# Sampling configuration shared by both passes.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)


def print_prompts_and_outputs(outputs: list[RequestOutput]) -> None:
    """Print each prompt alongside its first generated completion."""
    separator = "-" * 60
    print(separator)
    for item in outputs:
        print(f"Prompt: {item.prompt!r}")
        print(f"Output: {item.outputs[0].text!r}")
        print(separator)


def main():
    """Generate with dummy weights, swap in real weights, and generate again."""
    # Skip real checkpoint loading at startup by using randomly initialized
    # ("dummy") weights.
    llm = LLM(
        model="Qwen/Qwen3-0.6B",
        load_format="dummy",
        enforce_eager=True,
        tensor_parallel_size=4,
    )

    first_pass = llm.generate(prompts, sampling_params)
    print("\nOutputs do not make sense:")
    print_prompts_and_outputs(first_pass)

    # Flip the load format from `dummy` to `auto` on every worker so the
    # next reload fetches the real checkpoint.
    new_load_config = {"load_config": {"load_format": "auto"}}
    llm.collective_rpc("update_config", args=(new_load_config,))
    # Reload real weights in place, without restarting the engine.
    llm.collective_rpc("reload_weights")

    # The second pass should now produce coherent text.
    second_pass = llm.generate(prompts, sampling_params)
    print("\nOutputs make sense after loading real weights:")
    print_prompts_and_outputs(second_pass)


if __name__ == "__main__":
    main()