-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathllama-cpp-cli.py
41 lines (35 loc) · 1.19 KB
/
llama-cpp-cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from llama_cpp import Llama
# Model settings, collected in one place so they are easy to tweak.
model_options = {
    "model_path": "./meta-llama-3-8B-instruct-Q8.gguf",
    "n_gpu_layers": -1,  # Comment this entry out to disable GPU acceleration
    # Optional settings, disabled by default:
    # "seed": 1337,
    # "n_ctx": 2048,
}

# Load the model once at startup; every prompt in the session reuses it.
llm = Llama(**model_options)

# Preamble that opens every chat. `conversation` holds the running transcript
# and starts out as just this preamble.
context = "Context: You are a helpful assistant who always responds in a friendly manner.\n"
conversation = context

# Farewell printed whenever the CLI shuts down.
exit_msg = "\nExiting CLI. Goodbye!"
# Interactive chat loop.
#
# Each iteration reads one line from the user, appends it to the running
# transcript in `conversation`, asks the model to continue the transcript, and
# prints the result. Because the model is called with echo=True, the returned
# text is the whole transcript (prompt plus new reply), which then becomes the
# prompt prefix for the next turn.
#
# Commands (case-insensitive, surrounding whitespace ignored):
#   exit  - print the goodbye message and leave the loop
#   reset - discard the transcript and start again from `context`
#
# Ctrl-C and end-of-input (Ctrl-D or an exhausted pipe) both end the session
# with the goodbye message instead of a traceback.
while True:
    try:
        # Read user input
        user_input = input("Enter your prompt (or 'exit' to quit, 'reset' for new chat): ")

        # Normalise once so that "Exit", " exit " and "EXIT" are all recognised.
        command = user_input.strip().lower()

        if command == 'exit':
            print(exit_msg)
            break

        if command == 'reset':
            conversation = context
            print("Chat reset.")
            continue

        if not command:
            # Blank line: ask again rather than spending a model call on an
            # empty user turn.
            continue

        # The echoed transcript may end without a newline (for example when
        # generation stopped at max_tokens); make sure the new turn starts on
        # its own line so the "User:" marker is not glued to the previous reply.
        if not conversation.endswith("\n"):
            conversation += "\n"
        conversation += f"User: {user_input} \nAssistant: "

        output = llm(
            conversation,
            max_tokens=256,  # 'None' to generate up to the end of the context window
            stop=["User:"],  # Stop before the model starts writing the user's next turn
            echo=True,  # Echo the prompt back in the output
        )

        print("\nResponse:")
        # With echo=True the returned text already contains the full transcript,
        # so it replaces the previous one instead of being appended to it.
        conversation = output['choices'][0]['text']
        print(conversation)
    except (KeyboardInterrupt, EOFError):
        # KeyboardInterrupt: Ctrl-C at the prompt or during generation.
        # EOFError: input() hit end-of-file (Ctrl-D, or piped stdin ran out).
        print(exit_msg)
        break