大语言模型微调及其应用的探索 跟踪前沿的技术
使用开源大模型给自己训练的模型评分
import json
import urllib.request
def query_model(prompt, model="qwen3:latest", url="http://192.168.9.179:11434/api/chat"):
    """Send a single-turn chat prompt to the chat endpoint and return the reply text.

    The request is POSTed as JSON to ``url``. The response body is read as
    newline-delimited JSON and the ``message.content`` field of every chunk
    is concatenated in arrival order.

    Args:
        prompt: Text sent as the content of the single ``user`` message.
        model: Model name placed in the request's ``model`` field.
        url: Chat endpoint that receives the POST request.

    Returns:
        The concatenated ``message.content`` strings of all response chunks.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        json.JSONDecodeError: If a non-blank response line is not valid JSON.
        KeyError: If a chunk carries no ``message``/``content`` field.
    """
    data = {
        "model": model,
        # The Ollama chat API reads sampling settings from "options"
        # (plural); a misspelled key is not applied, which would leave the
        # seed and temperature at the server defaults.
        "options": {
            "seed": 123,  # for deterministic responses
            "temperature": 0,  # for deterministic responses
        },
        "messages": [
            {"role": "user", "content": prompt},
        ],
    }

    payload = json.dumps(data).encode("utf-8")
    request = urllib.request.Request(url, data=payload, method="POST")
    request.add_header("Content-Type", "application/json")

    pieces = []
    with urllib.request.urlopen(request) as response:
        # The body is streamed as one JSON object per line.
        for raw_line in response:
            line = raw_line.decode("utf-8").strip()
            if not line:
                # Tolerate blank separator lines instead of failing in
                # json.loads on an empty document.
                continue
            pieces.append(json.loads(line)["message"]["content"])
    return "".join(pieces)
# Smoke-test the endpoint with a short greeting and show the reply.
result = query_model(prompt="你好啊!!!")
print(result)
# Load the instruction dataset; each entry is read below through its
# "output" and "model_response" keys.
with open("instruction-data-with-response.json", "r", encoding="utf-8") as f:
    jsondata = json.load(f)

# Spot-check the scoring prompt on the first three entries.
for entry in jsondata[:3]:
    prompt = "".join([
        f" 给定一个输入: `{format_input(entry)}` ",
        f" 正确的输出为: `{entry['output']}`, ",
        f" 模型给的输出为: `{entry['model_response']}`",
        " 请为模型的输出打分,0表示最差,100表示最好,只给出分数。",
    ])

    print("\nDataset response:\n>>", entry["output"])
    print("\nModel response:\n>>", entry["model_response"])
    # The header is printed before the model is queried, so it is visible
    # while the request is still in flight.
    print("\nScore:")
    print(">>", chat_model(prompt))
    print("\n-------------------------")
#测试评分
from tqdm import tqdm
def generate_model_scores(json_data):
    """Score every entry's model response and return the integer scores.

    For each entry a grading prompt is built from ``format_input(entry)``,
    ``entry['output']`` and ``entry['model_response']`` and passed to
    ``chat_model``. Replies that ``int()`` cannot convert are reported on
    stdout and skipped, so the result may be shorter than ``json_data``.

    Args:
        json_data: Iterable of entries providing the ``output`` and
            ``model_response`` keys.

    Returns:
        A list of ``int`` scores, one per successfully converted reply.
    """
    scores = []
    for entry in tqdm(json_data, desc="Scoring entries"):
        # format_input(entry) is the task *input*, so it is labelled
        # "输入" here, matching the spot-check prompt used elsewhere in
        # this file.
        prompt = (
            f"给定一个输入: `{format_input(entry)}` "
            f"正确的输出为: `{entry['output']}`, "
            f"模型给的输出为: `{entry['model_response']}`"
            f" 请为模型的输出打分,0表示最差,100表示最好,只给出分数。"
        )
        # NOTE(review): chat_model and format_input are not defined in the
        # visible part of this file; presumably chat_model is meant to be
        # the query_model helper -- confirm before running.
        score = chat_model(prompt)
        try:
            scores.append(int(score))
        except ValueError:
            print(f"Could not convert score: {score}")
    return scores
# Score the whole dataset and report how many replies were usable.
scores = generate_model_scores(jsondata)
print(f"Number of scores: {len(scores)} of {len(jsondata)}")
# Every reply can fail integer conversion, which leaves nothing to
# average; guard the division instead of raising ZeroDivisionError.
if scores:
    print(f"Average score: {sum(scores)/len(scores):.2f}\n")
else:
    print("Average score: n/a (no valid scores)\n")