PromptLayer allows you to log requests made through custom LLM providers, which is useful when a request does not go through the PromptLayer SDK.

The input and output must be formatted as a prompt blueprint, PromptLayer's model-agnostic format for representing a request and its response.
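For reference, a chat prompt blueprint is a dictionary with a "chat" type and a list of messages whose content is a list of typed parts. A minimal sketch of the shape (it matches the input and output dictionaries built below; the message text is illustrative):

{
    "type": "chat",
    "messages": [
        {
            "role": "user",
            "content": [{"type": "text", "text": "What is the weather in Paris?"}],
        }
    ],
}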

from huggingface_hub import InferenceClient
from promptlayer import PromptLayer
from datetime import datetime

PROMPTLAYER_API_KEY = "pl_..."
pl_client = PromptLayer(api_key=PROMPTLAYER_API_KEY)

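# Fetch the prompt template and render it with the input variables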
prompt_name = "weather"
prompt_version_number = 30
prompt_input_variables = {"city": "Paris"}
template = pl_client.templates.get(
    prompt_name,
    {"version": prompt_version_number, "input_variables": prompt_input_variables},
)

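# Blueprint messages store content as a list of typed parts;
# flatten each message to a plain string for the Hugging Face chat client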
messages = [
    message
    if isinstance(message["content"], str)
    else {**message, "content": message["content"][0]["text"]}
    for message in template["llm_kwargs"]["messages"]
]
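# Inference parameters (e.g. temperature, max_tokens) stored on the template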
parameters = template["metadata"]["model"]["parameters"]

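# Model to call on the Hugging Face Inference API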
provider = "meta-llama"
model = "Meta-Llama-3-8B-Instruct"

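# Create an Inference API client authenticated with a Hugging Face token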
HF_TOKEN = "hf_..."
client = InferenceClient(
    f"{provider}/{model}",
    api_key=HF_TOKEN,
)
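# Record request start and end times as Unix timestamps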
request_start_time = datetime.now().timestamp()
response = client.chat_completion(messages=messages, **parameters)
request_end_time = datetime.now().timestamp()

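# Rebuild the request messages in prompt blueprint format for logging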
input = {
    "type": "chat",
    "messages": [
        {**message, "content": [{"type": "text", "text": message["content"]}]}
        for message in messages
    ],
}

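# Wrap the assistant's reply in the same blueprint format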
output = {
    "type": "chat",
    "messages": [
        {
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": response.choices[0].message.content,
                }
            ],
        }
    ],
}

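# Log the request to PromptLayer along with token counts, cost, and metadata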
request_log = pl_client.log_request(
    provider=provider,
    model=model,
    input=input,
    output=output,
    request_start_time=request_start_time,
    parameters=parameters,
    request_end_time=request_end_time,
    prompt_name=prompt_name,
    prompt_version_number=prompt_version_number,
    prompt_input_variables=prompt_input_variables,
    input_tokens=response.usage.prompt_tokens,
    output_tokens=response.usage.completion_tokens,
    function_name="inference",
    tags=["chat"],
    metadata={"model": "custom-model"},
    price=0.00045,
    score=70,
)
print(request_log["id"])
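The returned log is a dictionary whose id identifies the request in PromptLayer. Passing prompt_name, prompt_version_number, and prompt_input_variables links the log to the template version that produced it.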