from runcrate import Runcrate
import numpy as np
# NOTE(review): "rc_live_..." looks like a placeholder key — supply a real one
# before running, and avoid committing it to source control.
client = Runcrate(api_key="rc_live_...")

# Your documents
docs = [
    "GPU instances are billed hourly. RTX 4090 starts at $0.35/hr, A100 at $1.20/hr, H100 at $2.50/hr.",
    "Storage volumes cost $0.03/GB/month, charged weekly. Volumes persist across instance termination.",
    "Auto-recharge tops up credits automatically when your balance drops below a threshold you set.",
    "API keys are scoped to a workspace. The full key is shown only once at creation.",
    "The Models API supports chat, image, video, TTS, and ASR across 140+ open-source models.",
]

# Embed all documents: one embed request per document, in order, so that
# entry i of doc_embeddings belongs to docs[i]. Building the array in a single
# expression keeps doc_embeddings an ndarray from the start and leaves no loop
# temporaries behind at module level.
doc_embeddings = np.array(
    [
        client.models.embed(model="BAAI/bge-large-en-v1.5", input=doc).data[0].embedding
        for doc in docs
    ]
)
def ask(question: str, top_k: int = 3) -> str:
    """Answer *question* using the ``top_k`` most similar documents as context.

    The question is embedded with ``BAAI/bge-large-en-v1.5``, scored against
    every row of ``doc_embeddings`` by cosine similarity, and the best-scoring
    entries of ``docs`` are handed to the chat model as its only context.

    Args:
        question: Natural-language question to answer.
        top_k: Number of documents to include as context; must be at least 1.
            Values larger than ``len(docs)`` simply use every document.

    Returns:
        The message content of the first choice in the chat completion.

    Raises:
        ValueError: If ``top_k`` is less than 1.
    """
    # Validate before any network call. Without this, top_k=0 would turn the
    # selection slice into [-0:], which selects *every* document, and a
    # negative top_k would become an unintended [n:] slice.
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")

    # Embed the question
    q_resp = client.models.embed(model="BAAI/bge-large-en-v1.5", input=question)
    q_vec = np.array(q_resp.data[0].embedding)

    # Cosine similarity
    sims = doc_embeddings @ q_vec / (
        np.linalg.norm(doc_embeddings, axis=1) * np.linalg.norm(q_vec)
    )

    # argsort is ascending: reverse to get best-first, then keep the head.
    top_indices = np.argsort(sims)[::-1][:top_k]
    context = "\n\n".join(docs[i] for i in top_indices)

    # Generate answer
    response = client.models.chat_completion(
        model="deepseek-ai/DeepSeek-V3",
        messages=[
            {"role": "system", "content": f"Answer using ONLY this context:\n\n{context}"},
            {"role": "user", "content": question},
        ],
    )
    return response.choices[0].message.content
# Demo: run each sample question through ask() and print its answer, in order.
for sample_question in (
    "How much does an A100 cost per hour?",
    "What happens when my credits run out?",
):
    print(ask(sample_question))