Skip to content

使用模式#

估算LLM和嵌入模型的令牌计数#

为了测量LLM和嵌入模型的令牌数量,您需要:

  1. 设置MockLLM和MockEmbedding对象
from llama_index.core.llms import MockLLM
from llama_index.core import MockEmbedding

# Mock LLM used for the token estimate, configured with max_tokens=256.
# NOTE(review): presumably no real model is called -- confirm in the
# llama_index documentation.
llm = MockLLM(max_tokens=256)
# Mock embedding model configured with embed_dim=1536.
embed_model = MockEmbedding(embed_dim=1536)
  1. 配置TokenCountingHandler回调处理器
import tiktoken
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler

# The handler is given the `encode` method of the tiktoken encoding for
# "gpt-3.5-turbo" as its tokenizer.
token_counter = TokenCountingHandler(
    tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode
)

# Callback manager whose only handler is the token counter.
callback_manager = CallbackManager([token_counter])
  1. 将它们添加到全局Settings
from llama_index.core import Settings

# Register the mock LLM, the mock embedding model and the callback manager
# on the global Settings object.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.callback_manager = callback_manager
  1. 构建索引
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Load the documents found under the example data directory.
documents = SimpleDirectoryReader(
    "./docs/examples/data/paul_graham"
).load_data()

# Build a vector store index from the loaded documents.
# NOTE(review): per the surrounding text, token usage for this step is read
# from `token_counter` afterwards -- confirm it is picked up via Settings.
index = VectorStoreIndex.from_documents(documents)
  1. 测量令牌数量!
# Pair each label with the counter it reports, in display order.
index_build_usage = [
    ("嵌入令牌数: ", token_counter.total_embedding_token_count),
    ("LLM提示令牌数: ", token_counter.prompt_llm_token_count),
    ("LLM补全令牌数: ", token_counter.completion_llm_token_count),
    ("LLM总令牌数: ", token_counter.total_llm_token_count),
]

# Flatten to label, value, newline triples and emit them with one print call.
report_parts = []
for label, count in index_build_usage:
    report_parts.extend([label, count, "\n"])
print(*report_parts)

# Reset the counters
token_counter.reset_counts()
  1. 执行查询并再次测量
# Run a single query through a query engine created from the index.
query_engine = index.as_query_engine()
response = query_engine.query("query")

# Pair each label with the counter it reports, in display order.
query_usage = [
    ("嵌入令牌数: ", token_counter.total_embedding_token_count),
    ("LLM提示令牌数: ", token_counter.prompt_llm_token_count),
    ("LLM补全令牌数: ", token_counter.completion_llm_token_count),
    ("LLM总令牌数: ", token_counter.total_llm_token_count),
]

# Flatten to label, value, newline triples and emit them with one print call.
report_parts = []
for label, count in query_usage:
    report_parts.extend([label, count, "\n"])
print(*report_parts)