使用模式
估算LLM和嵌入模型的令牌计数
为了测量LLM和嵌入模型的令牌数量,您需要:
- 设置MockLLM和MockEmbedding对象
# Set up mock LLM and embedding objects so token usage can be estimated
# without making real model API calls.
from llama_index.core.llms import MockLLM
from llama_index.core import MockEmbedding
# max_tokens caps each mock completion at 256 tokens.
llm = MockLLM(max_tokens=256)
# embed_dim=1536 — presumably chosen to match a real embedding model's
# dimensionality (e.g. OpenAI text-embedding-ada-002); confirm for your model.
embed_model = MockEmbedding(embed_dim=1536)
- 配置TokenCountingHandler回调处理器
# Configure a TokenCountingHandler that tallies tokens using the tiktoken
# tokenizer for "gpt-3.5-turbo", and register it on a CallbackManager.
import tiktoken
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
token_counter = TokenCountingHandler(
# encoding_for_model(...).encode turns text into token ids; the handler
# counts the resulting tokens for every LLM / embedding event.
tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode
)
callback_manager = CallbackManager([token_counter])
- 将它们添加到全局Settings中
# Install the mock models and the callback manager into the global Settings
# so all subsequent indexing and querying uses them (and gets counted).
from llama_index.core import Settings
Settings.llm = llm
Settings.embed_model = embed_model
Settings.callback_manager = callback_manager
- 构建索引
# Build a vector index; embedding the documents fires the token-counting
# callbacks registered above.
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader(
"./docs/examples/data/paul_graham"
).load_data()
index = VectorStoreIndex.from_documents(documents)
- 测量令牌数量!
# Report the tokens consumed so far (indexing uses only embeddings, so the
# LLM counts are expected to be 0 at this point).
print(
"嵌入令牌数: ",
token_counter.total_embedding_token_count,
"\n",
"LLM提示令牌数: ",
token_counter.prompt_llm_token_count,
"\n",
"LLM补全令牌数: ",
token_counter.completion_llm_token_count,
"\n",
"LLM总令牌数: ",
token_counter.total_llm_token_count,
"\n",
)
# Reset the counters so the next measurement covers only the query step.
token_counter.reset_counts()
- 执行查询并再次测量
# Run a query and measure again: querying embeds the query string and calls
# the LLM, so both embedding and LLM counters should now be non-zero.
query_engine = index.as_query_engine()
response = query_engine.query("query")
print(
"嵌入令牌数: ",
token_counter.total_embedding_token_count,
"\n",
"LLM提示令牌数: ",
token_counter.prompt_llm_token_count,
"\n",
"LLM补全令牌数: ",
token_counter.completion_llm_token_count,
"\n",
"LLM总令牌数: ",
token_counter.total_llm_token_count,
"\n",
)