使用模式
估算LLM和嵌入模型的令牌计数
为了测量LLM和嵌入模型的令牌数量,您需要:
- 设置MockLLM和MockEmbedding对象
# Set up mock LLM and embedding objects so token usage can be estimated
# without making real model API calls.
from llama_index.core.llms import MockLLM
from llama_index.core import MockEmbedding
# max_tokens caps each mock completion at 256 tokens.
llm = MockLLM(max_tokens=256)
# embed_dim=1536 — presumably chosen to match a real embedding model's
# dimensionality (e.g. OpenAI text-embedding-ada-002); confirm for your model.
embed_model = MockEmbedding(embed_dim=1536)
- 配置TokenCountingHandler回调处理器
# Configure a TokenCountingHandler that tallies tokens using the tiktoken
# tokenizer for "gpt-3.5-turbo", and register it on a CallbackManager.
import tiktoken
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
token_counter = TokenCountingHandler(
# encoding_for_model(...).encode turns text into token ids; the handler
# counts the resulting tokens for every LLM / embedding event.
tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode
)
callback_manager = CallbackManager([token_counter])
- 将它们添加到全局Settings中
# Install the mock models and the callback manager into the global Settings
# so all subsequent indexing and querying uses them (and gets counted).
from llama_index.core import Settings
Settings.llm = llm
Settings.embed_model = embed_model
Settings.callback_manager = callback_manager
- 构建索引
# Build a vector index; embedding the documents fires the token-counting
# callbacks registered above.
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader(
"./docs/examples/data/paul_graham"
).load_data()
index = VectorStoreIndex.from_documents(documents)
- 测量令牌数量!
# Report the tokens consumed so far (indexing uses only embeddings, so the
# LLM counts are expected to be 0 at this point).
print(
"嵌入令牌数: ",
token_counter.total_embedding_token_count,
"\n",
"LLM提示令牌数: ",
token_counter.prompt_llm_token_count,
"\n",
"LLM补全令牌数: ",
token_counter.completion_llm_token_count,
"\n",
"LLM总令牌数: ",
token_counter.total_llm_token_count,
"\n",
)
# Reset the counters so the next measurement covers only the query step.
token_counter.reset_counts()
- 执行查询并再次测量
# Run a query and measure again: querying embeds the query string and calls
# the LLM, so both embedding and LLM counters should now be non-zero.
query_engine = index.as_query_engine()
response = query_engine.query("query")
print(
"嵌入令牌数: ",
token_counter.total_embedding_token_count,
"\n",
"LLM提示令牌数: ",
token_counter.prompt_llm_token_count,
"\n",
"LLM补全令牌数: ",
token_counter.completion_llm_token_count,
"\n",
"LLM总令牌数: ",
token_counter.total_llm_token_count,
"\n",
)