1. 案例目标

本案例旨在构建一个基于Notion知识库的新员工入职聊天机器人,通过RAG(检索增强生成)技术帮助新员工快速获取公司相关信息,提高入职效率。系统主要实现以下目标:

- 集成Notion作为中心知识库,集中管理公司文档和流程
- 实现基于自然语言查询的智能问答系统
- 使用LangGraph优化文档检索和相关性过滤
- 实现复杂查询的智能分解和并行处理
- 展示LangChain与LangGraph的无缝集成

2. 技术栈与核心依赖

LangChain、LangGraph、OpenAI GPT、Notion API、Chroma Vector Store、Python、Jupyter Notebook

3. 环境配置

3.1 安装依赖

# 安装所需包
%pip install langchain-opentutorial

from langchain_opentutorial import package

package.install(
    [
        "langchain-community",
        "langchain-openai",
        "langchain-chroma",
        "langchain-core",
        "langgraph",
    ],
    verbose=False,
    upgrade=False,
)

3.2 环境变量设置

# 设置环境变量
from langchain_opentutorial import set_env

set_env(
    {
        "OPENAI_API_KEY": "",
        "LANGCHAIN_API_KEY": "",
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
        "LANGCHAIN_PROJECT": "07-Agent/19-NewEmployeeOnboardingChatbot",
    }
)

3.3 Notion API配置

# 配置Notion API
from langchain_community.document_loaders import NotionDBLoader

# 注意:原文在此处硬编码了真实的Notion集成令牌,属于密钥泄露,
# 该令牌应立即吊销;此处改为占位符,实际使用时应从环境变量读取
NOTION_TOKEN = "<YOUR_NOTION_INTEGRATION_TOKEN>"
DATABASE_ID = "<YOUR_NOTION_DATABASE_ID>"

loader = NotionDBLoader(
    integration_token=NOTION_TOKEN,
    database_id=DATABASE_ID,
)
data = loader.load()

4. 案例实现

4.1 系统架构设计

4.2 基础RAG实现

4.2.1 数据预处理与向量存储

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document

# 处理数据:创建两种类型的文档
# 1. 原始内容文档
# 2.
#    标题作为内容的文档,用于提高检索相关性
data_processed = [
    *[
        Document(
            page_content=item.page_content,
            metadata={
                "title": item.metadata["title"],
                "use_title_as_page_content": False,
            },
        )
        for item in data
    ],
    *[
        Document(
            # 使用标题作为页面内容进行相似性搜索
            page_content=item.metadata["title"],
            metadata={
                "page_content": item.page_content,
                "title": item.metadata["title"],
                "use_title_as_page_content": True,
            },
        )
        for item in data
    ],
]

# 创建向量存储
vector_store = Chroma.from_documents(
    documents=data_processed,
    embedding=OpenAIEmbeddings(),
)

# 创建检索器
retriever_from_notion = vector_store.as_retriever(
    search_kwargs={
        "k": 5,
    }
)

4.2.2 上下文解析器

from langchain_core.runnables import chain
from typing import List


@chain
def context_parser(docs: List[Document]) -> str:
    # 将检索到的文档转换为字符串
    return "\n\n".join(
        [
            f"# {doc.metadata['title']}\n"
            f"{doc.metadata['page_content'] if doc.metadata['use_title_as_page_content'] else doc.page_content}"
            for doc in docs
        ]
    )

4.2.3 基础RAG链

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# 创建提示模板
prompt = ChatPromptTemplate(
    [
        (
            "system",
            "You are a helpful assistant for onboarding new employees. \n"
            "Please answer the question based on the following documents. "
            "\n Documents: \n {context}",
        ),
        ("human", "{question}"),
    ]
)

# 创建LLM
llm = ChatOpenAI(model="gpt-4o-mini")

# 构建RAG链
# 注意:原文未给出RunnablePassthrough/RunnableLambda/RunnableParallel的导入,
# 它们应从langchain_core.runnables导入
rag_chain = (
    {
        "question": RunnablePassthrough(),
        "context": retriever_from_notion | context_parser,
    }
    | prompt
    | llm
    | StrOutputParser()
)

4.3 LangGraph基础应用

4.3.1 文档相关性过滤

from langgraph.graph import StateGraph
from typing import TypedDict, List


# 定义状态
class RetrievalState(TypedDict):
    question: str
    retrieved_docs: List[Document]
    relevant_docs: List[Document]


# 检索节点
def retrieve_node(state: RetrievalState) -> RetrievalState:
    question = state["question"]
    return {
        "question": question,
        "retrieved_docs": retriever_from_notion.invoke(question),
        "relevant_docs": [],
    }


# 相关性过滤节点
def filter_relevant_docs_node(state: RetrievalState) -> RetrievalState:
    question = state["question"]
    docs = state["retrieved_docs"]
    if not docs:
        return {
            "question": question,
            "retrieved_docs": docs,
            "relevant_docs": [],
        }

    # 创建相关性检查提示
    prompt_relevance_check = ChatPromptTemplate([
        "Please determine whether the following question is relevant to the retrieved document.\n"
        "If it is relevant, output yes; otherwise, output no only.\n"
        "Question: {question}\n"
        "Retrieved Document:\n{context}"
    ])

    # 对每个文档进行相关性检查(注意:reduce需要从functools导入)
    idxed_docs = reduce(
        lambda acc, item: {**acc, item[0]: item[1]},
        enumerate(docs),
        {},
    )
    is_each_docs_relevant_chain = RunnableParallel(
        # 按检索到的文档动态创建链
        {
            str(idx): {
                "question": RunnablePassthrough(),
                "context": RunnableLambda(
                    lambda _, doc=doc: context_parser.invoke([doc])
                ),
            }
            | prompt_relevance_check
            | llm
            | StrOutputParser()
            for idx, doc in idxed_docs.items()
        }
    ) | RunnableLambda(lambda result: list(result.values()))

    relevance_response = is_each_docs_relevant_chain.invoke(question)
    return {
        "question": question,
        "retrieved_docs": docs,
        "relevant_docs": [
            doc
            for doc, flag in zip(docs, relevance_response)
            if flag == "yes"
        ],
    }


# 构建图
graph = StateGraph(state_schema=RetrievalState)
graph.add_node("retrieve", retrieve_node)
graph.add_node("filter_relevant_docs", filter_relevant_docs_node)
graph.set_entry_point("retrieve")
graph.add_edge("retrieve",
               "filter_relevant_docs")

# 编译图
langgraph_retriever = graph.compile()

4.3.2 集成LangGraph的RAG链

# 将LangGraph集成到RAG链中
langgraph_applied_rag = (
    {
        "question": RunnablePassthrough(),
        # 使用LangGraph替代原始检索器
        "context": {
            "question": RunnablePassthrough(),
        }
        | langgraph_retriever
        | RunnableLambda(lambda result: result["relevant_docs"])
        | context_parser,
    }
    | prompt
    | llm
    | StrOutputParser()
)

4.4 LangGraph高级应用

4.4.1 问题分解

from typing import TypedDict, List


# 定义问题状态
class QuestionState(TypedDict):
    question: str
    sub_questions: List[str]


# 创建问题分解提示
prompt_split_question = ChatPromptTemplate([
    "You are an assistant that helps refine and decompose complex questions.\n"
    "Your task is to split the given question into a few concise sub-questions only if necessary.\n"
    "Do not introduce any new topics or unrelated details.\n"
    "Keep the sub-questions directly relevant to the original question.\n"
    "If the question is already specific, return it as is.\n"
    "Ensure that no extra interpretations or additional information beyond the provided question are included.\n"
    "\n"
    "Original Question: {question}\n"
    "Output (one or more refined sub-questions, separated by newlines):"
])


# 问题分解节点
def split_question_node(state: QuestionState) -> QuestionState:
    question = state["question"]
    response = (
        prompt_split_question
        | llm
        | StrOutputParser()
        | RunnableLambda(lambda result: result.replace("\n\n", "\n"))
    ).invoke({"question": question})
    # 将响应转换为列表
    sub_questions = response.split("\n") if "\n" in response else [response]
    return {
        "question": question,
        "sub_questions": sub_questions,
    }


# 构建问题分解图
graph = StateGraph(state_schema=QuestionState)
graph.add_node("split_question", split_question_node)
graph.set_entry_point("split_question")
langgraph_question_splitter = graph.compile()

4.4.2 动态并行处理

# 辅助函数:将列表转换为字典
def list_to_dict(l):
    return {str(i): v for i, v in enumerate(l)}


# 辅助函数:将字典转换为动态可运行对象
def dict_to_dynamic_runnable(runnable):
    @chain
    def _dic_to_runnable(d):
        return RunnableParallel(
            {k: (RunnableLambda(lambda x, key=k: x[key]) | runnable) for k in d.keys()}
        ).invoke(d)

    return (
        _dic_to_runnable
    )


# 创建子答案链
sub_answers_chain = (
    {
        "question": RunnablePassthrough(),
    }
    | langgraph_question_splitter
    | RunnableLambda(lambda result: list_to_dict(result["sub_questions"]))
    | dict_to_dynamic_runnable(langgraph_applied_rag)
    | RunnableLambda(lambda result: list(result.values()))
)

4.4.3 答案汇总

# 创建答案汇总提示
prompt_summarize_sub_answers = ChatPromptTemplate([
    (
        "system",
        "You are an assistant summarizing multiple responses for better readability.\n"
        "Please consolidate the following sub answers into a clear and concise response.\n"
        "Ensure the final answer is not too long while maintaining the key points.\n"
        "Sub Answers: {sub_answers}",
    ),
    ("human", "My question was {question}. Summarize the key points clearly."),
])

# 构建完整的聊天机器人
chat_bot = (
    {
        "question": RunnablePassthrough(),
        "sub_answers": sub_answers_chain,
    }
    | prompt_summarize_sub_answers
    | llm
)

5. 案例效果

5.1 智能文档检索:系统能够从Notion知识库中检索相关文档,并根据相关性过滤结果,确保回答的准确性。

5.2 复杂查询处理:系统能够将复杂查询分解为多个子问题,并行处理后再汇总答案,提高回答的全面性。

5.3 自然语言交互:用户可以使用自然语言提问,系统能够理解并生成准确的回答,无需复杂的查询语法。

5.4 知识库集成:系统无缝集成Notion作为知识库,能够实时获取最新的公司信息和流程文档。

5.5 模块化架构:系统采用模块化设计,各组件可独立优化和扩展,便于后续功能增强。

6. 案例实现思路

1) 知识库集成:使用Notion API集成公司知识库,将文档和流程信息集中管理,为RAG系统提供数据基础。
2) 基础RAG实现:使用LangChain构建基础RAG系统,实现文档检索和答案生成功能。
3) LangGraph优化:引入LangGraph优化文档检索过程,添加相关性过滤功能,提高检索质量。
4) 问题分解:实现复杂查询的智能分解,将用户问题拆分为多个子问题并行处理。
5) 动态并行处理:使用动态并行处理技术,同时处理多个子问题,提高系统响应速度。
6) 答案汇总:将多个子问题的答案汇总为连贯的最终回答,提供全面而简洁的响应。

7. 扩展建议

- 添加多语言支持,帮助国际员工更好地适应公司环境
- 集成更多数据源,如Confluence、SharePoint等,构建更全面的知识库
- 添加个性化推荐功能,根据员工角色和部门提供定制化信息
- 实现对话记忆功能,支持多轮对话和上下文理解
- 添加反馈机制,持续优化回答质量和相关性
- 集成任务管理功能,帮助新员工跟踪入职进度
- 添加可视化界面,提供更友好的用户体验
- 实现知识图谱构建,展示概念之间的关系和依赖

8. 总结

本案例展示了如何使用LangChain和LangGraph构建一个基于Notion知识库的新员工入职聊天机器人。通过RAG技术、文档相关性过滤和问题分解等核心功能,系统能够为新员工提供准确、全面的公司信息和流程指导。

该案例的核心价值在于:展示了LangChain与LangGraph的无缝集成方法;演示了如何优化RAG系统的检索质量;提供了复杂查询处理的技术方案;展示了动态并行处理的应用。

这个案例为构建类似的企业知识问答系统提供了参考,特别是在需要处理复杂查询和提高检索质量的场景中。通过扩展和优化,该系统可以应用于更广泛的企业场景,如客户服务、技术支持、培训教育等。