教你用 Cognee 同免費 Hugging Face 模型打造智能對話 AI 代理人 — 全面教學指南
今次教學會帶大家一步一步用免費、開源工具喺 Google Colab 等筆記本環境,打造一個高級 AI 代理人,結合 Cognee 嘅「代理記憶」功能同 Hugging Face 嘅輕量級對話模型。過程中我哋會設定 Cognee 去存取同檢索記憶,整合對話生成模型,令代理人可以學習、推理同自然互動。無論係處理多領域嘅文件,定係根據上下文對話,呢個代理人都能勝任。全文程式碼可以喺我哋 GitHub 找到,亦歡迎大家參考其他 AI 代理同 Agentic AI 嘅教學。
—
安裝必要套件及導入模組
首先安裝 Cognee、Transformers、Torch、Sentence-Transformers 同 Accelerate,之後導入用於分詞、模型載入、非同步處理同記憶整合嘅模組。呢個準備工作確保我哋可以順利建立、訓練智能代理,並與佢互動。
```python
!pip install cognee transformers torch sentence-transformers accelerate
import asyncio
import json
import os
import re
from datetime import datetime
from typing import Any, Dict, List, Optional

import cognee
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
```
—
設定 Cognee 代理記憶
我哋會用句子嵌入模型 all-MiniLM-L6-v2,輕量而高效。若主設定失敗,會嘗試用環境變數方式設定,確保 Cognee 可以正常運作。
```python
async def setup_cognee() -> bool:
    """Point Cognee at a local sentence-transformers embedding model.

    The settings are first applied through ``cognee.config.set``. If that
    raises for any reason, the same key/value pairs are exported as
    environment variables instead.

    Returns:
        True when either path completes, False when both raise.
    """
    embedding_settings = {
        "EMBEDDING_MODEL": "sentence-transformers/all-MiniLM-L6-v2",
        "EMBEDDING_PROVIDER": "sentence_transformers",
    }

    # NOTE(review): assumes cognee.config.set is awaitable and takes
    # (key, value) -- confirm against the installed Cognee version.
    try:
        for key, value in embedding_settings.items():
            await cognee.config.set(key, value)
    except Exception as e:
        # Best-effort setup: report the failure and try the fallback below.
        print(f"Cognee 設定錯誤: {e}")
    else:
        print("Cognee 設定成功")
        return True

    try:
        os.environ.update(embedding_settings)
    except Exception as e2:
        print(f"替代設定失敗: {e2}")
        return False
    print("Cognee 透過環境變數設定成功")
    return True
```
—
建立 Hugging Face 對話模型介面
定義 HuggingFaceLLM 類別,支援 DialoGPT-medium 同 DistilGPT2 兩款輕量模型,會自動檢查 GPU 可用性,載入相應的 tokenizer 同模型。這樣可以產生符合上下文的智能回覆。
```python
class HuggingFaceLLM:
    """Small wrapper that produces short replies from a Hugging Face causal LM.

    Model names containing ``"DialoGPT"`` are loaded as an explicit
    tokenizer/model pair. Any other name is served through a
    ``text-generation`` pipeline built for that model.
    """

    def __init__(self, model_name="microsoft/DialoGPT-medium"):
        """Load ``model_name`` onto the GPU when one is available, else the CPU."""
        print(f"載入 Hugging Face 模型: {model_name}")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"使用裝置: {self.device}")

        self.generator = None
        if "DialoGPT" in model_name:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
            self.model = AutoModelForCausalLM.from_pretrained(model_name)
            # Place the weights on the detected device; generate_response
            # moves its input tensor to the same device.
            self.model.to(self.device)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
        else:
            self.generator = pipeline(
                "text-generation",
                # Honour the requested model instead of a fixed checkpoint.
                model=model_name,
                device=0 if self.device == "cuda" else -1,
                max_length=150,
                do_sample=True,
                temperature=0.7,
            )
            self.tokenizer = None
            self.model = None
        print("模型載入成功!")

    def generate_response(self, prompt: str, max_length: int = 100) -> str:
        """Generate a reply to ``prompt``.

        Args:
            prompt: Text the model should continue.
            max_length: For the tokenizer/model path, the number of tokens
                allowed beyond the prompt. For the pipeline path it is
                forwarded as the pipeline's ``max_length``.

        Returns:
            The generated continuation with the prompt removed. Any exception
            raised during generation is reported and a fixed fallback sentence
            is returned instead.
        """
        try:
            if self.model is not None:
                inputs = self.tokenizer.encode(
                    prompt + self.tokenizer.eos_token, return_tensors="pt"
                ).to(self.device)
                with torch.no_grad():
                    outputs = self.model.generate(
                        inputs,
                        max_length=inputs.shape[1] + max_length,
                        num_return_sequences=1,
                        temperature=0.7,
                        do_sample=True,
                        pad_token_id=self.tokenizer.eos_token_id,
                    )
                # Decode only the tokens generated after the prompt. Slicing
                # the decoded text by len(prompt) is unreliable because a
                # tokenizer round-trip need not reproduce the prompt exactly.
                new_tokens = outputs[0][inputs.shape[1]:]
                response = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
                return response if response else "我明白你的意思。"

            # NOTE(review): here max_length presumably counts prompt tokens
            # too, so long prompts may leave little room for new text -- confirm.
            result = self.generator(prompt, max_length=max_length, truncation=True)
            return result[0]["generated_text"][len(prompt):].strip()
        except Exception as e:
            print(f"生成錯誤: {e}")
            return "我正在處理該資訊。"
# Shared model instance; stays None until AdvancedAIAgent.initialize_memory()
# creates the HuggingFaceLLM on first use.
hf_llm = None
```
—
核心系統:AdvancedAIAgent 類別
呢個類別整合 Cognee 記憶、多領域學習、知識檢索同 Hugging Face 推理功能。代理人能夠從文字或文件學習,根據上下文檢索相關知識,並生成智能回答。無論係記憶事實、答問題,定係自然對話,佢都能學習、記住並以人類般流暢嘅方式回應。
```python
class AdvancedAIAgent:
    """Conversational agent backed by Cognee memory and a Hugging Face model.

    Text is stored through ``cognee.add`` / ``cognee.cognify``. Anything Cognee
    fails to ingest is kept in ``manual_memory`` (a plain list) and searched
    by keyword matching when a Cognee search fails or returns nothing.
    """

    # Header tags that learn_from_text prepends to stored text.
    _TAG_PATTERN = re.compile(r"\[(?:DOMAIN|TIMESTAMP):[^\]]*\]")
    # Prefixes users may put before content they want stored, in any casing.
    _LEARN_PREFIX_PATTERN = re.compile(r"learn this:|remember:", re.IGNORECASE)
    # First sentence terminator, ASCII or CJK full stop.
    _SENTENCE_END_PATTERN = re.compile(r"[.。]")

    def __init__(self, agent_name: str = "CogneeAgent"):
        self.name = agent_name
        self.memory_initialized = False
        self.knowledge_domains = []       # domain labels seen so far
        self.conversation_history = []    # {"role", "content"} dicts
        self.manual_memory = []           # {"text", "domain"} fallback store

    async def initialize_memory(self):
        """Load the shared model, configure Cognee and reset its stored data.

        A failed reset is reported as a warning; the agent is marked
        initialized either way so the manual-memory fallback stays usable.
        """
        global hf_llm
        if hf_llm is None:
            hf_llm = HuggingFaceLLM("microsoft/DialoGPT-medium")
        await setup_cognee()
        try:
            # cognee.prune is a namespace, not a coroutine function. The
            # reset is done through its prune_data / prune_system coroutines.
            await cognee.prune.prune_data()
            await cognee.prune.prune_system(metadata=True)
            print(f"{self.name} 記憶系統初始化完成")
        except Exception as e:
            print(f"記憶初始化警告: {e}")
        self.memory_initialized = True

    def _register_domain(self, domain: str) -> None:
        """Record ``domain`` in ``knowledge_domains`` if it is not there yet."""
        if domain not in self.knowledge_domains:
            self.knowledge_domains.append(domain)

    async def learn_from_text(self, text: str, domain: str = "general"):
        """Store ``text`` under ``domain``.

        Tries Cognee with a domain/timestamp header, then Cognee with the raw
        text, and finally the in-process manual memory.

        Returns:
            Always True; the manual-memory fallback cannot fail.
        """
        if not self.memory_initialized:
            await self.initialize_memory()

        enhanced_text = f"[DOMAIN: {domain}] [TIMESTAMP: {datetime.now().isoformat()}]\n{text}"
        attempts = (
            (enhanced_text, "學習新知識於領域", "學習錯誤"),
            (text, "簡化學習成功", "簡化學習失敗"),
        )
        for payload, success_label, failure_label in attempts:
            try:
                await cognee.add(payload)
                await cognee.cognify()
            except Exception as e:
                print(f"{failure_label}: {e}")
            else:
                self._register_domain(domain)
                print(f"{success_label}: {domain}")
                return True

        self.manual_memory.append({"text": text, "domain": domain})
        self._register_domain(domain)
        print(f"已存入手動記憶: {domain}")
        return True

    async def learn_from_documents(self, documents: List[Dict[str, str]]):
        """Learn from a batch of documents.

        Each document is a dict with optional ``content``, ``domain`` and
        ``title`` keys. Progress is printed for every third document.
        """
        print(f"處理 {len(documents)} 份文件…")
        for i, doc in enumerate(documents):
            text = doc.get("content", "")
            domain = doc.get("domain", "general")
            title = doc.get("title", f"Document_{i+1}")
            enhanced_content = f"標題: {title}\n{text}"
            await self.learn_from_text(enhanced_content, domain)
            if i % 3 == 0:
                print(f"已處理 {i+1}/{len(documents)} 份文件")

    @staticmethod
    def _collect_texts(search_results) -> List[str]:
        """Extract displayable text from Cognee results of various shapes."""
        texts = []
        for result in search_results:
            if hasattr(result, "text"):
                texts.append(result.text)
            elif hasattr(result, "content"):
                texts.append(result.content)
            elif hasattr(result, "value"):
                texts.append(str(result.value))
            elif isinstance(result, dict):
                content = (
                    result.get("text")
                    or result.get("content")
                    or result.get("data")
                    or result.get("value")
                )
                texts.append(str(content) if content else str(result))
            elif isinstance(result, str):
                texts.append(result)
            else:
                result_str = str(result)
                # Very short reprs are treated as noise and dropped.
                if len(result_str) > 10:
                    texts.append(result_str)
        return texts

    def _search_manual_memory(self, question: str, domain_filter: Optional[str] = None) -> List[str]:
        """Return manual-memory texts containing any whitespace-separated word of ``question``."""
        keywords = [word.lower() for word in question.split()]
        matches = []
        for item in self.manual_memory:
            if domain_filter and item["domain"] != domain_filter:
                continue
            haystack = item["text"].lower()
            if any(keyword in haystack for keyword in keywords):
                matches.append(item["text"])
        return matches

    async def query_knowledge(self, question: str, domain_filter: Optional[str] = None) -> List[str]:
        """Search stored knowledge for ``question``.

        Args:
            question: Free-text query.
            domain_filter: When given, it is prefixed to the Cognee query as a
                domain tag and restricts the manual-memory fallback to that domain.

        Returns:
            At most five result strings. Manual memory is consulted when the
            Cognee search raises or yields nothing.
        """
        enhanced_query = f"[DOMAIN: {domain_filter}] {question}" if domain_filter else question
        try:
            search_results = await cognee.search("SIMILARITY", enhanced_query)
            results = self._collect_texts(search_results)
        except Exception as e:
            print(f"查詢錯誤: {e}")
            results = []
        if not results:
            results = self._search_manual_memory(question, domain_filter)
        return results[:5]

    async def reasoning_chain(self, question: str) -> Dict[str, Any]:
        """Retrieve knowledge for ``question`` and build an analysis record.

        Returns:
            Dict with ``question``, ``relevant_knowledge``, ``domains_searched``,
            ``confidence`` (retrieved count / 3, capped at 1.0), ``timestamp``,
            ``reasoning`` and ``answer``.
        """
        print(f"處理問題: {question}")
        relevant_info = await self.query_knowledge(question)
        analysis = {
            "question": question,
            "relevant_knowledge": relevant_info,
            # Snapshot, so later learning does not mutate an earlier analysis.
            "domains_searched": list(self.knowledge_domains),
            "confidence": min(len(relevant_info) / 3.0, 1.0),
            "timestamp": datetime.now().isoformat(),
        }
        if relevant_info:
            analysis["reasoning"] = self._synthesize_answer(question, relevant_info)
            analysis["answer"] = self._extract_key_points(relevant_info)
        else:
            analysis["reasoning"] = "記憶中找不到相關知識"
            analysis["answer"] = "我目前知識庫無相關資料。"
        return analysis

    def _synthesize_answer(self, question: str, knowledge_pieces: List[str]) -> str:
        """Have the shared model phrase an answer from the first two knowledge pieces.

        Falls back to quoting a truncated slice of the context when no model
        is loaded, the model returns nothing, or generation raises.
        """
        if not knowledge_pieces:
            return "知識庫中無相關資訊。"
        context = " ".join(knowledge_pieces[:2])
        context = context[:300]  # keep the prompt short for the small model
        prompt = f"根據以下資訊: {context}\n\n問題: {question}\n回答:"
        try:
            if hf_llm:
                synthesized = hf_llm.generate_response(prompt, max_length=80)
                return synthesized if synthesized else f"根據我的知識: {context[:100]}…"
            return f"根據我的分析: {context[:150]}…"
        except Exception as e:
            print(f"合成錯誤: {e}")
            return f"根據我的知識: {context[:100]}…"

    def _extract_key_points(self, knowledge_pieces: List[str]) -> List[str]:
        """Return the first sentence of up to three knowledge pieces.

        The domain/timestamp header tags are removed in full before the first
        sentence is taken, so the ``.`` inside an ISO timestamp is not mistaken
        for a sentence end. First sentences of ten characters or fewer are skipped.
        """
        key_points = []
        for piece in knowledge_pieces:
            cleaned = self._TAG_PATTERN.sub("", piece).strip()
            end = self._SENTENCE_END_PATTERN.search(cleaned)
            sentence = (cleaned[:end.start()] if end else cleaned).strip()
            if len(sentence) > 10:
                key_points.append(sentence + (end.group() if end else "."))
        return key_points[:3]

    async def conversational_agent(self, user_input: str) -> str:
        """Handle one user turn and return the agent's reply.

        Inputs containing learn/remember/add/teach are stored as knowledge.
        Inputs starting with a question word go through ``reasoning_chain``.
        Anything else is answered as free conversation with optional retrieved
        context. Both sides of the turn are appended to ``conversation_history``.
        """
        self.conversation_history.append({"role": "user", "content": user_input})
        lowered = user_input.lower()

        if any(word in lowered for word in ["learn", "remember", "add", "teach"]):
            # Strip the command prefix regardless of casing; the trigger
            # check above is case-insensitive too.
            content_to_learn = self._LEARN_PREFIX_PATTERN.sub("", user_input).strip()
            await self.learn_from_text(content_to_learn, "conversation")
            response = "我已經把資訊存入記憶!還想教我什麼呢?"
        elif lowered.startswith(("what", "how", "why", "when", "where", "who", "tell me")):
            analysis = await self.reasoning_chain(user_input)
            if analysis["relevant_knowledge"] and hf_llm:
                context = " ".join(analysis["relevant_knowledge"][:2])[:200]
                prompt = f"問題: {user_input}\n知識: {context}\n友善回應:"
                ai_response = hf_llm.generate_response(prompt, max_length=60)
                response = ai_response if ai_response else "這是我在知識庫找到的資訊。"
            else:
                response = "我目前知識庫無關於該主題的具體資訊。"
        else:
            relevant_context = await self.query_knowledge(user_input)
            if hf_llm:
                context_info = ""
                if relevant_context:
                    context_info = f" 我知道: {relevant_context[0][:100]}…"
                conversation_prompt = f"用戶說: {user_input}{context_info}\n我回應:"
                response = hf_llm.generate_response(conversation_prompt, max_length=50)
                if not response or len(response.strip()) < 3:
                    response = "很有趣!我正在從對話中學習。"
            else:
                response = "我正在聆聽並從對話中學習。"

        self.conversation_history.append({"role": "assistant", "content": response})
        return response
```
—
示範運行:多領域學習、知識檢索及對話
我哋喺 main 函數示範代理人如何:
1. 從多個領域文件學習(程序設計、氣候科學、AI 倫理、可持續能源)
2. 回答相關問題並提供推理結果
3. 進行自然對話,學習用戶輸入的資訊
4. 查詢特定領域知識並統計記憶狀況
```python
async def main():
    """Run the four-part demo: learning, reasoning, conversation and summary."""
    print("先進 AI 代理人與 Cognee 教學")
    print("=" * 50)
    agent = AdvancedAIAgent("TutorialAgent")
    await agent.initialize_memory()

    # Demo 1: ingest documents from several domains.
    print("\n示範 1:多領域學習")
    sample_documents = [
        {
            "title": "Python Basics",
            "content": "Python 是一種以簡潔易讀著稱的高階程式語言。",
            "domain": "programming",
        },
        {
            "title": "Climate Science",
            "content": "氣候變化",
            "domain": "science",
        },
        {
            "title": "AI Ethics",
            "content": "AI 倫理關注確保人工智能系統的公平、透明、責任及社會影響。",
            "domain": "technology",
        },
        {
            "title": "Sustainable Energy",
            "content": "可再生能源對減少碳排放至關重要。",
            "domain": "environment",
        },
    ]
    await agent.learn_from_documents(sample_documents)

    # Demo 2: retrieval plus reasoning over what was just learned.
    print("\n示範 2:知識檢索與推理")
    test_questions = [
        "你知道 Python 程式設計嗎?",
        "氣候變化與能源有什麼關係?",
        "AI 有哪些倫理考量?",
    ]
    for question in test_questions:
        print(f"\n問題: {question}")
        analysis = await agent.reasoning_chain(question)
        print(f"回答: {analysis.get('answer', '無法生成回答')}")
        print(f"信心指數: {analysis.get('confidence', 0):.2f}")

    # Demo 3: multi-turn conversation, including teaching the agent new facts.
    print("\n示範 3:對話代理人")
    conversation_inputs = [
        "Learn this: 機器學習是 AI 的子領域",
        "什麼是機器學習?",
        "它和 Python 有什麼關係?",
        "記住神經網絡靈感來自生物神經元",
    ]
    for user_input in conversation_inputs:
        print(f"\n用戶: {user_input}")
        response = await agent.conversational_agent(user_input)
        print(f"代理人: {response}")

    # Demo 4: summary of the agent's state and a domain-filtered query.
    print("\n示範 4:代理人知識總結")
    print(f"知識領域: {agent.knowledge_domains}")
    print(f"對話歷史: {len(agent.conversation_history)} 次互動")
    print("\n特定領域查詢:")
    programming_results = await agent.query_knowledge("programming concepts", "programming")
    print(f"程式設計知識: 找到 {len(programming_results)} 筆結果")
if __name__ == "__main__":
    print("啟動先進 AI 代理人教學,使用 Hugging Face 免費模型")
    print("GPU 加速可用!" if torch.cuda.is_available() else "使用 CPU 運行")

    # A bare top-level `await main()` only parses in a notebook cell and is a
    # SyntaxError in a plain script, so check for a running loop explicitly.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # Plain script: no loop is running yet.
        asyncio.run(main())
    else:
        # A loop is already running (e.g. in a notebook). nest_asyncio
        # patches asyncio so that asyncio.run can be nested inside it.
        import nest_asyncio
        nest_asyncio.apply()
        asyncio.run(main())

    print("\n教學完成!你已學會:")
    print("• 如何設定 Cognee 同 Hugging Face 模型")
    print("• AI 驅動的回應生成")
    print("• 多領域知識管理")
    print("• 進階推理與檢索")
    print("• 具記憶的對話代理人")
    print("• 免費 GPU 加速推理")
```
—
評論與啟示
此教程展示咗點樣用完全免費嘅開源資源,打造一個具備「記憶」及多領域學習能力嘅智能對話代理人。透過 Cognee 進行語意記憶存取,加上 Hugging Face 輕量模型作為推理引擎,令代理人可以根據上下文作出合理回答,甚至持續學習新知識。
對香港及全球用戶嚟講,呢種無需依賴昂貴 API 嘅方案,提供咗一條成本低廉又彈性高嘅人工智能開發路徑。尤其喺教育、客服、知識管理等場景,能幫助企業或開發者快速建立智能助手。
不過,現階段輕量模型喺理解複雜問題時仍有限制,記憶管理亦需更精細策略以避免資料過載或冗餘。未來若能結合更強大嘅模型,或者加入多模態資料處理(例如圖像、語音),將進一步提升代理人嘅智能水平。
此外,代理人嘅倫理與隱私問題亦不可忽視,尤其當涉及用戶提供嘅私人資訊時,必須設計完善嘅資料保護機制。
總括而言,本文不單教學詳盡,而且為開發者提供咗一個強大嘅實驗平台,推動智能代理技術向更普及、可持續嘅方向發展。建議業界及學術界持續關注此類開源框架嘅演進,並探索多元應用可能。
—
全文程式碼及更多 AI 代理教學,歡迎參考我哋嘅 GitHub 倉庫,亦可追蹤我哋 Twitter 同訂閱機器學習社群,掌握最新 AI 技術動態。
以上文章由特價GPT API KEY所翻譯及撰寫。而圖片則由FLUX根據內容自動生成。
🎬 YouTube Premium 家庭 Plan成員一位 只需 HK$148/年!
不用提供密碼、不用VPN、無需轉區
直接升級你的香港帳號 ➜ 即享 YouTube + YouTube Music 無廣告播放