1. 数据预处理

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the knowledge base; the CSV must provide a 'content' text column.
data = pd.read_csv('knowledge_base.csv')

# Vectorize the text with TF-IDF.
# pandas reads empty cells as NaN, which TfidfVectorizer rejects as an invalid
# document, so missing entries are replaced by empty strings and every value
# is coerced to str. `data` itself is left untouched so that search results
# still show the original rows.
# NOTE(review): the default word-level tokenizer does not segment Chinese
# text; if 'content' is mostly Chinese, consider a character n-gram analyzer
# or a custom tokenizer - confirm against the real data.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(data['content'].fillna('').astype(str))
2. 检索功能
from sklearn.metrics.pairwise import cosine_similarity

def search(query, vectorizer, tfidf_matrix, data, top_n=5):
    """Return the rows of ``data`` that are most similar to ``query``.

    Args:
        query: Free-text query string.
        vectorizer: Fitted vectorizer exposing ``transform``; it must be the
            one that produced ``tfidf_matrix``.
        tfidf_matrix: Document-term matrix whose rows are expected to line up
            positionally with the rows of ``data``.
        data: DataFrame holding the documents.
        top_n: Maximum number of rows to return. Values of zero or below
            yield an empty result.

    Returns:
        A DataFrame with at most ``top_n`` rows of ``data``, ordered from the
        highest to the lowest cosine similarity. No similarity threshold is
        applied, so rows with a similarity of zero may be included.
    """
    # Guard first: with top_n == 0 the slice ``[-0:]`` below would select
    # every row, and a negative value would drop the best matches instead.
    if top_n <= 0:
        return data.iloc[0:0]

    # Project the query into the same TF-IDF space as the documents.
    query_vec = vectorizer.transform([query])

    # One similarity score per document, flattened to a 1-D array.
    similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # argsort is ascending: keep the last top_n positions, then reverse them
    # so the best match comes first.
    top_indices = similarities.argsort()[-top_n:][::-1]
    return data.iloc[top_indices]

# Usage example: run one query against the fitted index and print the
# best-matching rows.
results = search(
    query="如何安装Python",
    vectorizer=vectorizer,
    tfidf_matrix=tfidf_matrix,
    data=data,
)
print(results)

3. 前端界面(Flask示例)

from flask import Flask, request, render_template

# Flask application object; the route decorators below register views on it.
app = Flask(__name__)

@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page

@app.route('/search', methods=['POST'], endpoint='search')
def search_view():
    """Handle a search form submission and render the matching rows.

    The view is deliberately not named ``search``: a view with that name
    rebinds the module-level ``search`` retrieval function, so the call below
    would invoke the view itself and fail with a ``TypeError``. The endpoint
    is pinned to ``'search'`` so ``url_for('search')`` keeps resolving.

    Reads the ``query`` field from the posted form; Flask answers with
    400 Bad Request when the field is missing.
    """
    query = request.form['query']
    results = search(query, vectorizer, tfidf_matrix, data)
    return render_template('results.html', results=results)

if __name__ == '__main__':
    import os

    # Debug mode turns on the reloader and the interactive debugger, which
    # allows arbitrary code execution from the browser; it must never be
    # exposed publicly. It stays on by default for local development and can
    # be switched off with FLASK_DEBUG=0 (also: false / no / off).
    debug_enabled = os.environ.get('FLASK_DEBUG', '1').strip().lower() not in (
        '0', 'false', 'no', 'off',
    )
    app.run(debug=debug_enabled)
Logo

一站式 AI 云服务平台

更多推荐