Compare commits
2 Commits
c0aaac279c
...
c3247a7c46
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c3247a7c46 | ||
|
|
626ae74cdf |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -10,3 +10,5 @@ wheels/
|
||||
.venv
|
||||
|
||||
.python-version
|
||||
data
|
||||
*.pkl
|
||||
|
||||
2
db.py
2
db.py
@@ -217,7 +217,7 @@ def query(db: Database | Path, text: str, record_count: int = 10) -> list[tuple[
|
||||
|
||||
return results
|
||||
|
||||
def add_document(db: Database | Path, file: Path, max_workers: int = 1) -> None:
|
||||
def add_document(db: Database | Path, file: Path, max_workers: int = 4) -> None:
|
||||
"""
|
||||
Adds a new document to the database. If path is given, do load, add, save.
|
||||
Loads PDF with PyMuPDF, splits by pages, and creates records and vectors.
|
||||
|
||||
73
main.py
73
main.py
@@ -179,6 +179,71 @@ def query(db_path: str, query_text: str):
|
||||
|
||||
|
||||
|
||||
def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
    """Start a web server for the semantic search tool.

    Serves the search page at ``/`` and a JSON endpoint at ``/api/search``
    (POST with a body of the form ``{"query": "<text>"}``). The call blocks
    until the server stops.

    Args:
        db_path: Path to the database file to search.
        host: Address to bind to.
        port: TCP port to listen on.

    Exits the process with status 1 when Flask is not installed or the
    database file does not exist.
    """
    # Flask is imported lazily so the other commands work without it.
    try:
        from flask import Flask, request, jsonify, render_template
    except ImportError:
        print("❌ Flask not found. Please install it first:")
        print(" pip install flask")
        sys.exit(1)

    # Set template_folder to 'templates' directory
    app = Flask(__name__, template_folder="templates")
    db_file = Path(db_path)

    # Check if database exists
    if not db_file.exists():
        print(f"❌ Database file not found: {db_file}")
        print(" Create a database first using: python main.py create")
        sys.exit(1)

    @app.route('/')
    def index():
        return render_template("index.html", results=None)

    @app.route('/api/search', methods=['POST'])
    def search():
        # silent=True yields None for a missing, malformed, or non-JSON body
        # instead of raising, so client mistakes are answered with a 400
        # below rather than surfacing as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'query' not in data:
            return jsonify({'error': 'Missing query parameter'}), 400

        raw_query = data['query']
        if not isinstance(raw_query, str):
            return jsonify({'error': 'Query must be a string'}), 400

        query_text = raw_query.strip()
        if not query_text:
            return jsonify({'error': 'Query cannot be empty'}), 400

        try:
            # Perform the search
            results = db.query(db_file, query_text)

            # Format results for JSON response
            formatted_results = [
                {
                    'distance': float(distance),
                    'document': record.document.name,
                    'page': record.page,
                    'chunk': record.chunk,
                    # Clean and truncate text
                    'text': ' '.join(record.text[:300].split()),
                }
                for distance, record in results
            ]
        except Exception as e:
            # Boundary handler: report search failures as JSON instead of
            # Flask's default HTML error page.
            # NOTE(review): str(e) is sent to the client; acceptable for a
            # local tool, reconsider before exposing on a public interface.
            return jsonify({'error': str(e)}), 500

        return jsonify({'results': formatted_results})

    print("🚀 Starting web server...")
    print(f" Database: {db_file}")
    print(f" URL: http://{host}:{port}")
    print(" Press Ctrl+C to stop")

    try:
        app.run(host=host, port=port, debug=False)
    except KeyboardInterrupt:
        print("\n👋 Web server stopped")
    except Exception as e:
        print(f"❌ Error starting web server: {e}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Semantic Search Tool",
|
||||
@@ -203,6 +268,12 @@ def main():
|
||||
query_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
|
||||
query_parser.add_argument('query_text', help='Text to search for')
|
||||
|
||||
# Host command (web server)
|
||||
host_parser = subparsers.add_parser('host', aliases=['h'], help='Start a web server for semantic search')
|
||||
host_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
|
||||
host_parser.add_argument('--host', default='127.0.0.1', help='Host address to bind to (default: 127.0.0.1)')
|
||||
host_parser.add_argument('--port', type=int, default=5000, help='Port to listen on (default: 5000)')
|
||||
|
||||
# Test command
|
||||
subparsers.add_parser('test', aliases=['t'], help='Test database save/load functionality')
|
||||
|
||||
@@ -216,6 +287,8 @@ def main():
|
||||
add_file(args.db, args.file_paths)
|
||||
elif args.command in ['query', 'q']:
|
||||
query(args.db, args.query_text)
|
||||
elif args.command in ['host', 'h']:
|
||||
start_web_server(args.db, args.host, args.port)
|
||||
elif args.command in ['test', 't']:
|
||||
test_database()
|
||||
else:
|
||||
|
||||
73
templates/index.html
Normal file
73
templates/index.html
Normal file
@@ -0,0 +1,73 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Semantic Document Search</title>
|
||||
<style>
|
||||
body { font-family: Arial, sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; }
|
||||
.search-box { margin-bottom: 20px; }
|
||||
input[type="text"] { width: 70%; padding: 10px; font-size: 16px; }
|
||||
button { padding: 10px 20px; font-size: 16px; background: #007cba; color: white; border: none; cursor: pointer; }
|
||||
button:hover { background: #005c8a; }
|
||||
.result { border: 1px solid #ddd; margin: 10px 0; padding: 15px; border-radius: 5px; }
|
||||
.result-header { font-weight: bold; color: #333; margin-bottom: 10px; }
|
||||
.result-text { background: #f9f9f9; padding: 10px; border-radius: 3px; }
|
||||
.distance { color: #666; font-size: 0.9em; }
|
||||
.no-results { text-align: center; color: #666; margin: 40px 0; }
|
||||
.loading { text-align: center; color: #007cba; margin: 20px 0; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>🔍 Semantic Document Search</h1>
|
||||
<div class="search-box">
|
||||
<form id="searchForm">
|
||||
<input type="text" id="queryInput" placeholder="Enter your search query..." required>
|
||||
<button type="submit">Search</button>
|
||||
</form>
|
||||
</div>
|
||||
<div id="results"></div>
|
||||
|
||||
<script>
|
||||
// Handle search submissions: POST the query to /api/search and render the
// JSON response into #results without reloading the page.
document.getElementById('searchForm').addEventListener('submit', async (e) => {
    e.preventDefault();

    // Result fields come from indexed documents and error messages come from
    // the server, so everything is escaped before being placed in innerHTML.
    const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

    const query = document.getElementById('queryInput').value;
    const resultsDiv = document.getElementById('results');

    resultsDiv.innerHTML = '<div class="loading">Searching...</div>';

    try {
        const response = await fetch('/api/search', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ query: query })
        });

        const data = await response.json();

        // The API reports failures as an "error" field in the JSON body.
        if (data.error) {
            resultsDiv.innerHTML = `<div class="no-results">Error: ${escapeHtml(data.error)}</div>`;
            return;
        }

        if (data.results.length === 0) {
            resultsDiv.innerHTML = '<div class="no-results">No results found.</div>';
            return;
        }

        resultsDiv.innerHTML = data.results.map((result, i) => `
            <div class="result">
                <div class="result-header">
                    Result ${i + 1} - ${escapeHtml(result.document)}
                    <span class="distance">(Distance: ${escapeHtml(result.distance.toFixed(4))})</span>
                </div>
                <div>Page: ${escapeHtml(result.page)}, Chunk: ${escapeHtml(result.chunk)}</div>
                <div class="result-text">${escapeHtml(result.text)}</div>
            </div>
        `).join('');

    } catch (error) {
        // Network failures and non-JSON responses end up here.
        resultsDiv.innerHTML = `<div class="no-results">Error: ${escapeHtml(error.message)}</div>`;
    }
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user