Added web interface
This commit is contained in:
2
db.py
2
db.py
@@ -217,7 +217,7 @@ def query(db: Database | Path, text: str, record_count: int = 10) -> list[tuple[
|
|||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def add_document(db: Database | Path, file: Path, max_workers: int = 1) -> None:
|
def add_document(db: Database | Path, file: Path, max_workers: int = 4) -> None:
|
||||||
"""
|
"""
|
||||||
Adds a new document to the database. If path is given, do load, add, save.
|
Adds a new document to the database. If path is given, do load, add, save.
|
||||||
Loads PDF with PyMuPDF, splits by pages, and creates records and vectors.
|
Loads PDF with PyMuPDF, splits by pages, and creates records and vectors.
|
||||||
|
|||||||
73
main.py
73
main.py
@@ -179,6 +179,71 @@ def query(db_path: str, query_text: str):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
    """Start a Flask web server for the semantic search tool.

    Serves ``index.html`` at ``/`` and a JSON search endpoint at
    ``POST /api/search``. The endpoint expects a JSON object with a string
    ``query`` field and answers with ``{"results": [...]}`` on success or
    ``{"error": "..."}`` together with a 400/500 status on failure.

    Exits the process with status 1 when Flask cannot be imported or when
    the database file does not exist.

    Args:
        db_path: Path to the database file to search.
        host: Address to bind the server to.
        port: TCP port to listen on.
    """
    # Flask is imported here rather than at module level, so a missing
    # install is only reported when the web server is actually requested.
    try:
        from flask import Flask, request, jsonify, render_template
    except ImportError:
        print("❌ Flask not found. Please install it first:")
        print(" pip install flask")
        sys.exit(1)

    # Set template_folder to 'templates' directory
    app = Flask(__name__, template_folder="templates")
    db_file = Path(db_path)

    # Check if database exists
    if not db_file.exists():
        print(f"❌ Database file not found: {db_file}")
        print(" Create a database first using: python main.py create")
        sys.exit(1)

    @app.route('/')
    def index():
        return render_template("index.html", results=None)

    @app.route('/api/search', methods=['POST'])
    def search():
        # silent=True makes a missing or malformed JSON body come back as
        # None instead of raising, so it is reported as a 400 below rather
        # than being turned into a 500 by a broad exception handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'query' not in data:
            return jsonify({'error': 'Missing query parameter'}), 400

        raw_query = data['query']
        if not isinstance(raw_query, str):
            # A number/list/null here is a client error, not a server fault.
            return jsonify({'error': 'Query must be a string'}), 400

        query_text = raw_query.strip()
        if not query_text:
            return jsonify({'error': 'Query cannot be empty'}), 400

        # Only the search itself and the result formatting can fail on the
        # server side, so only they are guarded.
        try:
            # Perform the search
            results = db.query(db_file, query_text)

            # Format results for JSON response
            formatted_results = [
                {
                    'distance': float(distance),
                    'document': record.document.name,
                    'page': record.page,
                    'chunk': record.chunk,
                    # Truncate to 300 characters, then collapse whitespace runs.
                    'text': ' '.join(record.text[:300].split()),
                }
                for distance, record in results
            ]
        except Exception as e:
            # NOTE(review): the raw exception text is sent to the client;
            # acceptable for a local tool, reconsider if exposed publicly.
            return jsonify({'error': str(e)}), 500

        return jsonify({'results': formatted_results})

    print("🚀 Starting web server...")
    print(f" Database: {db_file}")
    print(f" URL: http://{host}:{port}")
    print(" Press Ctrl+C to stop")

    try:
        app.run(host=host, port=port, debug=False)
    except KeyboardInterrupt:
        print("\n👋 Web server stopped")
    except Exception as e:
        print(f"❌ Error starting web server: {e}")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Semantic Search Tool",
|
description="Semantic Search Tool",
|
||||||
@@ -203,6 +268,12 @@ def main():
|
|||||||
query_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
|
query_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
|
||||||
query_parser.add_argument('query_text', help='Text to search for')
|
query_parser.add_argument('query_text', help='Text to search for')
|
||||||
|
|
||||||
|
# Host command (web server)
|
||||||
|
host_parser = subparsers.add_parser('host', aliases=['h'], help='Start a web server for semantic search')
|
||||||
|
host_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
|
||||||
|
host_parser.add_argument('--host', default='127.0.0.1', help='Host address to bind to (default: 127.0.0.1)')
|
||||||
|
host_parser.add_argument('--port', type=int, default=5000, help='Port to listen on (default: 5000)')
|
||||||
|
|
||||||
# Test command
|
# Test command
|
||||||
subparsers.add_parser('test', aliases=['t'], help='Test database save/load functionality')
|
subparsers.add_parser('test', aliases=['t'], help='Test database save/load functionality')
|
||||||
|
|
||||||
@@ -216,6 +287,8 @@ def main():
|
|||||||
add_file(args.db, args.file_paths)
|
add_file(args.db, args.file_paths)
|
||||||
elif args.command in ['query', 'q']:
|
elif args.command in ['query', 'q']:
|
||||||
query(args.db, args.query_text)
|
query(args.db, args.query_text)
|
||||||
|
elif args.command in ['host', 'h']:
|
||||||
|
start_web_server(args.db, args.host, args.port)
|
||||||
elif args.command in ['test', 't']:
|
elif args.command in ['test', 't']:
|
||||||
test_database()
|
test_database()
|
||||||
else:
|
else:
|
||||||
|
|||||||
73
templates/index.html
Normal file
73
templates/index.html
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
<!DOCTYPE html>
<!--
  Search page for the semantic document search tool.
  Submits the query as JSON to POST /api/search and renders the returned
  results (document, distance, page, chunk, text) into #results.
-->
<html>
<head>
    <meta charset="utf-8">
    <title>Semantic Document Search</title>
    <style>
        body { font-family: Arial, sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; }
        .search-box { margin-bottom: 20px; }
        input[type="text"] { width: 70%; padding: 10px; font-size: 16px; }
        button { padding: 10px 20px; font-size: 16px; background: #007cba; color: white; border: none; cursor: pointer; }
        button:hover { background: #005c8a; }
        .result { border: 1px solid #ddd; margin: 10px 0; padding: 15px; border-radius: 5px; }
        .result-header { font-weight: bold; color: #333; margin-bottom: 10px; }
        .result-text { background: #f9f9f9; padding: 10px; border-radius: 3px; }
        .distance { color: #666; font-size: 0.9em; }
        .no-results { text-align: center; color: #666; margin: 40px 0; }
        .loading { text-align: center; color: #007cba; margin: 20px 0; }
    </style>
</head>
<body>
    <h1>🔍 Semantic Document Search</h1>
    <div class="search-box">
        <form id="searchForm">
            <input type="text" id="queryInput" placeholder="Enter your search query..." required>
            <button type="submit">Search</button>
        </form>
    </div>
    <div id="results"></div>

    <script>
        // Escape a value for safe interpolation into an HTML string.
        // Everything coming back from the server (document names, page text,
        // error messages) is data, not markup, and must not reach innerHTML raw.
        function escapeHtml(value) {
            return String(value)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;')
                .replace(/'/g, '&#39;');
        }

        document.getElementById('searchForm').addEventListener('submit', async (e) => {
            // Keep the browser from doing a full-page form submission.
            e.preventDefault();
            const query = document.getElementById('queryInput').value;
            const resultsDiv = document.getElementById('results');

            resultsDiv.innerHTML = '<div class="loading">Searching...</div>';

            try {
                const response = await fetch('/api/search', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ query: query })
                });

                const data = await response.json();

                // The API reports failures as { error: "..." }.
                if (data.error) {
                    resultsDiv.innerHTML = `<div class="no-results">Error: ${escapeHtml(data.error)}</div>`;
                    return;
                }

                if (data.results.length === 0) {
                    resultsDiv.innerHTML = '<div class="no-results">No results found.</div>';
                    return;
                }

                resultsDiv.innerHTML = data.results.map((result, i) => `
                    <div class="result">
                        <div class="result-header">
                            Result ${i + 1} - ${escapeHtml(result.document)}
                            <span class="distance">(Distance: ${escapeHtml(result.distance.toFixed(4))})</span>
                        </div>
                        <div>Page: ${escapeHtml(result.page)}, Chunk: ${escapeHtml(result.chunk)}</div>
                        <div class="result-text">${escapeHtml(result.text)}</div>
                    </div>
                `).join('');

            } catch (error) {
                // Network failure or a response body that is not valid JSON.
                resultsDiv.innerHTML = `<div class="no-results">Error: ${escapeHtml(error.message)}</div>`;
            }
        });
    </script>
</body>
</html>
|
||||||
Reference in New Issue
Block a user