Added web interface

This commit is contained in:
Jan Mrna
2025-11-03 15:31:27 +01:00
parent 404134bc5a
commit 8d0890edc5
3 changed files with 147 additions and 1 deletions

2
db.py
View File

@@ -217,7 +217,7 @@ def query(db: Database | Path, text: str, record_count: int = 10) -> list[tuple[
return results return results
def add_document(db: Database | Path, file: Path, max_workers: int = 1) -> None: def add_document(db: Database | Path, file: Path, max_workers: int = 4) -> None:
""" """
Adds a new document to the database. If path is given, do load, add, save. Adds a new document to the database. If path is given, do load, add, save.
Loads PDF with PyMuPDF, splits by pages, and creates records and vectors. Loads PDF with PyMuPDF, splits by pages, and creates records and vectors.

73
main.py
View File

@@ -179,6 +179,71 @@ def query(db_path: str, query_text: str):
def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
    """Start a web server for the semantic search tool.

    Serves the search page at ``/`` and a JSON endpoint at
    ``POST /api/search`` that expects a body of the form
    ``{"query": "<text>"}``. The call blocks until the server stops.

    Args:
        db_path: Path to the database file. The process exits with status 1
            if the file does not exist.
        host: Address to bind the server to.
        port: TCP port to listen on.
    """
    # Flask is imported here rather than at module level; if it is missing
    # the user gets an install hint and the process exits with status 1.
    try:
        from flask import Flask, request, jsonify, render_template
    except ImportError:
        print("❌ Flask not found. Please install it first:")
        print(" pip install flask")
        sys.exit(1)

    # Set template_folder to 'templates' directory
    app = Flask(__name__, template_folder="templates")
    db_file = Path(db_path)

    # Check if database exists
    if not db_file.exists():
        print(f"❌ Database file not found: {db_file}")
        print(" Create a database first using: python main.py create")
        sys.exit(1)

    @app.route('/')
    def index():
        return render_template("index.html", results=None)

    @app.route('/api/search', methods=['POST'])
    def search():
        # silent=True returns None for a missing/malformed JSON body or a
        # wrong content type instead of raising, so client mistakes are
        # answered with 400 below rather than falling into the 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'query' not in data:
            return jsonify({'error': 'Missing query parameter'}), 400

        query_text = data['query']
        if not isinstance(query_text, str):
            return jsonify({'error': 'Query must be a string'}), 400

        query_text = query_text.strip()
        if not query_text:
            return jsonify({'error': 'Query cannot be empty'}), 400

        # Only the search and result formatting can fail for server-side
        # reasons, so only they are covered by the 500 handler.
        try:
            # Perform the search
            results = db.query(db_file, query_text)
            # Format results for JSON response
            formatted_results = [
                {
                    'distance': float(distance),
                    'document': record.document.name,
                    'page': record.page,
                    'chunk': record.chunk,
                    # Truncate to 300 characters, then collapse whitespace
                    'text': ' '.join(record.text[:300].split()),
                }
                for distance, record in results
            ]
        except Exception as e:
            return jsonify({'error': str(e)}), 500

        return jsonify({'results': formatted_results})

    print("🚀 Starting web server...")
    print(f" Database: {db_file}")
    print(f" URL: http://{host}:{port}")
    print(" Press Ctrl+C to stop")
    try:
        app.run(host=host, port=port, debug=False)
    except KeyboardInterrupt:
        print("\n👋 Web server stopped")
    except Exception as e:
        print(f"❌ Error starting web server: {e}")
def main(): def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Semantic Search Tool", description="Semantic Search Tool",
@@ -203,6 +268,12 @@ def main():
query_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)') query_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
query_parser.add_argument('query_text', help='Text to search for') query_parser.add_argument('query_text', help='Text to search for')
# Host command (web server)
host_parser = subparsers.add_parser('host', aliases=['h'], help='Start a web server for semantic search')
host_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
host_parser.add_argument('--host', default='127.0.0.1', help='Host address to bind to (default: 127.0.0.1)')
host_parser.add_argument('--port', type=int, default=5000, help='Port to listen on (default: 5000)')
# Test command # Test command
subparsers.add_parser('test', aliases=['t'], help='Test database save/load functionality') subparsers.add_parser('test', aliases=['t'], help='Test database save/load functionality')
@@ -216,6 +287,8 @@ def main():
add_file(args.db, args.file_paths) add_file(args.db, args.file_paths)
elif args.command in ['query', 'q']: elif args.command in ['query', 'q']:
query(args.db, args.query_text) query(args.db, args.query_text)
elif args.command in ['host', 'h']:
start_web_server(args.db, args.host, args.port)
elif args.command in ['test', 't']: elif args.command in ['test', 't']:
test_database() test_database()
else: else:

73
templates/index.html Normal file
View File

@@ -0,0 +1,73 @@
<!DOCTYPE html>
<!-- Single-page search UI: posts the query as JSON to /api/search and
     renders the returned results client-side. -->
<html>
<head>
    <meta charset="utf-8">
    <title>Semantic Document Search</title>
    <style>
        body { font-family: Arial, sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; }
        .search-box { margin-bottom: 20px; }
        input[type="text"] { width: 70%; padding: 10px; font-size: 16px; }
        button { padding: 10px 20px; font-size: 16px; background: #007cba; color: white; border: none; cursor: pointer; }
        button:hover { background: #005c8a; }
        .result { border: 1px solid #ddd; margin: 10px 0; padding: 15px; border-radius: 5px; }
        .result-header { font-weight: bold; color: #333; margin-bottom: 10px; }
        .result-text { background: #f9f9f9; padding: 10px; border-radius: 3px; }
        .distance { color: #666; font-size: 0.9em; }
        .no-results { text-align: center; color: #666; margin: 40px 0; }
        .loading { text-align: center; color: #007cba; margin: 20px 0; }
    </style>
</head>
<body>
    <h1>🔍 Semantic Document Search</h1>
    <div class="search-box">
        <form id="searchForm">
            <input type="text" id="queryInput" placeholder="Enter your search query..." required>
            <button type="submit">Search</button>
        </form>
    </div>
    <div id="results"></div>
    <script>
        // Escape a value for safe interpolation into HTML. Result fields come
        // from indexed documents and server error messages, so they must never
        // be inserted into innerHTML as raw markup.
        function escapeHtml(value) {
            return String(value)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;')
                .replace(/'/g, '&#39;');
        }

        document.getElementById('searchForm').addEventListener('submit', async (e) => {
            // Keep the browser from doing a full-page form submission.
            e.preventDefault();
            const query = document.getElementById('queryInput').value;
            const resultsDiv = document.getElementById('results');
            resultsDiv.innerHTML = '<div class="loading">Searching...</div>';
            try {
                const response = await fetch('/api/search', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ query: query })
                });
                const data = await response.json();
                if (data.error) {
                    resultsDiv.innerHTML = `<div class="no-results">Error: ${escapeHtml(data.error)}</div>`;
                    return;
                }
                // Treat a missing or malformed results field as "no results"
                // instead of throwing on data.results.length.
                const results = Array.isArray(data.results) ? data.results : [];
                if (results.length === 0) {
                    resultsDiv.innerHTML = '<div class="no-results">No results found.</div>';
                    return;
                }
                resultsDiv.innerHTML = results.map((result, i) => `
                    <div class="result">
                        <div class="result-header">
                            Result ${i + 1} - ${escapeHtml(result.document)}
                            <span class="distance">(Distance: ${escapeHtml(Number(result.distance).toFixed(4))})</span>
                        </div>
                        <div>Page: ${escapeHtml(result.page)}, Chunk: ${escapeHtml(result.chunk)}</div>
                        <div class="result-text">${escapeHtml(result.text)}</div>
                    </div>
                `).join('');
            } catch (error) {
                // Network failures and non-JSON responses end up here.
                resultsDiv.innerHTML = `<div class="no-results">Error: ${escapeHtml(error.message)}</div>`;
            }
        });
    </script>
</body>
</html>