Compare commits

..

2 Commits

Author SHA1 Message Date
Mrna
c3247a7c46 Update gitignore 2025-11-03 15:32:02 +01:00
Mrna
626ae74cdf Added web interface 2025-11-03 15:31:27 +01:00
4 changed files with 149 additions and 1 deletions

2
.gitignore vendored
View File

@@ -10,3 +10,5 @@ wheels/
.venv
.python-version
data
*.pkl

2
db.py
View File

@@ -217,7 +217,7 @@ def query(db: Database | Path, text: str, record_count: int = 10) -> list[tuple[
return results
def add_document(db: Database | Path, file: Path, max_workers: int = 1) -> None:
def add_document(db: Database | Path, file: Path, max_workers: int = 4) -> None:
"""
Adds a new document to the database. If path is given, do load, add, save.
Loads PDF with PyMuPDF, splits by pages, and creates records and vectors.

73
main.py
View File

@@ -179,6 +179,71 @@ def query(db_path: str, query_text: str):
def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
    """Start a web server for the semantic search tool.

    Two routes are registered:

    * ``GET /`` renders ``templates/index.html``.
    * ``POST /api/search`` expects a JSON object ``{"query": "<text>"}`` and
      answers with ``{"results": [...]}`` on success or ``{"error": "<msg>"}``
      with status 400 (bad request body) or 500 (search failure).

    Args:
        db_path: Path to the database file. The file must already exist.
        host: Address the server binds to.
        port: TCP port the server listens on.

    The process exits with status 1 when Flask cannot be imported or when the
    database file does not exist.
    """
    # Flask is imported lazily so the other CLI commands work without it.
    try:
        from flask import Flask, request, jsonify, render_template
    except ImportError:
        print("❌ Flask not found. Please install it first:")
        print(" pip install flask")
        sys.exit(1)

    # Set template_folder to 'templates' directory
    app = Flask(__name__, template_folder="templates")
    db_file = Path(db_path)

    # Check if database exists
    if not db_file.exists():
        print(f"❌ Database file not found: {db_file}")
        print(" Create a database first using: python main.py create")
        sys.exit(1)

    @app.route('/')
    def index():
        return render_template("index.html", results=None)

    @app.route('/api/search', methods=['POST'])
    def search():
        # silent=True makes a wrong Content-Type or malformed JSON come back as
        # None instead of raising, so client mistakes are answered with 400
        # rather than being swallowed by the 500 handler below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'query' not in data:
            return jsonify({'error': 'Missing query parameter'}), 400

        query_text = data['query']
        if not isinstance(query_text, str):
            return jsonify({'error': 'Query must be a string'}), 400

        query_text = query_text.strip()
        if not query_text:
            return jsonify({'error': 'Query cannot be empty'}), 400

        # Only the search itself and the result formatting are guarded; this is
        # the request boundary, so any failure becomes a JSON 500 response.
        # NOTE(review): db.query receives the path, so it presumably loads the
        # database on every request — confirm; loading once at startup may be
        # preferable for large databases.
        try:
            results = db.query(db_file, query_text)
            formatted_results = [
                {
                    'distance': float(distance),
                    'document': record.document.name,
                    'page': record.page,
                    'chunk': record.chunk,
                    # Truncate to 300 characters, then collapse whitespace runs.
                    'text': ' '.join(record.text[:300].split()),
                }
                for distance, record in results
            ]
        except Exception as e:
            # Keep the traceback in the server log; the client only gets str(e).
            app.logger.exception("Search failed")
            return jsonify({'error': str(e)}), 500

        return jsonify({'results': formatted_results})

    print("🚀 Starting web server...")
    print(f" Database: {db_file}")
    print(f" URL: http://{host}:{port}")
    print(" Press Ctrl+C to stop")

    try:
        app.run(host=host, port=port, debug=False)
    except KeyboardInterrupt:
        print("\n👋 Web server stopped")
    except Exception as e:
        print(f"❌ Error starting web server: {e}")
def main():
parser = argparse.ArgumentParser(
description="Semantic Search Tool",
@@ -203,6 +268,12 @@ def main():
query_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
query_parser.add_argument('query_text', help='Text to search for')
# Host command (web server)
host_parser = subparsers.add_parser('host', aliases=['h'], help='Start a web server for semantic search')
host_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
host_parser.add_argument('--host', default='127.0.0.1', help='Host address to bind to (default: 127.0.0.1)')
host_parser.add_argument('--port', type=int, default=5000, help='Port to listen on (default: 5000)')
# Test command
subparsers.add_parser('test', aliases=['t'], help='Test database save/load functionality')
@@ -216,6 +287,8 @@ def main():
add_file(args.db, args.file_paths)
elif args.command in ['query', 'q']:
query(args.db, args.query_text)
elif args.command in ['host', 'h']:
start_web_server(args.db, args.host, args.port)
elif args.command in ['test', 't']:
test_database()
else:

73
templates/index.html Normal file
View File

@@ -0,0 +1,73 @@
<!DOCTYPE html>
<html>
<head>
<title>Semantic Document Search</title>
<style>
body { font-family: Arial, sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; }
.search-box { margin-bottom: 20px; }
input[type="text"] { width: 70%; padding: 10px; font-size: 16px; }
button { padding: 10px 20px; font-size: 16px; background: #007cba; color: white; border: none; cursor: pointer; }
button:hover { background: #005c8a; }
.result { border: 1px solid #ddd; margin: 10px 0; padding: 15px; border-radius: 5px; }
.result-header { font-weight: bold; color: #333; margin-bottom: 10px; }
.result-text { background: #f9f9f9; padding: 10px; border-radius: 3px; }
.distance { color: #666; font-size: 0.9em; }
.no-results { text-align: center; color: #666; margin: 40px 0; }
.loading { text-align: center; color: #007cba; margin: 20px 0; }
</style>
</head>
<body>
<h1>🔍 Semantic Document Search</h1>
<div class="search-box">
<form id="searchForm">
<input type="text" id="queryInput" placeholder="Enter your search query..." required>
<button type="submit">Search</button>
</form>
</div>
<div id="results"></div>
<script>
// Escape a value for safe interpolation into an HTML string. Result text comes
// from indexed documents and error messages come from the server, so none of
// it may be treated as markup.
function escapeHtml(value) {
    return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

// Submit the query to /api/search and render the JSON response in #results.
document.getElementById('searchForm').addEventListener('submit', async (e) => {
    // Keep the browser from doing a full-page form submission.
    e.preventDefault();

    const query = document.getElementById('queryInput').value.trim();
    const resultsDiv = document.getElementById('results');

    resultsDiv.innerHTML = '<div class="loading">Searching...</div>';

    try {
        const response = await fetch('/api/search', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ query: query })
        });

        const data = await response.json();

        // The API reports failures as { error: "<message>" }.
        if (data.error) {
            resultsDiv.innerHTML = `<div class="no-results">Error: ${escapeHtml(data.error)}</div>`;
            return;
        }

        // Tolerate a response without a results array instead of throwing.
        const results = Array.isArray(data.results) ? data.results : [];
        if (results.length === 0) {
            resultsDiv.innerHTML = '<div class="no-results">No results found.</div>';
            return;
        }

        resultsDiv.innerHTML = results.map((result, i) => `
            <div class="result">
                <div class="result-header">
                    Result ${i + 1} - ${escapeHtml(result.document)}
                    <span class="distance">(Distance: ${Number(result.distance).toFixed(4)})</span>
                </div>
                <div>Page: ${escapeHtml(result.page)}, Chunk: ${escapeHtml(result.chunk)}</div>
                <div class="result-text">${escapeHtml(result.text)}</div>
            </div>
        `).join('');
    } catch (error) {
        // Network failures and non-JSON responses end up here.
        resultsDiv.innerHTML = `<div class="no-results">Error: ${escapeHtml(error.message)}</div>`;
    }
});
</script>
</body>
</html>