From 8d0890edc53a756919bcffe910f8016fb48d228d Mon Sep 17 00:00:00 2001
From: Jan Mrna
Date: Mon, 3 Nov 2025 15:31:27 +0100
Subject: [PATCH] Added web interface

---
 db.py                |  2 +-
 main.py              | 98 ++++++++++++++++++++++++++++++++++++++++++++
 templates/index.html | 73 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 templates/index.html

diff --git a/db.py b/db.py
index 3548d5d..d89f10d 100644
--- a/db.py
+++ b/db.py
@@ -217,7 +217,7 @@ def query(db: Database | Path, text: str, record_count: int = 10) -> list[tuple[
     return results
 
 
-def add_document(db: Database | Path, file: Path, max_workers: int = 1) -> None:
+def add_document(db: Database | Path, file: Path, max_workers: int = 4) -> None:
     """
     Adds a new document to the database. If path is given, do load, add, save.
     Loads PDF with PyMuPDF, splits by pages, and creates records and vectors.
diff --git a/main.py b/main.py
index d86a723..6dfb164 100644
--- a/main.py
+++ b/main.py
@@ -179,6 +179,96 @@ def query(db_path: str, query_text: str):
 
 
 
+def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
+    """
+    Start a web server for the semantic search tool.
+
+    Serves the search page at '/' and a JSON API at POST '/api/search' that
+    expects a body of the form {"query": "..."}.
+
+    Args:
+        db_path: Path to the database file; it must already exist.
+        host: Address to bind to.
+        port: Port to listen on.
+
+    Exits with status 1 if Flask is not installed, the database file is
+    missing, or the server fails to start.
+    """
+    try:
+        from flask import Flask, request, jsonify, render_template
+    except ImportError:
+        print("❌ Flask not found. Please install it first:")
+        print(" pip install flask")
+        sys.exit(1)
+    # Set template_folder to 'templates' directory
+    app = Flask(__name__, template_folder="templates")
+    db_file = Path(db_path)
+
+    # Check if database exists
+    if not db_file.exists():
+        print(f"❌ Database file not found: {db_file}")
+        print(" Create a database first using: python main.py create")
+        sys.exit(1)
+
+    @app.route('/')
+    def index():
+        return render_template("index.html", results=None)
+
+    @app.route('/api/search', methods=['POST'])
+    def search():
+        # silent=True returns None for a missing or malformed JSON body, so
+        # bad client input is answered with 400 instead of an exception.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict) or 'query' not in data:
+            return jsonify({'error': 'Missing query parameter'}), 400
+
+        query_text = data['query']
+        if not isinstance(query_text, str):
+            return jsonify({'error': 'Query must be a string'}), 400
+
+        query_text = query_text.strip()
+        if not query_text:
+            return jsonify({'error': 'Query cannot be empty'}), 400
+
+        try:
+            # Perform the search
+            # NOTE(review): a path is passed, so db.query presumably reloads
+            # the database on every request - confirm; consider loading once.
+            results = db.query(db_file, query_text)
+
+            # Format results for JSON response
+            formatted_results = [
+                {
+                    'distance': float(distance),
+                    'document': record.document.name,
+                    'page': record.page,
+                    'chunk': record.chunk,
+                    # Truncate to 300 characters, then collapse whitespace
+                    'text': ' '.join(record.text[:300].split()),
+                }
+                for distance, record in results
+            ]
+            return jsonify({'results': formatted_results})
+        except Exception as e:
+            # Last-resort handler so a failed search still returns JSON
+            app.logger.exception("Search failed")
+            return jsonify({'error': str(e)}), 500
+
+    print("🚀 Starting web server...")
+    print(f" Database: {db_file}")
+    print(f" URL: http://{host}:{port}")
+    print(" Press Ctrl+C to stop")
+
+    try:
+        app.run(host=host, port=port, debug=False)
+    except KeyboardInterrupt:
+        print("\n👋 Web server stopped")
+    except Exception as e:
+        print(f"❌ Error starting web server: {e}")
+        # Report the failure to the shell like the other error paths do
+        sys.exit(1)
+
+
 def main():
     parser = argparse.ArgumentParser(
         description="Semantic Search Tool",
@@ -203,6 +293,12 @@ def main():
     query_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
     query_parser.add_argument('query_text', help='Text to search for')
 
+    # Host command (web server)
+    host_parser = subparsers.add_parser('host', aliases=['h'], help='Start a web server for semantic search')
+    host_parser.add_argument('db', help='Path to the database file (e.g., db.pkl)')
+    host_parser.add_argument('--host', default='127.0.0.1', help='Host address to bind to (default: 127.0.0.1)')
+    host_parser.add_argument('--port', type=int, default=5000, help='Port to listen on (default: 5000)')
+
     # Test command
     subparsers.add_parser('test', aliases=['t'], help='Test database save/load functionality')
 
@@ -216,6 +312,8 @@ def main():
         add_file(args.db, args.file_paths)
     elif args.command in ['query', 'q']:
         query(args.db, args.query_text)
+    elif args.command in ['host', 'h']:
+        start_web_server(args.db, args.host, args.port)
     elif args.command in ['test', 't']:
         test_database()
     else:
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..9dc6302
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,73 @@
+ + + + Semantic Document Search + + + +

šŸ” Semantic Document Search

+ +
+ + + + \ No newline at end of file