Serve by file index, not full path
This commit is contained in:
23
db.py
23
db.py
@@ -26,7 +26,7 @@ class Record:
|
||||
class QueryResult:
|
||||
record: Record
|
||||
distance: float
|
||||
document: Path
|
||||
document_name: str
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Database:
|
||||
@@ -197,7 +197,7 @@ def query(db: Database | Path, text: str, record_count: int = 10) -> list[QueryR
|
||||
# Look up the corresponding record
|
||||
if vector_bytes in db.records:
|
||||
record = db.records[vector_bytes]
|
||||
results.append(QueryResult(record, distance, db.documents[record.document_index]))
|
||||
results.append(QueryResult(record, distance, db.documents[record.document_index].name))
|
||||
|
||||
return results
|
||||
|
||||
@@ -273,3 +273,22 @@ def add_document(db: Database | Path, file: Path, max_workers: int = 4) -> None:
|
||||
if save_to_file and database_file_path:
|
||||
save(db, database_file_path)
|
||||
print(f"Database saved to {database_file_path}")
|
||||
|
||||
def get_document_path(db: Database | Path, document_index: int) -> Path:
    """
    Return the stored file path for one document in the database.

    Args:
        db: A loaded Database object, or the path of a database file,
            which is loaded first
        document_index: Position of the document in ``db.documents``

    Returns:
        The entry of ``db.documents`` found at ``document_index``

    Raises:
        IndexError: If ``document_index`` is negative or is not smaller
            than the number of documents
    """
    database = load(db) if isinstance(db, Path) else db

    # Negative values are rejected explicitly; plain sequence indexing
    # would otherwise count from the end instead of failing.
    if not 0 <= document_index < len(database.documents):
        raise IndexError(f"Document index out of range: {document_index}")

    return database.documents[document_index]
|
||||
18
main.py
18
main.py
@@ -166,7 +166,7 @@ def query(db_path: str, query_text: str):
|
||||
|
||||
for i, res in enumerate(results, 1):
|
||||
print(f"\n{i}. Distance: {res.distance:.4f}")
|
||||
print(f" Document: {res.document.name}")
|
||||
print(f" Document: {res.document_name}")
|
||||
print(f" Page: {res.record.page}, Chunk: {res.record.chunk}")
|
||||
# Replace all whitespace characters with regular spaces for cleaner display
|
||||
clean_text = ' '.join(res.record.text[:200].split())
|
||||
@@ -201,18 +201,13 @@ def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
|
||||
def index():
|
||||
return render_template("index.html", results=None)
|
||||
|
||||
@app.route('/file/<int:document_index>')
def serve_file(document_index):
    """Serve the document stored at ``document_index`` in the database.

    The client only supplies an index; the file path is resolved through
    ``db.get_document_path`` rather than taken from the URL.

    Returns:
        The file contents on success, a JSON error with status 404 when
        the index is out of range or the file does not exist on disk, or
        a JSON error with status 500 for any other failure.
    """
    try:
        file_path = db.get_document_path(db_file, document_index)
        if not file_path.exists():
            return jsonify({'error': 'File not found'}), 404

        return send_file(file_path, as_attachment=False)
    except IndexError:
        # get_document_path raises IndexError for an unknown index; that is
        # a bad request from the client, not a server fault, so answer 404
        # instead of letting it fall through to the generic 500 handler.
        return jsonify({'error': 'File not found'}), 404
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
@@ -236,13 +231,12 @@ def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
|
||||
for res in results:
|
||||
formatted_results.append({
|
||||
'distance': float(res.distance),
|
||||
'document': res.document.name,
|
||||
'document_path': str(res.document), # Full path for the link
|
||||
'document_name': res.document_name,
|
||||
'document_index': res.record.document_index,
|
||||
'page': res.record.page,
|
||||
'chunk': res.record.chunk,
|
||||
'text': ' '.join(res.record.text[:300].split()) # Clean and truncate text
|
||||
})
|
||||
|
||||
return jsonify({'results': formatted_results})
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -58,7 +58,7 @@
|
||||
resultsDiv.innerHTML = data.results.map((result, i) => `
|
||||
<div class="result">
|
||||
<div class="result-header">
|
||||
Result ${i + 1} - <a href="/file/${encodeURIComponent(result.document_path)}#page=${result.page}" class="document-link" target="_blank">${result.document}</a>
|
||||
Result ${i + 1} - <a href="/file/${encodeURIComponent(result.document_index)}#page=${result.page}" class="document-link" target="_blank">${result.document_name}</a>
|
||||
<span class="distance">(Distance: ${result.distance.toFixed(4)})</span>
|
||||
</div>
|
||||
<div>Page: ${result.page}, Chunk: ${result.chunk}</div>
|
||||
|
||||
Reference in New Issue
Block a user