Serve by file index, not full path
This commit is contained in:
25
db.py
25
db.py
@@ -26,7 +26,7 @@ class Record:
|
|||||||
class QueryResult:
|
class QueryResult:
|
||||||
record: Record
|
record: Record
|
||||||
distance: float
|
distance: float
|
||||||
document: Path
|
document_name: str
|
||||||
|
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
class Database:
|
class Database:
|
||||||
@@ -197,7 +197,7 @@ def query(db: Database | Path, text: str, record_count: int = 10) -> list[QueryR
|
|||||||
# Look up the corresponding record
|
# Look up the corresponding record
|
||||||
if vector_bytes in db.records:
|
if vector_bytes in db.records:
|
||||||
record = db.records[vector_bytes]
|
record = db.records[vector_bytes]
|
||||||
results.append(QueryResult(record, distance, db.documents[record.document_index]))
|
results.append(QueryResult(record, distance, db.documents[record.document_index].name))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@@ -272,4 +272,23 @@ def add_document(db: Database | Path, file: Path, max_workers: int = 4) -> None:
|
|||||||
# Save database if we loaded it from file
|
# Save database if we loaded it from file
|
||||||
if save_to_file and database_file_path:
|
if save_to_file and database_file_path:
|
||||||
save(db, database_file_path)
|
save(db, database_file_path)
|
||||||
print(f"Database saved to {database_file_path}")
|
print(f"Database saved to {database_file_path}")
|
||||||
|
|
||||||
|
def get_document_path(db: Database | Path, document_index: int) -> Path:
    """
    Look up the stored file path for one document in the database.

    Args:
        db: Database object, or the path of a database file to load first
        document_index: Position of the wanted document in the database's
            document list

    Returns:
        The path recorded for the document at that index

    Raises:
        IndexError: If document_index is negative or not smaller than the
            number of documents
    """
    # Accept either an already-loaded database or a file to load it from.
    database = load(db) if isinstance(db, Path) else db

    # Reject negatives explicitly so Python's wrap-around indexing is never used.
    if not 0 <= document_index < len(database.documents):
        raise IndexError(f"Document index out of range: {document_index}")

    return database.documents[document_index]
|
||||||
18
main.py
18
main.py
@@ -166,7 +166,7 @@ def query(db_path: str, query_text: str):
|
|||||||
|
|
||||||
for i, res in enumerate(results, 1):
|
for i, res in enumerate(results, 1):
|
||||||
print(f"\n{i}. Distance: {res.distance:.4f}")
|
print(f"\n{i}. Distance: {res.distance:.4f}")
|
||||||
print(f" Document: {res.document.name}")
|
print(f" Document: {res.document_name}")
|
||||||
print(f" Page: {res.record.page}, Chunk: {res.record.chunk}")
|
print(f" Page: {res.record.page}, Chunk: {res.record.chunk}")
|
||||||
# Replace all whitespace characters with regular spaces for cleaner display
|
# Replace all whitespace characters with regular spaces for cleaner display
|
||||||
clean_text = ' '.join(res.record.text[:200].split())
|
clean_text = ' '.join(res.record.text[:200].split())
|
||||||
@@ -201,18 +201,13 @@ def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
|
|||||||
def index():
|
def index():
|
||||||
return render_template("index.html", results=None)
|
return render_template("index.html", results=None)
|
||||||
|
|
||||||
@app.route('/file/<int:document_index>')
def serve_file(document_index):
    """Serve the document stored at ``document_index`` in the database.

    The client supplies only an index; the path on disk is looked up through
    ``db.get_document_path``, so the URL never carries a filesystem path.

    Returns:
        The file sent inline, or a JSON error with status 404 when the index
        is not in the database or the file is missing on disk, or status 500
        for any other failure.
    """
    try:
        file_path = db.get_document_path(db_file, document_index)
        if not file_path.exists():
            return jsonify({'error': 'File not found'}), 404

        return send_file(file_path, as_attachment=False)
    except IndexError:
        # An index the database does not know is a missing resource, not a
        # server fault, so answer like the missing-file case above.
        return jsonify({'error': 'File not found'}), 404
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||||
@@ -236,13 +231,12 @@ def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
|
|||||||
for res in results:
|
for res in results:
|
||||||
formatted_results.append({
|
formatted_results.append({
|
||||||
'distance': float(res.distance),
|
'distance': float(res.distance),
|
||||||
'document': res.document.name,
|
'document_name': res.document_name,
|
||||||
'document_path': str(res.document), # Full path for the link
|
'document_index': res.record.document_index,
|
||||||
'page': res.record.page,
|
'page': res.record.page,
|
||||||
'chunk': res.record.chunk,
|
'chunk': res.record.chunk,
|
||||||
'text': ' '.join(res.record.text[:300].split()) # Clean and truncate text
|
'text': ' '.join(res.record.text[:300].split()) # Clean and truncate text
|
||||||
})
|
})
|
||||||
|
|
||||||
return jsonify({'results': formatted_results})
|
return jsonify({'results': formatted_results})
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -58,7 +58,7 @@
|
|||||||
resultsDiv.innerHTML = data.results.map((result, i) => `
|
resultsDiv.innerHTML = data.results.map((result, i) => `
|
||||||
<div class="result">
|
<div class="result">
|
||||||
<div class="result-header">
|
<div class="result-header">
|
||||||
Result ${i + 1} - <a href="/file/${encodeURIComponent(result.document_path)}#page=${result.page}" class="document-link" target="_blank">${result.document}</a>
|
Result ${i + 1} - <a href="/file/${encodeURIComponent(result.document_index)}#page=${result.page}" class="document-link" target="_blank">${result.document_name}</a>
|
||||||
<span class="distance">(Distance: ${result.distance.toFixed(4)})</span>
|
<span class="distance">(Distance: ${result.distance.toFixed(4)})</span>
|
||||||
</div>
|
</div>
|
||||||
<div>Page: ${result.page}, Chunk: ${result.chunk}</div>
|
<div>Page: ${result.page}, Chunk: ${result.chunk}</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user