From ee8a8ad170df4b674217530abcba0859dcbd86fd Mon Sep 17 00:00:00 2001
From: Jan Mrna <mj370@protonmail.com>
Date: Thu, 6 Nov 2025 10:58:18 +0100
Subject: [PATCH] Fixed warnings from pylint

---
 db.py   | 33 ++++++++++++++++++++++++++-------
 main.py | 34 +++++++++++++++++++---------------
 2 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/db.py b/db.py
index a5961fe..dc58970 100644
--- a/db.py
+++ b/db.py
@@ -1,3 +1,7 @@
+#pylint: disable=missing-class-docstring,invalid-name,broad-exception-caught
+"""
+Database module for semantic document search tool.
+"""
 import pickle
 from pathlib import Path
 from dataclasses import dataclass
@@ -6,13 +10,13 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 
 import numpy as np
 import pymupdf
-import ollama  # TODO split to another file
+import ollama
 
 #
 # Types
 #
 
-type Vector = np.NDArray  # np.NDArray[np.float32] ?
+type Vector = np.NDArray
 type VectorBytes = bytes
 
 
@@ -90,12 +94,29 @@ def _vectorize_record(record: Record) -> tuple[Record, Vector]:
     return record, _embed(record.text)
 
 
+def test_embedding() -> bool:
+    """
+    Test if embedding functionality is available and working.
+    
+    Returns:
+        bool: True if embedding is working, False otherwise
+    """
+    try:
+        _ = _embed("Test.")
+        return True
+    except Exception:
+        return False
+
+
 #
 # High-level (exported) functions
 #
 
 
 def create_dummy() -> Database:
+    """
+    Create a dummy database for testing purposes.
+    """
     db_length: Final[int] = 10
     vectors = [np.array([i, 2 * i, 3 * i, 4 * i]) for i in range(db_length)]
     records = {
@@ -250,13 +271,11 @@ def add_document(db: Database | Path, file: Path, max_workers: int = 4) -> None:
         records: list[Record] = []
         chunk_size = 1024
 
-        for page_num in range(len(doc)):
-            page = doc[page_num]
+        for page_num, page in enumerate(doc):
             text = page.get_text().strip()
             if not text:
                 print(f"  Page {page_num + 1}: Skipped (empty)")
                 continue
-
             # Simple chunking - split text into chunks of specified size
             for chunk_idx, i in enumerate(range(0, len(text), chunk_size)):
                 chunk = text[i : i + chunk_size]
@@ -267,14 +286,14 @@ def add_document(db: Database | Path, file: Path, max_workers: int = 4) -> None:
                     )
         doc.close()
     except Exception as e:
-        raise RuntimeError(f"Error processing PDF {file}: {e}")
+        raise RuntimeError(f"Error processing PDF {file}: {e}") from e
 
     # Process chunks in parallel
     print(f"Processing {len(records)} chunks with {max_workers} workers...")
 
     db.documents.append(file)
 
-    # TODO measure with GIL disabled to check if multithreading actually helps
+    # NOTE this will only help with GIL disabled
     with ThreadPoolExecutor(max_workers=max_workers) as pool:
         futures = [pool.submit(_vectorize_record, r) for r in records]
         for f in as_completed(futures):
diff --git a/main.py b/main.py
index 1ecd9ea..58673f7 100644
--- a/main.py
+++ b/main.py
@@ -1,9 +1,15 @@
-import argparse
+#pylint: disable=broad-exception-caught
+"""
+Semantic search tool main script.
+Provides command-line interface and web server for creating, adding and querying the database.
+"""
 import sys
-from pathlib import Path
 import tempfile
-import numpy as np
+import argparse
 from typing import Final
+from pathlib import Path
+
+import numpy as np
 
 import db
 
@@ -88,17 +94,10 @@ def test_database():
 
         # Test embedding functionality
         print("\n5. Testing embedding functionality (Ollama API server)...")
-        try:
-            test_embedding = db._embed("This is a test text for embedding.")
-            print(
-                f"   Embedding test PASSED: Generated vector of shape {test_embedding.shape}"
-            )
-            ollama_running = True
-        except Exception as e:
-            print(
-                f"   Embedding test FAILED: {e}\n   Did you start ollama docker image?"
-            )
-            ollama_running = False
+        embedding_ok = db.test_embedding()
+        print(f"   Embedding test {'PASSED' if embedding_ok else 'FAILED'}")
+        if not embedding_ok:
+            print("   Did you start ollama docker image?")
 
         # Summary
         all_good = (
@@ -106,7 +105,7 @@ def test_database():
             and records_match
             and vectors_equal
             and records_equal
-            and ollama_running
+            and embedding_ok
         )
         print(f"\n✅ Test {'PASSED' if all_good else 'FAILED'}")
 
@@ -204,6 +203,8 @@ def query(db_path: str, query_text: str):
 def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
     """Start a web server for the semantic search tool."""
     try:
+        # here we intentionally import inside the function to avoid Flask dependency for CLI usage
+        # pylint: disable=import-outside-toplevel
         from flask import Flask, request, jsonify, render_template, send_file
     except ImportError:
         print("❌ Flask not found. Please install it first:")
@@ -282,6 +283,9 @@ def start_web_server(db_path: str, host: str = "127.0.0.1", port: int = 5000):
 
 
 def main():
+    """
+    Main function to parse command-line arguments and execute commands.
+    """
     parser = argparse.ArgumentParser(
         description="Semantic Search Tool",
         formatter_class=argparse.RawDescriptionHelpFormatter,