# app/config.py
"""Application configuration.

Defines the ``Settings`` model (values come from environment variables and an
optional ``.env`` file), creates the module-level ``settings`` instance, and
validates critical values at import time so that a misconfigured deployment
fails fast instead of failing on the first request.
"""
import logging
import os
import secrets
from typing import List

from dotenv import load_dotenv
from pydantic_settings import BaseSettings, SettingsConfigDict

load_dotenv()
logger = logging.getLogger(__name__)


class Settings(BaseSettings):
    """Typed application settings with defaults for local development.

    Every attribute can be overridden through an environment variable of the
    same name or through the ``.env`` file; unknown variables are ignored.
    ``SECRET_KEY`` has no default and therefore must always be provided.
    """

    DATABASE_URL: str | None = None
    GEMINI_API_KEY: str | None = None
    SENTRY_DSN: str | None = None  # Sentry DSN for error tracking

    # --- Environment Settings ---
    ENVIRONMENT: str = "development"  # development, staging, production

    # --- JWT Settings --- (SECRET_KEY is used by FastAPI-Users)
    SECRET_KEY: str  # Must be set via environment variable
    TOKEN_TYPE: str = "bearer"  # Default token type for JWT authentication
    # FastAPI-Users handles JWT algorithm internally

    # --- OCR Settings ---
    MAX_FILE_SIZE_MB: int = 10  # Maximum allowed file size for OCR processing
    ALLOWED_IMAGE_TYPES: list[str] = ["image/jpeg", "image/png", "image/webp"]  # Supported image formats
    # NOTE(review): the line breaks inside this prompt were reconstructed from
    # its Markdown structure; the wording is unchanged. Confirm the layout
    # against the prompt that is currently deployed.
    OCR_ITEM_EXTRACTION_PROMPT: str = """
**ROLE & GOAL**

You are an expert AI assistant specializing in Optical Character Recognition (OCR) and structured data extraction. Your primary function is to act as a "Shopping List Digitizer."

Your goal is to meticulously analyze the provided image of a shopping list, which is likely handwritten, and convert it into a structured, machine-readable JSON format. You must be accurate, infer context where necessary, and handle the inherent ambiguities of handwriting and informal list-making.

**INPUT**

You will receive a single image (`[Image]`). This image contains a shopping list. It may be:
* Neatly written or very messy.
* On lined paper, a whiteboard, a napkin, or a dedicated notepad.
* Containing doodles, stains, or other visual noise.
* Using various formats (bullet points, numbered lists, columns, simple line breaks).
* could be in English or in German.

**CORE TASK: STEP-BY-STEP ANALYSIS**

Follow these steps precisely:

1. **Initial Image Analysis & OCR:**
    * Perform an advanced OCR scan on the entire image to transcribe all visible text.
    * Pay close attention to the spatial layout. Identify headings, columns, and line items. Note which text elements appear to be grouped together.

2. **Item Identification & Filtering:**
    * Differentiate between actual list items and non-item elements.
    * **INCLUDE:** Items intended for purchase.
    * **EXCLUDE:** List titles (e.g., "GROCERIES," "Target List"), dates, doodles, unrelated notes, or stray marks. Capture the list title separately if one exists.

3. **Detailed Extraction for Each Item:**
    For every single item you identify, extract the following attributes. If an attribute is not present, use `null`.

    * `item_name` (string): The primary name of the product.
        * **Standardize:** Normalize the name. (e.g., "B. Powder" -> "Baking Powder", "A. Juice" -> "Apple Juice").
        * **Contextual Guessing:** If a word is poorly written, use the context of a shopping list to make an educated guess. (e.g., "Ciffee" is almost certainly "Coffee").

    * `quantity` (number or string): The amount needed.
        * If a number is present (e.g., "**2** milks"), extract the number `2`.
        * If it's a word (e.g., "**a dozen** eggs"), extract the string `"a dozen"`.
        * If no quantity is specified (e.g., "Bread"), infer a default quantity of `1`.

    * `unit` (string): The unit of measurement or packaging.
        * Examples: "kg", "lbs", "liters", "gallons", "box", "can", "bag", "bunch".
        * Infer where possible (e.g., for "2 Milks," the unit could be inferred as "cartons" or "gallons" depending on regional context, but it's safer to leave it `null` if not explicitly stated).

    * `notes` (string): Any additional descriptive text.
        * Examples: "low-sodium," "organic," "brand name (Tide)," "for the cake," "get the ripe ones."

    * `category` (string): Infer a logical category for the item.
        * Use common grocery store categories: `Produce`, `Dairy & Eggs`, `Meat & Seafood`, `Pantry`, `Frozen`, `Bakery`, `Beverages`, `Household`, `Personal Care`.
        * If the list itself has category headings (e.g., a "DAIRY" section), use those first.

    * `original_text` (string): Provide the exact, unaltered text that your OCR transcribed for this entire line item. This is crucial for verification.

    * `is_crossed_out` (boolean): Set to `true` if the item is struck through, crossed out, or clearly marked as completed. Otherwise, set to `false`.

**HANDLING AMBIGUITIES AND EDGE CASES**

* **Illegible Text:** If a line or word is completely unreadable, set `item_name` to `"UNREADABLE"` and place the garbled OCR attempt in the `original_text` field.
* **Abbreviations:** Expand common shopping list abbreviations (e.g., "OJ" -> "Orange Juice", "TP" -> "Toilet Paper", "AVOs" -> "Avocados", "G. Beef" -> "Ground Beef").
* **Implicit Items:** If a line is vague like "Snacks for kids," list it as is. Do not invent specific items.
* **Multi-item Lines:** If a line contains multiple items (e.g., "Onions, Garlic, Ginger"), split them into separate item objects.

**OUTPUT FORMAT**

Your final output MUST be a single JSON object with the following structure. Do not include any explanatory text before or after the JSON block.

```json
{
  "list_title": "string or null",
  "items": [
    {
      "item_name": "string",
      "quantity": "number or string",
      "unit": "string or null",
      "category": "string",
      "notes": "string or null",
      "original_text": "string",
      "is_crossed_out": "boolean"
    }
  ],
  "summary": {
    "total_items": "integer",
    "unread_items": "integer",
    "crossed_out_items": "integer"
  }
}
```

**EXAMPLE WALKTHROUGH**

* **IF THE IMAGE SHOWS:** A crumpled sticky note with the title "Stuff for tonight" and the items:
    * `2x Chicken Breasts`
    * `~~Baguette~~` (this item is crossed out)
    * `Salad mix (bag)`
    * `Tomatos` (misspelled)
    * `Choc Ice Cream`

* **YOUR JSON OUTPUT SHOULD BE:**
```json
{
  "list_title": "Stuff for tonight",
  "items": [
    {
      "item_name": "Chicken Breasts",
      "quantity": 2,
      "unit": null,
      "category": "Meat & Seafood",
      "notes": null,
      "original_text": "2x Chicken Breasts",
      "is_crossed_out": false
    },
    {
      "item_name": "Baguette",
      "quantity": 1,
      "unit": null,
      "category": "Bakery",
      "notes": null,
      "original_text": "Baguette",
      "is_crossed_out": true
    },
    {
      "item_name": "Salad Mix",
      "quantity": 1,
      "unit": "bag",
      "category": "Produce",
      "notes": null,
      "original_text": "Salad mix (bag)",
      "is_crossed_out": false
    },
    {
      "item_name": "Tomatoes",
      "quantity": 1,
      "unit": null,
      "category": "Produce",
      "notes": null,
      "original_text": "Tomatos",
      "is_crossed_out": false
    },
    {
      "item_name": "Chocolate Ice Cream",
      "quantity": 1,
      "unit": null,
      "category": "Frozen",
      "notes": null,
      "original_text": "Choc Ice Cream",
      "is_crossed_out": false
    }
  ],
  "summary": {
    "total_items": 5,
    "unread_items": 0,
    "crossed_out_items": 1
  }
}
```

**FINAL INSTRUCTION**

If the image provided is not a shopping list or is completely blank/unintelligible, respond with a JSON object where the `items` array is empty and add a note in the `list_title` field, such as "Image does not appear to be a shopping list."

Now, analyze the provided image and generate the JSON output.
"""

    # --- OCR Error Messages ---
    # Messages containing {placeholders} are templates meant for str.format().
    OCR_SERVICE_UNAVAILABLE: str = "OCR service is currently unavailable. Please try again later."
    OCR_SERVICE_CONFIG_ERROR: str = "OCR service configuration error. Please contact support."
    OCR_UNEXPECTED_ERROR: str = "An unexpected error occurred during OCR processing."
    OCR_QUOTA_EXCEEDED: str = "OCR service quota exceeded. Please try again later."
    OCR_INVALID_FILE_TYPE: str = "Invalid file type. Supported types: {types}"
    OCR_FILE_TOO_LARGE: str = "File too large. Maximum size: {size}MB"
    OCR_PROCESSING_ERROR: str = "Error processing image: {detail}"

    # --- Gemini AI Settings ---
    GEMINI_MODEL_NAME: str = "gemini-2.5-flash-preview-05-20"  # The model to use for OCR
    GEMINI_SAFETY_SETTINGS: dict = {
        "HARM_CATEGORY_HATE_SPEECH": "BLOCK_MEDIUM_AND_ABOVE",
        "HARM_CATEGORY_DANGEROUS_CONTENT": "BLOCK_MEDIUM_AND_ABOVE",
        "HARM_CATEGORY_HARASSMENT": "BLOCK_MEDIUM_AND_ABOVE",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT": "BLOCK_MEDIUM_AND_ABOVE",
    }
    GEMINI_GENERATION_CONFIG: dict = {
        "candidate_count": 1,
        "max_output_tokens": 2048,
        "temperature": 0.9,
        "top_p": 1,
        "top_k": 1,
    }

    # --- API Settings ---
    API_PREFIX: str = "/api"  # Base path for all API endpoints
    API_OPENAPI_URL: str = "/api/openapi.json"
    API_DOCS_URL: str = "/api/docs"
    API_REDOC_URL: str = "/api/redoc"

    # CORS Origins - environment dependent (comma-separated list)
    CORS_ORIGINS: str = "http://localhost:5173,http://localhost:5174,http://localhost:8000,http://127.0.0.1:5173,http://127.0.0.1:5174,http://127.0.0.1:8000"
    FRONTEND_URL: str = "http://localhost:5173"  # URL for the frontend application

    # --- API Metadata ---
    API_TITLE: str = "Shared Lists API"
    API_DESCRIPTION: str = "API for managing shared shopping lists, OCR, and cost splitting."
    API_VERSION: str = "0.1.0"
    ROOT_MESSAGE: str = "Welcome to the Shared Lists API! Docs available at /api/docs"

    # --- Logging Settings ---
    LOG_LEVEL: str = "WARNING"
    LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

    # --- Health Check Settings ---
    HEALTH_STATUS_OK: str = "ok"
    HEALTH_STATUS_ERROR: str = "error"

    # --- HTTP Status Messages ---
    HTTP_400_DETAIL: str = "Bad Request"
    HTTP_401_DETAIL: str = "Unauthorized"
    HTTP_403_DETAIL: str = "Forbidden"
    HTTP_404_DETAIL: str = "Not Found"
    HTTP_422_DETAIL: str = "Unprocessable Entity"
    HTTP_429_DETAIL: str = "Too Many Requests"
    HTTP_500_DETAIL: str = "Internal Server Error"
    HTTP_503_DETAIL: str = "Service Unavailable"

    # --- Database Error Messages ---
    DB_CONNECTION_ERROR: str = "Database connection error"
    DB_INTEGRITY_ERROR: str = "Database integrity error"
    DB_TRANSACTION_ERROR: str = "Database transaction error"
    DB_QUERY_ERROR: str = "Database query error"

    # --- Auth Error Messages ---
    AUTH_INVALID_CREDENTIALS: str = "Invalid username or password"
    AUTH_NOT_AUTHENTICATED: str = "Not authenticated"
    AUTH_JWT_ERROR: str = "JWT token error: {error}"
    AUTH_JWT_UNEXPECTED_ERROR: str = "Unexpected JWT error: {error}"
    AUTH_HEADER_NAME: str = "WWW-Authenticate"
    AUTH_HEADER_PREFIX: str = "Bearer"

    # OAuth Settings
    # IMPORTANT: For Google OAuth to work, you MUST set the following environment variables
    # (e.g., in your .env file):
    # GOOGLE_CLIENT_ID: Your Google Cloud project's OAuth 2.0 Client ID
    # GOOGLE_CLIENT_SECRET: Your Google Cloud project's OAuth 2.0 Client Secret
    # Ensure the GOOGLE_REDIRECT_URI below matches the one configured in your Google Cloud Console.
    GOOGLE_CLIENT_ID: str = ""
    GOOGLE_CLIENT_SECRET: str = ""
    GOOGLE_REDIRECT_URI: str = "https://mitlistbe.mohamad.dev/api/v1/auth/google/callback"

    APPLE_CLIENT_ID: str = ""
    APPLE_TEAM_ID: str = ""
    APPLE_KEY_ID: str = ""
    APPLE_PRIVATE_KEY: str = ""
    APPLE_REDIRECT_URI: str = "https://mitlistbe.mohamad.dev/api/v1/auth/apple/callback"

    # Session Settings
    SESSION_SECRET_KEY: str = "your-session-secret-key"  # Change this in production
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 480  # 8 hours instead of 30 minutes

    # Redis Settings
    REDIS_URL: str = "redis://localhost:6379"
    REDIS_PASSWORD: str = ""

    # Pydantic v2 configuration (replaces the deprecated inner ``class Config``).
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    @property
    def cors_origins_list(self) -> List[str]:
        """Return ``CORS_ORIGINS`` split on commas as a list of origins.

        Surrounding whitespace is stripped and blank entries (empty setting,
        trailing or doubled commas) are dropped so that no empty-string
        origin is ever returned.
        """
        return [
            cleaned
            for origin in self.CORS_ORIGINS.split(",")
            if (cleaned := origin.strip())
        ]

    @property
    def is_production(self) -> bool:
        """Check if running in production environment (case-insensitive)."""
        return self.ENVIRONMENT.lower() == "production"

    @property
    def is_development(self) -> bool:
        """Check if running in development environment (case-insensitive)."""
        return self.ENVIRONMENT.lower() == "development"

    @property
    def docs_url(self) -> str | None:
        """Return docs URL only in development, otherwise ``None``."""
        return self.API_DOCS_URL if self.is_development else None

    @property
    def redoc_url(self) -> str | None:
        """Return redoc URL only in development, otherwise ``None``."""
        return self.API_REDOC_URL if self.is_development else None

    @property
    def openapi_url(self) -> str | None:
        """Return OpenAPI URL only in development, otherwise ``None``."""
        return self.API_OPENAPI_URL if self.is_development else None


def _validate_settings(config: Settings) -> None:
    """Raise ``ValueError`` if a critical setting is missing or insecure.

    Non-fatal problems (missing Sentry DSN in production, missing Gemini API
    key) are only logged.
    """
    # An empty value is as unusable as a missing one, so test truthiness.
    if not config.DATABASE_URL:
        raise ValueError("DATABASE_URL environment variable must be set.")

    # Enforce secure secret key
    if not config.SECRET_KEY:
        raise ValueError(
            "SECRET_KEY environment variable must be set. "
            "Generate a secure key using: openssl rand -hex 32"
        )

    # Validate secret key strength
    if len(config.SECRET_KEY) < 32:
        raise ValueError("SECRET_KEY must be at least 32 characters long for security")

    # Production-specific validations
    if config.is_production:
        if config.SESSION_SECRET_KEY == "your-session-secret-key":
            raise ValueError("SESSION_SECRET_KEY must be changed from default value in production")
        if not config.SENTRY_DSN:
            logger.warning("SENTRY_DSN not set in production environment. Error tracking will be unavailable.")

    if not config.GEMINI_API_KEY:
        logger.error("CRITICAL: GEMINI_API_KEY environment variable not set. Gemini features will be unavailable.")
    else:
        # Optional: Log partial key for confirmation (avoid logging full key)
        logger.info("GEMINI_API_KEY loaded (starts with: %s...).", config.GEMINI_API_KEY[:4])


def _log_startup_info(config: Settings) -> None:
    """Log the active environment and whether the API docs are exposed."""
    logger.info("Application starting in %s environment", config.ENVIRONMENT)

    # The docs URLs are only exposed when is_development is true (see the
    # docs_url/redoc_url/openapi_url properties), so branch on that flag
    # rather than on "not production"; otherwise e.g. staging would be
    # reported as having documentation available.
    if config.is_development:
        logger.info("Development mode: API documentation available at %s", config.API_DOCS_URL)
    elif config.is_production:
        logger.info("Production mode: API documentation disabled")
    else:
        logger.info("%s mode: API documentation disabled", config.ENVIRONMENT)


settings = Settings()

# Validation for critical settings (runs at import time; fails fast).
_validate_settings(settings)

# Log environment information
_log_startup_info(settings)