From e40f7ed7410f5be77811857084999d7cd468cf8c Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 28 Sep 2025 15:45:40 +0000 Subject: [PATCH 01/21] Add script to estimate OFF payload sizes Co-authored-by: owner --- local/off_estimator.py | 248 ++++++++++++++++++++++++++++++++ local/off_estimator_sample.json | 18 +++ 2 files changed, 266 insertions(+) create mode 100644 local/off_estimator.py create mode 100644 local/off_estimator_sample.json diff --git a/local/off_estimator.py b/local/off_estimator.py new file mode 100644 index 0000000..60f3d77 --- /dev/null +++ b/local/off_estimator.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 + +import argparse +import gzip +import io +import json +import sys +import time +from typing import Any, Dict, Iterable, List, Optional, Tuple + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Stream OFF JSONL.gz and estimate projected payload sizes.") + source = parser.add_mutually_exclusive_group(required=True) + source.add_argument("--stdin", action="store_true", help="Read gzipped JSONL from stdin") + source.add_argument("--input", type=str, help="Path to gzipped JSONL file") + parser.add_argument("--sample", type=int, default=0, help="Number of records to process for sampling (0 = full stream)") + parser.add_argument("--output", type=str, default="", help="Optional path to write JSON report") + return parser.parse_args() + + +def iter_gz_lines_from_stdin() -> Iterable[str]: + gz = gzip.GzipFile(fileobj=sys.stdin.buffer, mode="rb") + with io.TextIOWrapper(gz, encoding="utf-8", errors="ignore", newline="\n") as f: + for line in f: + yield line + + +def iter_gz_lines_from_file(path: str) -> Iterable[str]: + with gzip.open(path, mode="rt", encoding="utf-8", errors="ignore", newline="\n") as f: + for line in f: + yield line + + +def extract_display_image_urls(selected_images: Optional[Dict[str, Any]]) -> List[Dict[str, str]]: + if not isinstance(selected_images, dict): + return [] + urls: List[Dict[str, str]] = [] + try: + for image in selected_images.values(): + display = image.get("display") if isinstance(image, dict) else None + if isinstance(display, dict): + # Prefer English if available, else any string value + if isinstance(display.get("en"), str): + urls.append({"url": display["en"]}) + else: + for v in display.values(): + if isinstance(v, str): + urls.append({"url": v}) + break + except Exception: + # Be tolerant to odd structures + return urls + return urls + + +def map_ingredient_node(node: Dict[str, Any]) -> Dict[str, Any]: + mapped: Dict[str, Any] = { + "name": node.get("text"), + "vegan": node.get("vegan"), + "vegetarian": node.get("vegetarian"), + "ingredients": [], + } + sub = node.get("ingredients") + if isinstance(sub, list) and len(sub) > 0: + mapped["ingredients"] = [map_ingredient_node(child) for child in sub if isinstance(child, dict)] + return mapped + + +def extract_projection(product: Dict[str, Any]) -> Dict[str, Any]: + # Barcode / code + barcode: Optional[str] = None + code = product.get("code") + if isinstance(code, str) and code.strip(): + barcode = code.strip() + elif isinstance(code, (int, float)): + barcode = str(code) + elif isinstance(product.get("_id"), str): + barcode = product.get("_id") + + # Brand + brand: Optional[str] = None + brand_owner = product.get("brand_owner") + if isinstance(brand_owner, str) and brand_owner.strip(): + brand = brand_owner.strip() + else: + brands = product.get("brands") + if isinstance(brands, str) and brands.strip(): + # OFF brands is comma-separated; 
take the first token + brand = brands.split(",")[0].strip() + + # Name + name: Optional[str] = None + product_name = product.get("product_name") + if isinstance(product_name, str) and product_name.strip(): + name = product_name.strip() + else: + # Try language-specific variants if present + for k, v in product.items(): + if k.startswith("product_name_") and isinstance(v, str) and v.strip(): + name = v.strip() + break + + # Ingredients + ingredients_list: List[Dict[str, Any]] = [] + raw_ingredients = product.get("ingredients") + if isinstance(raw_ingredients, list) and len(raw_ingredients) > 0: + ingredients_list = [map_ingredient_node(node) for node in raw_ingredients if isinstance(node, dict)] + + # Images + images: List[Dict[str, str]] = [] + selected_images = product.get("selected_images") + images = extract_display_image_urls(selected_images) + + return { + "barcode": barcode, + "brand": brand, + "name": name, + "ingredients": ingredients_list, + "images": images, + } + + +def json_bytes(value: Any) -> int: + try: + return len(json.dumps(value, ensure_ascii=False, separators=(",", ":")).encode("utf-8")) + except Exception: + return 0 + + +def utf8_bytes(value: Optional[str]) -> int: + if value is None: + return 0 + try: + return len(value.encode("utf-8")) + except Exception: + return 0 + + +def run(lines: Iterable[str], sample: int = 0) -> Dict[str, Any]: + start = time.time() + + total_records = 0 + projected_records = 0 + barcode_bytes_total = 0 + brand_bytes_total = 0 + name_bytes_total = 0 + ingredients_bytes_total = 0 + images_bytes_total = 0 + + nonempty_brand = 0 + nonempty_name = 0 + nonempty_ingredients = 0 + nonempty_images = 0 + + processed = 0 + for raw in lines: + if sample and processed >= sample: + break + total_records += 1 + raw = raw.strip() + if not raw: + continue + try: + product = json.loads(raw) + except Exception: + continue + + proj = extract_projection(product) + if proj.get("barcode"): + projected_records += 1 + barcode_bytes_total += utf8_bytes(proj.get("barcode")) + + b = proj.get("brand") + if b: + nonempty_brand += 1 + brand_bytes_total += utf8_bytes(b) + + n = proj.get("name") + if n: + nonempty_name += 1 + name_bytes_total += utf8_bytes(n) + + ing = proj.get("ingredients") + if isinstance(ing, list) and len(ing) > 0: + nonempty_ingredients += 1 + ingredients_bytes_total += json_bytes(ing) + + imgs = proj.get("images") + if isinstance(imgs, list) and len(imgs) > 0: + nonempty_images += 1 + images_bytes_total += json_bytes(imgs) + + processed += 1 + + elapsed = time.time() - start + + result = { + "total_records": total_records, + "projected_records_with_barcode": projected_records, + "barcode_bytes_total": barcode_bytes_total, + "brand_bytes_total": brand_bytes_total, + "name_bytes_total": name_bytes_total, + "ingredients_bytes_total": ingredients_bytes_total, + "images_bytes_total": images_bytes_total, + "nonempty_counts": { + "brand": nonempty_brand, + "name": nonempty_name, + "ingredients": nonempty_ingredients, + "images": nonempty_images, + }, + "elapsed_seconds": elapsed, + } + + totals_payload = ( + barcode_bytes_total + + brand_bytes_total + + name_bytes_total + + ingredients_bytes_total + + images_bytes_total + ) + result["projected_payload_bytes_total"] = totals_payload + result["avg_payload_bytes_per_projected_row"] = ( + (totals_payload / projected_records) if projected_records else 0.0 + ) + + return result + + +def main() -> None: + args = parse_args() + if args.stdin: + lines = iter_gz_lines_from_stdin() + else: + lines = 
iter_gz_lines_from_file(args.input) + + result = run(lines, sample=args.sample) + + if args.output: + with open(args.output, "w", encoding="utf-8") as f: + json.dump(result, f, ensure_ascii=False, indent=2) + else: + print(json.dumps(result, ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + main() + diff --git a/local/off_estimator_sample.json b/local/off_estimator_sample.json new file mode 100644 index 0000000..5e3bf48 --- /dev/null +++ b/local/off_estimator_sample.json @@ -0,0 +1,18 @@ +{ + "total_records": 100000, + "projected_records_with_barcode": 100000, + "barcode_bytes_total": 1288365, + "brand_bytes_total": 1796855, + "name_bytes_total": 2689661, + "ingredients_bytes_total": 108627317, + "images_bytes_total": 0, + "nonempty_counts": { + "brand": 98819, + "name": 99632, + "ingredients": 96406, + "images": 0 + }, + "elapsed_seconds": 53.94886898994446, + "projected_payload_bytes_total": 114402198, + "avg_payload_bytes_per_projected_row": 1144.02198 +} From a64fa1d1b26912f66f76f4f98f325b478834d5ba Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 28 Sep 2025 16:26:07 +0000 Subject: [PATCH 02/21] Add off_jsonl_linecount.txt with line count Co-authored-by: owner --- local/off_jsonl_linecount.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 local/off_jsonl_linecount.txt diff --git a/local/off_jsonl_linecount.txt b/local/off_jsonl_linecount.txt new file mode 100644 index 0000000..73383a4 --- /dev/null +++ b/local/off_jsonl_linecount.txt @@ -0,0 +1 @@ +4046118 From e213cf0e9240652d87426df5712e31b6b634dc3d Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Tue, 30 Sep 2025 13:53:20 +0530 Subject: [PATCH 03/21] Add SSE inventory+analysis stream and shared helpers (#4) --- supabase/functions/ingredicheck/analyzer.ts | 332 +++++++++++++----- supabase/functions/ingredicheck/index.ts | 1 + supabase/functions/ingredicheck/inventory.ts | 276 +++++++++------ supabase/functions/shared/db.ts | 4 + .../shared/llm/ingredientanalyzeragent.ts | 2 +- 5 files changed, 418 insertions(+), 197 deletions(-) diff --git a/supabase/functions/ingredicheck/analyzer.ts b/supabase/functions/ingredicheck/analyzer.ts index 6cda7b1..cddb36e 100644 --- a/supabase/functions/ingredicheck/analyzer.ts +++ b/supabase/functions/ingredicheck/analyzer.ts @@ -1,93 +1,257 @@ +import { Context } from "https://deno.land/x/oak@v12.6.0/mod.ts"; +import * as DB from "../shared/db.ts"; +import { + ingredientAnalyzerAgent, + IngredientRecommendation, +} from "../shared/llm/ingredientanalyzeragent.ts"; +import * as Inventory from "./inventory.ts"; -import { Context } from 'https://deno.land/x/oak@v12.6.0/mod.ts' -import * as DB from '../shared/db.ts' -import { ingredientAnalyzerAgent } from '../shared/llm/ingredientanalyzeragent.ts' +const MB = 1024 * 1024; -const MB = 1024 * 1024 +export type AnalysisRequest = { + barcode?: string; + userPreferenceText?: string; + clientActivityId?: string; +}; export async function analyze(ctx: Context) { + const startTime = new Date(); + let requestBody: AnalysisRequest = {}; + let responseBody: unknown = []; + let responseStatus = 200; - const startTime = new Date() - let requestBody: any = {} - let product = DB.defaultProduct() - - try { - const body = ctx.request.body({ type: "form-data" }) - const formData = await body.value.read({ maxSize: 10 * MB }) - - requestBody = { - barcode: formData.fields['barcode'], - userPreferenceText: formData.fields['userPreferenceText'], - clientActivityId: formData.fields['clientActivityId'] - } - - ctx.state.clientActivityId = 
requestBody.clientActivityId - - if (requestBody.barcode !== undefined) { - const result = await ctx.state.supabaseClient - .from('log_inventory') - .select() - .eq('barcode', requestBody.barcode) - .order('created_at', { ascending: false }) - .limit(1) - .single() - - if (result.error) { - throw result.error - } - - product = result.data as DB.Product - } else { - const result = await ctx.state.supabaseClient - .from('log_extract') - .select() - .eq('client_activity_id', ctx.state.clientActivityId) - .order('created_at', { ascending: false }) - .limit(1) - .single() - - if (result.error) { - throw result.error - } - - product = { - barcode: result.data.barcode, - brand: result.data.brand, - name: result.data.name, - ingredients: result.data.ingredients ?? [], - images: [] - } - } - - // Skip analyzer agent if user has no preferences set - const hasValidPreferences = requestBody.userPreferenceText && - requestBody.userPreferenceText.trim() !== "" && - requestBody.userPreferenceText.trim().toLowerCase() !== "none" - - const ingredientRecommendations = - product.ingredients && product.ingredients.length !== 0 && hasValidPreferences - ? await ingredientAnalyzerAgent(ctx, product, requestBody.userPreferenceText) - : [] - - ctx.response.status = 200 - ctx.response.body = ingredientRecommendations - } catch (error) { - ctx.response.status = 500 - ctx.response.body = error - } + try { + const body = ctx.request.body({ type: "form-data" }); + const formData = await body.value.read({ maxSize: 10 * MB }); + + requestBody = { + barcode: formData.fields["barcode"], + userPreferenceText: formData.fields["userPreferenceText"], + clientActivityId: formData.fields["clientActivityId"], + }; + + const result = await performAnalysis({ + ctx, + requestBody, + }); + + responseStatus = 200; + responseBody = result.recommendations; + } catch (error) { + responseStatus = 500; + responseBody = error; + } + + ctx.response.status = responseStatus; + ctx.response.body = responseBody; + + await logAnalysisResult( + ctx, + startTime, + requestBody, + responseStatus, + responseBody, + ); +} + +export async function streamInventoryAndAnalysis(ctx: Context) { + const barcode = ctx.params.barcode; + const clientActivityId = + ctx.request.url.searchParams.get("clientActivityId") ?? undefined; + const userPreferenceText = + ctx.request.url.searchParams.get("userPreferenceText") ?? undefined; + + const sse = ctx.sendEvents(); + + if (!barcode) { + sse.dispatchMessage({ + event: "error", + data: JSON.stringify({ message: "Barcode is required." }), + }); + sse.close(); + return; + } + + const inventoryResult = await Inventory.fetchProduct({ + supabaseClient: ctx.state.supabaseClient, + barcode, + clientActivityId, + }); + + if (inventoryResult.status !== 200 || !inventoryResult.product) { + const errorPayload = { + message: inventoryResult.error ?? 
"Product not found.", + status: inventoryResult.status, + }; + sse.dispatchMessage({ + event: "error", + data: JSON.stringify(errorPayload), + }); + sse.close(); + return; + } + + sse.dispatchMessage({ + event: "product", + data: JSON.stringify(inventoryResult.product), + }); + + const analysisStartTime = new Date(); + + const analysisRequest: AnalysisRequest = { + barcode, + userPreferenceText, + clientActivityId, + }; + + try { + const analysisResult = await performAnalysis({ + ctx, + requestBody: analysisRequest, + productOverride: inventoryResult.product, + }); + + sse.dispatchMessage({ + event: "analysis", + data: JSON.stringify(analysisResult.recommendations), + }); + + await logAnalysisResult( + ctx, + analysisStartTime, + analysisRequest, + 200, + analysisResult.recommendations, + ); + } catch (error) { + const message = error instanceof Error ? error.message : "Analysis failed."; + + sse.dispatchMessage({ + event: "error", + data: JSON.stringify({ message }), + }); - const endTime = new Date() + await logAnalysisResult( + ctx, + analysisStartTime, + analysisRequest, + 500, + { message }, + ); + } finally { + sse.dispatchComment("done"); + sse.close(); + } +} - ctx.state.supabaseClient.functions.invoke('background/log_analyzebarcode', { +type PerformAnalysisOptions = { + ctx: Context; + requestBody: AnalysisRequest; + productOverride?: DB.Product; +}; + +type PerformAnalysisResult = { + product: DB.Product; + recommendations: IngredientRecommendation[]; +}; + +export async function performAnalysis( + options: PerformAnalysisOptions, +): Promise { + const { ctx, requestBody, productOverride } = options; + + ctx.state.clientActivityId = requestBody.clientActivityId; + + const product = productOverride ?? await lookupProduct(ctx, requestBody); + + const hasValidPreferences = requestBody.userPreferenceText && + requestBody.userPreferenceText.trim() !== "" && + requestBody.userPreferenceText.trim().toLowerCase() !== "none"; + + const hasIngredients = Array.isArray(product.ingredients) && + product.ingredients.length > 0; + + const recommendations = hasValidPreferences && hasIngredients + ? 
await ingredientAnalyzerAgent( + ctx, + product, + requestBody.userPreferenceText!, + ) + : []; + + return { + product, + recommendations, + }; +} + +export async function logAnalysisResult( + ctx: Context, + startTime: Date, + requestBody: AnalysisRequest, + responseStatus: number, + responseBody: unknown, +) { + const endTime = new Date(); + + try { + await ctx.state.supabaseClient.functions.invoke( + "background/log_analyzebarcode", + { body: { - activity_id: ctx.state.activityId, - client_activity_id: ctx.state.clientActivityId, - start_time: startTime, - end_time: endTime, - request_body: requestBody, - response_status: ctx.response.status, - response_body: ctx.response.body + activity_id: ctx.state.activityId, + client_activity_id: ctx.state.clientActivityId, + start_time: startTime, + end_time: endTime, + request_body: requestBody, + response_status: responseStatus, + response_body: responseBody, }, - method: 'POST' - }) -} \ No newline at end of file + method: "POST", + }, + ); + } catch (error) { + console.error("Failed to log analyze barcode event", error); + } +} + +async function lookupProduct( + ctx: Context, + requestBody: AnalysisRequest, +): Promise { + if (requestBody.barcode !== undefined) { + const result = await ctx.state.supabaseClient + .from("log_inventory") + .select() + .eq("barcode", requestBody.barcode) + .order("created_at", { ascending: false }) + .limit(1) + .single(); + + if (result.error) { + throw result.error; + } + + return result.data as DB.Product; + } + + const result = await ctx.state.supabaseClient + .from("log_extract") + .select() + .eq("client_activity_id", ctx.state.clientActivityId) + .order("created_at", { ascending: false }) + .limit(1) + .single(); + + if (result.error) { + throw result.error; + } + + return { + barcode: result.data.barcode, + brand: result.data.brand, + name: result.data.name, + ingredients: result.data.ingredients ?? 
[], + images: [], + }; +} diff --git a/supabase/functions/ingredicheck/index.ts b/supabase/functions/ingredicheck/index.ts index c606bec..d4d0ccc 100644 --- a/supabase/functions/ingredicheck/index.ts +++ b/supabase/functions/ingredicheck/index.ts @@ -46,6 +46,7 @@ router } ctx.response.status = 204 }) + .get('/ingredicheck/inventory/:barcode/analyze-stream', Analyzer.streamInventoryAndAnalysis) .get('/ingredicheck/inventory/:barcode', async (ctx) => { const clientActivityId = ctx.request.url.searchParams.get("clientActivityId") await Inventory.get(ctx, ctx.params.barcode, clientActivityId) diff --git a/supabase/functions/ingredicheck/inventory.ts b/supabase/functions/ingredicheck/inventory.ts index 7f0bffa..5c188a5 100644 --- a/supabase/functions/ingredicheck/inventory.ts +++ b/supabase/functions/ingredicheck/inventory.ts @@ -1,124 +1,176 @@ -import { Context } from 'https://deno.land/x/oak@v12.6.0/mod.ts' -import * as DB from '../shared/db.ts' - -export async function get(ctx: Context, barcode: string, clientActivityId: string | null) { - - let result_json: any = {} - let log_json: any = { - start_time: new Date(), - barcode: barcode, - data_source: 'openfoodfacts/v3', - client_activity_id: clientActivityId, - } - - const url = `https://world.openfoodfacts.org/api/v3/product/${barcode}.json` - const response = await fetch(url) - const data = await response.json() - - if (data.status === 'failure') { - console.log(`Unexpected product details: ${JSON.stringify(data, null, 2)}`) - ctx.response.status = 404 +import { Context } from "https://deno.land/x/oak@v12.6.0/mod.ts"; +import * as DB from "../shared/db.ts"; + +type InventoryFetchOptions = { + supabaseClient: any; + barcode: string; + clientActivityId?: string | null; +}; + +type InventoryFetchResult = { + status: number; + product: DB.Product | null; + error?: string; +}; + +export async function fetchProduct( + options: InventoryFetchOptions, +): Promise { + const { supabaseClient, barcode, clientActivityId } = options; + + let product: DB.Product | null = null; + let errorMessage: string | undefined; + + const log_json: Record = { + start_time: new Date(), + barcode: barcode, + data_source: "openfoodfacts/v3", + client_activity_id: clientActivityId, + }; + + let status = 200; + + try { + const url = + `https://world.openfoodfacts.org/api/v3/product/${barcode}.json`; + const response = await fetch(url); + const data = await response.json(); + + if (data.status === "failure") { + console.log( + `Unexpected product details: ${JSON.stringify(data, null, 2)}`, + ); + status = 404; + errorMessage = data.status_verbose || "Product not found."; } else { - // console.log(`brand: ${data.product.brand_owner}`) - // console.log(`name: ${data.product.product_name}`) - // console.log(`ingredients: ${data.product.ingredients}`) - // console.log(`images: ${data.product.selected_images?.front?.display?.en}`) - result_json = processOpenFoodFactsProductData(barcode, data.product) - log_json = { - ...log_json, - ...result_json - } - ctx.response.status = 200 + product = processOpenFoodFactsProductData(barcode, data.product); + Object.assign(log_json, product); } + } catch (error) { + status = 500; + errorMessage = (error as Error).message; + console.error(`Failed to fetch product ${barcode}: ${errorMessage}`); + } + + log_json.end_time = new Date(); + log_json.response_status = status; + if (errorMessage) { + log_json.error = errorMessage; + } + + await supabaseClient.functions.invoke("background/log_inventory", { + body: log_json, + method: "POST", + }); 
+ + return { + status, + product, + error: errorMessage, + }; +} - log_json.end_time = new Date() - - await ctx.state.supabaseClient.functions.invoke('background/log_inventory', { - body: log_json, - method: 'POST' - }) - - ctx.response.body = result_json +export async function get( + ctx: Context, + barcode: string, + clientActivityId: string | null, +) { + const result = await fetchProduct({ + supabaseClient: ctx.state.supabaseClient, + barcode, + clientActivityId, + }); + + ctx.response.status = result.status; + if (result.status === 200 && result.product) { + ctx.response.body = result.product; + } else { + ctx.response.body = { + error: result.error ?? "Unexpected inventory error.", + }; + } } type SelectedImages = { - [key: string]: { - display: { - [key: string]: string - } - } -} + [key: string]: { + display: { + [key: string]: string; + }; + }; +}; type ImageUrl = { - url: string -} + url: string; +}; function extractDisplayImageUrls(selectedImages?: SelectedImages): ImageUrl[] { - if (selectedImages) { - return Object.values(selectedImages).flatMap(image => { - if (image.display?.en) { - return [{ - url: image.display.en - }] - } - return [] - }) - } - return [] + if (selectedImages) { + return Object.values(selectedImages).flatMap((image) => { + if (image.display?.en) { + return [{ + url: image.display.en, + }]; + } + return []; + }); + } + return []; } -function processOpenFoodFactsProductData(barcode: string, product: any) : DB.Product { - - let brand: string | undefined = undefined - let name: string | undefined = undefined - let ingredients: any[] = [] - - if (product.brand_owner) { - brand = product.brand_owner - } - - if (product.product_name) { - name = product.product_name - } - - if (product.ingredients) { - ingredients = - product.ingredients.map((i: any) => { - return { - name: i.text, - vegan: i.vegan, - vegetarian: i.vegetarian, - ingredients: i.ingredients?.map((i2: any) => { - return { - name: i2.text, - vegan: i2.vegan, - vegetarian: i2.vegetarian, - ingredients: i2.ingredients?.map((i3: any) => { - return { - name: i3.text, - vegan: i3.vegan, - vegetarian: i3.vegetarian, - ingredients: [] - } - }) ?? [] - } - }) ?? [] - } - }) - } - - const images = extractDisplayImageUrls(product.selected_images) - - // Workaround for known issues with OpenFoodFacts data - if (barcode === '0096619362776') { - // Label says 'Contains No Animal Rennet', but ingredient list has 'Animal Rennet'. - ingredients = ingredients.filter((i) => i.name !== 'Animal Rennet') - } - - return { - brand: brand, - name: name, - ingredients: ingredients, - images: images - } -} \ No newline at end of file +function processOpenFoodFactsProductData( + barcode: string, + product: any, +): DB.Product { + let brand: string | undefined = undefined; + let name: string | undefined = undefined; + let ingredients: any[] = []; + + if (product.brand_owner) { + brand = product.brand_owner; + } + + if (product.product_name) { + name = product.product_name; + } + + if (product.ingredients) { + ingredients = product.ingredients.map((i: any) => { + return { + name: i.text, + vegan: i.vegan, + vegetarian: i.vegetarian, + ingredients: i.ingredients?.map((i2: any) => { + return { + name: i2.text, + vegan: i2.vegan, + vegetarian: i2.vegetarian, + ingredients: i2.ingredients?.map((i3: any) => { + return { + name: i3.text, + vegan: i3.vegan, + vegetarian: i3.vegetarian, + ingredients: [], + }; + }) ?? [], + }; + }) ?? 
[], + }; + }); + } + + const images = extractDisplayImageUrls(product.selected_images); + + // Workaround for known issues with OpenFoodFacts data + if (barcode === "0096619362776") { + // Label says 'Contains No Animal Rennet', but ingredient list has 'Animal Rennet'. + ingredients = ingredients.filter((i) => i.name !== "Animal Rennet"); + } + + return { + barcode: barcode, + brand: brand, + name: name, + ingredients: ingredients, + images: images, + }; +} diff --git a/supabase/functions/shared/db.ts b/supabase/functions/shared/db.ts index 878af75..c8908e8 100644 --- a/supabase/functions/shared/db.ts +++ b/supabase/functions/shared/db.ts @@ -22,6 +22,10 @@ export type Product = { export function defaultProduct(): Product { return { + barcode: undefined, + data_source: undefined, + brand: undefined, + name: undefined, ingredients: [], images: [], } diff --git a/supabase/functions/shared/llm/ingredientanalyzeragent.ts b/supabase/functions/shared/llm/ingredientanalyzeragent.ts index 6c79db5..e2a8b86 100644 --- a/supabase/functions/shared/llm/ingredientanalyzeragent.ts +++ b/supabase/functions/shared/llm/ingredientanalyzeragent.ts @@ -7,7 +7,7 @@ import { import { createGeminiProgram } from "./programs.ts"; import { ChatMessage } from "./types.ts"; -type IngredientRecommendation = { +export type IngredientRecommendation = { ingredientName: string; safetyRecommendation: "MaybeUnsafe" | "DefinitelyUnsafe"; reasoning: string; From 10e72c27a2d474eb330c8f6af79824d66d820495 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Tue, 30 Sep 2025 15:46:59 +0530 Subject: [PATCH 04/21] feat(inventory): add Deno OFF ingest script and inventory_cache table; remove estimation artifacts --- supabase/database/tables.sql | 44 ++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/supabase/database/tables.sql b/supabase/database/tables.sql index 9d816f6..1b01d59 100644 --- a/supabase/database/tables.sql +++ b/supabase/database/tables.sql @@ -1,4 +1,48 @@ +<<<<<<< Current (Your changes) +======= +-------------------------------------------------------------------------------- + +create table + public.inventory_cache ( + created_at timestamp with time zone not null default now(), + updated_at timestamp with time zone not null default now(), + last_refreshed_at timestamp with time zone, + barcode text not null, + data_source text not null default 'openfoodfacts/v3', + name text, + brand text, + ingredients jsonb not null default '[]'::jsonb, + images jsonb not null default '[]'::jsonb, + off_last_modified_t bigint, + etag text, + constraint inventory_cache_pkey primary key (barcode) + ) tablespace pg_default; + +alter table public.inventory_cache enable row level security; + +create policy "Select for all authenticated users" on public.inventory_cache + for select + using (true); + +create policy "Write for service role only" on public.inventory_cache + for ALL + using (auth.role() = 'service_role') + with check (auth.role() = 'service_role'); + +create or replace function set_inventory_cache_updated_at() +returns trigger as $$ +begin + new.updated_at = now(); + return new; +end; +$$ language plpgsql; + +create trigger trg_inventory_cache_updated_at +before update on public.inventory_cache +for each row execute function set_inventory_cache_updated_at(); + +>>>>>>> Incoming (Background Agent changes) -------------------------------------------------------------------------------- create table From 62805fd388e5115c689193dabe837d84b27ad8be Mon Sep 17 00:00:00 2001 From: justanotheratom 
Date: Tue, 30 Sep 2025 16:04:06 +0530 Subject: [PATCH 05/21] feat: add Deno OFF ingest script for inventory caching --- local/off_ingest.ts | 292 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 local/off_ingest.ts diff --git a/local/off_ingest.ts b/local/off_ingest.ts new file mode 100644 index 0000000..d1f072a --- /dev/null +++ b/local/off_ingest.ts @@ -0,0 +1,292 @@ +// deno run -A --import-map=local/import_map.json local/off_ingest.ts +// Environment: SUPABASE_URL, SUPABASE_SECRET_KEY must be set for upload +// Note: SUPABASE_SECRET_KEY is the new key type (replaces SUPABASE_SERVICE_ROLE_KEY) + +import { createClient } from "@supabase/supabase-js"; + +type Ingredient = { + name: string; + vegan?: boolean; + vegetarian?: boolean; + ingredients?: Ingredient[]; +}; + +type Image = { url: string }; + +type CacheRow = { + barcode: string; + data_source: string; + brand?: string; + name?: string; + ingredients: Ingredient[]; + images: Image[]; + off_last_modified_t?: number; +}; + +const OFF_JSONL_GZ_URL = "https://static.openfoodfacts.org/data/openfoodfacts-products.jsonl.gz"; +const OUTPUT_PATH = "local/off_inventory_cache.jsonl"; +const BATCH_UPLOAD_SIZE = 500; + +function mapIngredient(node: any): Ingredient { + const item: Ingredient = { + name: typeof node?.text === "string" ? node.text : undefined as unknown as string, + vegan: node?.vegan, + vegetarian: node?.vegetarian, + ingredients: [], + }; + if (Array.isArray(node?.ingredients) && node.ingredients.length > 0) { + item.ingredients = node.ingredients.filter((x: any) => x && typeof x === "object").map(mapIngredient); + } + return item; +} + +function extractDisplayImageUrls(selectedImages: any): Image[] { + if (!selectedImages || typeof selectedImages !== "object") return []; + const urls: Image[] = []; + try { + for (const value of Object.values(selectedImages as Record)) { + const display = (value as any)?.display; + if (display && typeof display === "object") { + if (typeof display.en === "string" && display.en) { + urls.push({ url: display.en }); + } else { + for (const v of Object.values(display)) { + if (typeof v === "string" && v) { + urls.push({ url: v }); + break; + } + } + } + } + } + } catch (_) { + // ignore malformed structures + } + return urls; +} + +function mapToCacheRow(product: any): CacheRow | null { + const dataSource = "openfoodfacts/v3"; + + let barcode: string | undefined; + const code = product?.code; + if (typeof code === "string" && code.trim()) { + barcode = code.trim(); + } else if (typeof code === "number") { + barcode = String(code); + } else if (typeof product?._id === "string" && product._id.trim()) { + barcode = product._id.trim(); + } + if (!barcode) return null; + + let brand: string | undefined; + if (typeof product?.brand_owner === "string" && product.brand_owner.trim()) { + brand = product.brand_owner.trim(); + } else if (typeof product?.brands === "string" && product.brands.trim()) { + brand = product.brands.split(",")[0]?.trim(); + } + + let name: string | undefined; + if (typeof product?.product_name === "string" && product.product_name.trim()) { + name = product.product_name.trim(); + } else { + for (const [k, v] of Object.entries(product ?? 
{})) { + if (k.startsWith("product_name_") && typeof v === "string" && (v as string).trim()) { + name = (v as string).trim(); + break; + } + } + } + + let ingredients: Ingredient[] = []; + if (Array.isArray(product?.ingredients) && product.ingredients.length > 0) { + ingredients = product.ingredients.filter((x: any) => x && typeof x === "object").map(mapIngredient); + } + + const images = extractDisplayImageUrls(product?.selected_images); + const off_last_modified_t = typeof product?.last_modified_t === "number" ? product.last_modified_t : undefined; + + return { + barcode, + data_source: dataSource, + brand, + name, + ingredients, + images, + off_last_modified_t, + }; +} + +async function* iterLinesFromGzip(url: string): AsyncGenerator { + const res = await fetch(url); + if (!res.body) throw new Error("No response body from OFF"); + const decompressed = res.body.pipeThrough(new DecompressionStream("gzip")); + const textStream = decompressed.pipeThrough(new TextDecoderStream()); + const reader = textStream.getReader(); + let buf = ""; + try { + while (true) { + const { value, done } = await reader.read(); + if (done) break; + buf += value ?? ""; + let idx: number; + while ((idx = buf.indexOf("\n")) !== -1) { + const line = buf.slice(0, idx); + buf = buf.slice(idx + 1); + yield line; + } + } + if (buf.length > 0) { + yield buf; + } + } finally { + reader.releaseLock(); + } +} + +async function writeJsonl(rows: AsyncIterable, outPath: string): Promise<{ count: number; totalBytes: number; nonEmpty: { brand: number; name: number; ingredients: number; images: number } }> { + const file = await Deno.open(outPath, { create: true, write: true, truncate: true }); + const encoder = new TextEncoder(); + let count = 0; + let totalBytes = 0; + let nonBrand = 0, nonName = 0, nonIng = 0, nonImg = 0; + try { + for await (const row of rows) { + count++; + if (row.brand) nonBrand++; + if (row.name) nonName++; + if (row.ingredients && row.ingredients.length) nonIng++; + if (row.images && row.images.length) nonImg++; + const json = JSON.stringify(row); + totalBytes += encoder.encode(json).byteLength; + await file.write(encoder.encode(json + "\n")); + } + } finally { + file.close(); + } + return { count, totalBytes, nonEmpty: { brand: nonBrand, name: nonName, ingredients: nonIng, images: nonImg } }; +} + +async function* projectRows(lines: AsyncIterable): AsyncGenerator { + for await (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + const product = JSON.parse(trimmed); + const row = mapToCacheRow(product); + if (row && row.barcode) { + yield row; + } + } catch (_) { + // skip invalid lines + } + } +} + +function formatBytes(bytes: number): string { + const units = ["B", "KB", "MB", "GB", "TB"] as const; + let i = 0; + let n = bytes; + while (n >= 1024 && i < units.length - 1) { + n /= 1024; + i++; + } + return `${n.toFixed(2)} ${units[i]}`; +} + +async function askUploadPermission(stats: { count: number; totalBytes: number }): Promise { + console.log("\nSummary:"); + console.log(` Rows: ${stats.count}`); + console.log(` Payload size (JSON only): ${formatBytes(stats.totalBytes)} (~${Math.round(stats.totalBytes / Math.max(1, stats.count))} B/row)`); + const answer = confirm("Upload to Supabase inventory_cache? This can take a long time. (y/N)"); + return !!answer; +} + +async function uploadJsonlToSupabase(path: string) { + const url = Deno.env.get("SUPABASE_URL") ?? ""; + const key = Deno.env.get("SUPABASE_SECRET_KEY") ?? Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? 
""; + if (!url || !key) throw new Error("SUPABASE_URL and SUPABASE_SECRET_KEY (or SUPABASE_SERVICE_ROLE_KEY for legacy) must be set in environment"); + const supabase = createClient(url, key, { auth: { persistSession: false } }); + + const file = await Deno.open(path, { read: true }); + const decoder = new TextDecoder(); + const bufSize = 1024 * 1024; + const buf = new Uint8Array(bufSize); + let pending: any[] = []; + let leftover = ""; + let total = 0; + try { + while (true) { + const read = await file.read(buf); + if (read === null) break; + const chunk = decoder.decode(buf.subarray(0, read)); + let data = leftover + chunk; + let idx: number; + while ((idx = data.indexOf("\n")) !== -1) { + const line = data.slice(0, idx); + data = data.slice(idx + 1); + if (!line) continue; + try { + const row = JSON.parse(line); + // Set last_refreshed_at during upsert + row.last_refreshed_at = new Date().toISOString(); + pending.push(row); + total++; + if (pending.length >= BATCH_UPLOAD_SIZE) { + const { error } = await supabase.from("inventory_cache").upsert(pending, { onConflict: "barcode" }); + if (error) throw error; + pending = []; + } + } catch (_) { + // skip + } + } + leftover = data; + } + if (leftover.trim()) { + try { + const row = JSON.parse(leftover.trim()); + row.last_refreshed_at = new Date().toISOString(); + pending.push(row); + total++; + } catch (_) {} + } + if (pending.length) { + const { error } = await supabase.from("inventory_cache").upsert(pending, { onConflict: "barcode" }); + if (error) throw error; + } + } finally { + file.close(); + } + console.log(`Uploaded/Upserted ${total} rows to inventory_cache`); +} + +async function main() { + console.log("Downloading OFF JSONL.gz and streaming transform..."); + const rows = projectRows(iterLinesFromGzip(OFF_JSONL_GZ_URL)); + const stats = await writeJsonl(rows, OUTPUT_PATH); + console.log("\nField coverage (non-empty counts):"); + console.log(` brand: ${stats.nonEmpty.brand}`); + console.log(` name: ${stats.nonEmpty.name}`); + console.log(` ingredients: ${stats.nonEmpty.ingredients}`); + console.log(` images: ${stats.nonEmpty.images}`); + const proceed = await askUploadPermission(stats); + if (!proceed) { + console.log("Upload skipped."); + return; + } + console.log("\nUploading to Supabase (batched upserts)..."); + const start = Date.now(); + await uploadJsonlToSupabase(OUTPUT_PATH); + const elapsed = (Date.now() - start) / 1000; + console.log(`Done in ${elapsed.toFixed(1)}s.`); +} + +if (import.meta.main) { + await main().catch((err) => { + console.error("Error:", err); + Deno.exit(1); + }); +} + + From 028cbb144e2ae55381410edc00904d734251e89d Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Tue, 30 Sep 2025 16:06:22 +0530 Subject: [PATCH 06/21] chore: remove extraneous estimator files from PR --- local/off_estimator.py | 248 -------------------------------- local/off_estimator_sample.json | 18 --- local/off_jsonl_linecount.txt | 1 - 3 files changed, 267 deletions(-) delete mode 100644 local/off_estimator.py delete mode 100644 local/off_estimator_sample.json delete mode 100644 local/off_jsonl_linecount.txt diff --git a/local/off_estimator.py b/local/off_estimator.py deleted file mode 100644 index 60f3d77..0000000 --- a/local/off_estimator.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import gzip -import io -import json -import sys -import time -from typing import Any, Dict, Iterable, List, Optional, Tuple - - -def parse_args() -> argparse.Namespace: - parser = 
argparse.ArgumentParser(description="Stream OFF JSONL.gz and estimate projected payload sizes.") - source = parser.add_mutually_exclusive_group(required=True) - source.add_argument("--stdin", action="store_true", help="Read gzipped JSONL from stdin") - source.add_argument("--input", type=str, help="Path to gzipped JSONL file") - parser.add_argument("--sample", type=int, default=0, help="Number of records to process for sampling (0 = full stream)") - parser.add_argument("--output", type=str, default="", help="Optional path to write JSON report") - return parser.parse_args() - - -def iter_gz_lines_from_stdin() -> Iterable[str]: - gz = gzip.GzipFile(fileobj=sys.stdin.buffer, mode="rb") - with io.TextIOWrapper(gz, encoding="utf-8", errors="ignore", newline="\n") as f: - for line in f: - yield line - - -def iter_gz_lines_from_file(path: str) -> Iterable[str]: - with gzip.open(path, mode="rt", encoding="utf-8", errors="ignore", newline="\n") as f: - for line in f: - yield line - - -def extract_display_image_urls(selected_images: Optional[Dict[str, Any]]) -> List[Dict[str, str]]: - if not isinstance(selected_images, dict): - return [] - urls: List[Dict[str, str]] = [] - try: - for image in selected_images.values(): - display = image.get("display") if isinstance(image, dict) else None - if isinstance(display, dict): - # Prefer English if available, else any string value - if isinstance(display.get("en"), str): - urls.append({"url": display["en"]}) - else: - for v in display.values(): - if isinstance(v, str): - urls.append({"url": v}) - break - except Exception: - # Be tolerant to odd structures - return urls - return urls - - -def map_ingredient_node(node: Dict[str, Any]) -> Dict[str, Any]: - mapped: Dict[str, Any] = { - "name": node.get("text"), - "vegan": node.get("vegan"), - "vegetarian": node.get("vegetarian"), - "ingredients": [], - } - sub = node.get("ingredients") - if isinstance(sub, list) and len(sub) > 0: - mapped["ingredients"] = [map_ingredient_node(child) for child in sub if isinstance(child, dict)] - return mapped - - -def extract_projection(product: Dict[str, Any]) -> Dict[str, Any]: - # Barcode / code - barcode: Optional[str] = None - code = product.get("code") - if isinstance(code, str) and code.strip(): - barcode = code.strip() - elif isinstance(code, (int, float)): - barcode = str(code) - elif isinstance(product.get("_id"), str): - barcode = product.get("_id") - - # Brand - brand: Optional[str] = None - brand_owner = product.get("brand_owner") - if isinstance(brand_owner, str) and brand_owner.strip(): - brand = brand_owner.strip() - else: - brands = product.get("brands") - if isinstance(brands, str) and brands.strip(): - # OFF brands is comma-separated; take the first token - brand = brands.split(",")[0].strip() - - # Name - name: Optional[str] = None - product_name = product.get("product_name") - if isinstance(product_name, str) and product_name.strip(): - name = product_name.strip() - else: - # Try language-specific variants if present - for k, v in product.items(): - if k.startswith("product_name_") and isinstance(v, str) and v.strip(): - name = v.strip() - break - - # Ingredients - ingredients_list: List[Dict[str, Any]] = [] - raw_ingredients = product.get("ingredients") - if isinstance(raw_ingredients, list) and len(raw_ingredients) > 0: - ingredients_list = [map_ingredient_node(node) for node in raw_ingredients if isinstance(node, dict)] - - # Images - images: List[Dict[str, str]] = [] - selected_images = product.get("selected_images") - images = 
extract_display_image_urls(selected_images) - - return { - "barcode": barcode, - "brand": brand, - "name": name, - "ingredients": ingredients_list, - "images": images, - } - - -def json_bytes(value: Any) -> int: - try: - return len(json.dumps(value, ensure_ascii=False, separators=(",", ":")).encode("utf-8")) - except Exception: - return 0 - - -def utf8_bytes(value: Optional[str]) -> int: - if value is None: - return 0 - try: - return len(value.encode("utf-8")) - except Exception: - return 0 - - -def run(lines: Iterable[str], sample: int = 0) -> Dict[str, Any]: - start = time.time() - - total_records = 0 - projected_records = 0 - barcode_bytes_total = 0 - brand_bytes_total = 0 - name_bytes_total = 0 - ingredients_bytes_total = 0 - images_bytes_total = 0 - - nonempty_brand = 0 - nonempty_name = 0 - nonempty_ingredients = 0 - nonempty_images = 0 - - processed = 0 - for raw in lines: - if sample and processed >= sample: - break - total_records += 1 - raw = raw.strip() - if not raw: - continue - try: - product = json.loads(raw) - except Exception: - continue - - proj = extract_projection(product) - if proj.get("barcode"): - projected_records += 1 - barcode_bytes_total += utf8_bytes(proj.get("barcode")) - - b = proj.get("brand") - if b: - nonempty_brand += 1 - brand_bytes_total += utf8_bytes(b) - - n = proj.get("name") - if n: - nonempty_name += 1 - name_bytes_total += utf8_bytes(n) - - ing = proj.get("ingredients") - if isinstance(ing, list) and len(ing) > 0: - nonempty_ingredients += 1 - ingredients_bytes_total += json_bytes(ing) - - imgs = proj.get("images") - if isinstance(imgs, list) and len(imgs) > 0: - nonempty_images += 1 - images_bytes_total += json_bytes(imgs) - - processed += 1 - - elapsed = time.time() - start - - result = { - "total_records": total_records, - "projected_records_with_barcode": projected_records, - "barcode_bytes_total": barcode_bytes_total, - "brand_bytes_total": brand_bytes_total, - "name_bytes_total": name_bytes_total, - "ingredients_bytes_total": ingredients_bytes_total, - "images_bytes_total": images_bytes_total, - "nonempty_counts": { - "brand": nonempty_brand, - "name": nonempty_name, - "ingredients": nonempty_ingredients, - "images": nonempty_images, - }, - "elapsed_seconds": elapsed, - } - - totals_payload = ( - barcode_bytes_total - + brand_bytes_total - + name_bytes_total - + ingredients_bytes_total - + images_bytes_total - ) - result["projected_payload_bytes_total"] = totals_payload - result["avg_payload_bytes_per_projected_row"] = ( - (totals_payload / projected_records) if projected_records else 0.0 - ) - - return result - - -def main() -> None: - args = parse_args() - if args.stdin: - lines = iter_gz_lines_from_stdin() - else: - lines = iter_gz_lines_from_file(args.input) - - result = run(lines, sample=args.sample) - - if args.output: - with open(args.output, "w", encoding="utf-8") as f: - json.dump(result, f, ensure_ascii=False, indent=2) - else: - print(json.dumps(result, ensure_ascii=False, indent=2)) - - -if __name__ == "__main__": - main() - diff --git a/local/off_estimator_sample.json b/local/off_estimator_sample.json deleted file mode 100644 index 5e3bf48..0000000 --- a/local/off_estimator_sample.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "total_records": 100000, - "projected_records_with_barcode": 100000, - "barcode_bytes_total": 1288365, - "brand_bytes_total": 1796855, - "name_bytes_total": 2689661, - "ingredients_bytes_total": 108627317, - "images_bytes_total": 0, - "nonempty_counts": { - "brand": 98819, - "name": 99632, - "ingredients": 96406, 
- "images": 0 - }, - "elapsed_seconds": 53.94886898994446, - "projected_payload_bytes_total": 114402198, - "avg_payload_bytes_per_projected_row": 1144.02198 -} diff --git a/local/off_jsonl_linecount.txt b/local/off_jsonl_linecount.txt deleted file mode 100644 index 73383a4..0000000 --- a/local/off_jsonl_linecount.txt +++ /dev/null @@ -1 +0,0 @@ -4046118 From c48d7a231174705e804f48ba6428afc6ca440846 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Tue, 30 Sep 2025 16:17:47 +0530 Subject: [PATCH 07/21] fix: resolve merge conflict in tables.sql --- supabase/database/tables.sql | 3 --- 1 file changed, 3 deletions(-) diff --git a/supabase/database/tables.sql b/supabase/database/tables.sql index 1b01d59..14f91a5 100644 --- a/supabase/database/tables.sql +++ b/supabase/database/tables.sql @@ -1,6 +1,4 @@ -<<<<<<< Current (Your changes) -======= -------------------------------------------------------------------------------- create table @@ -42,7 +40,6 @@ create trigger trg_inventory_cache_updated_at before update on public.inventory_cache for each row execute function set_inventory_cache_updated_at(); ->>>>>>> Incoming (Background Agent changes) -------------------------------------------------------------------------------- create table From 97505a78c906ecd8959c47c4dd1683b08dac9ea3 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Tue, 30 Sep 2025 16:34:00 +0530 Subject: [PATCH 08/21] feat: add .env template and load env vars in off_ingest script --- .gitignore | 1 + local/.env.template | 12 ++++++++++++ local/off_ingest.ts | 27 +++++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 local/.env.template diff --git a/.gitignore b/.gitignore index 356448e..508fd10 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ local/finetuning/preferencevalidatordataset/.env +local/.env diff --git a/local/.env.template b/local/.env.template new file mode 100644 index 0000000..91e50fb --- /dev/null +++ b/local/.env.template @@ -0,0 +1,12 @@ +# Supabase Configuration +# Copy this file to .env and fill in your actual values + +# Your Supabase project URL +SUPABASE_URL=https://your-project-ref.supabase.co + +# Your Supabase secret key (new key type, replaces SUPABASE_SERVICE_ROLE_KEY) +# Get this from your Supabase dashboard > Settings > API +SUPABASE_SECRET_KEY=your_secret_key_here + +# Legacy fallback (if you haven't migrated to new keys yet) +# SUPABASE_SERVICE_ROLE_KEY=your_legacy_service_role_key_here diff --git a/local/off_ingest.ts b/local/off_ingest.ts index d1f072a..4437c28 100644 --- a/local/off_ingest.ts +++ b/local/off_ingest.ts @@ -1,9 +1,29 @@ // deno run -A --import-map=local/import_map.json local/off_ingest.ts -// Environment: SUPABASE_URL, SUPABASE_SECRET_KEY must be set for upload -// Note: SUPABASE_SECRET_KEY is the new key type (replaces SUPABASE_SERVICE_ROLE_KEY) +// Environment: Copy .env.template to .env and fill in your values import { createClient } from "@supabase/supabase-js"; +// Load environment variables from .env file +async function loadEnv() { + try { + const envText = await Deno.readTextFile("local/.env"); + const lines = envText.split("\n"); + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed && !trimmed.startsWith("#")) { + const [key, ...valueParts] = trimmed.split("="); + if (key && valueParts.length > 0) { + const value = valueParts.join("=").trim(); + Deno.env.set(key.trim(), value); + } + } + } + } catch (error) { + console.warn("āš ļø Could not load .env file:", error.message); + console.warn(" 
Make sure to copy .env.template to .env and fill in your values"); + } +} + type Ingredient = { name: string; vegan?: boolean; @@ -262,6 +282,9 @@ async function uploadJsonlToSupabase(path: string) { } async function main() { + // Load environment variables from .env file + await loadEnv(); + console.log("Downloading OFF JSONL.gz and streaming transform..."); const rows = projectRows(iterLinesFromGzip(OFF_JSONL_GZ_URL)); const stats = await writeJsonl(rows, OUTPUT_PATH); From 54622bfded027af044f4d7cd410ed16a056f4ea2 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Tue, 30 Sep 2025 17:07:45 +0530 Subject: [PATCH 09/21] refactor: use npm: specifier for standalone script (no import map needed) --- local/off_ingest.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/local/off_ingest.ts b/local/off_ingest.ts index 4437c28..aa12dfc 100644 --- a/local/off_ingest.ts +++ b/local/off_ingest.ts @@ -1,7 +1,7 @@ -// deno run -A --import-map=local/import_map.json local/off_ingest.ts +// deno run -A local/off_ingest.ts // Environment: Copy .env.template to .env and fill in your values -import { createClient } from "@supabase/supabase-js"; +import { createClient } from "npm:@supabase/supabase-js@2.39.3"; // Load environment variables from .env file async function loadEnv() { From a7ad82312fcfc4085e8a970d6157f9990a4e33e9 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Tue, 30 Sep 2025 17:08:56 +0530 Subject: [PATCH 10/21] feat: add batch upload with user confirmation every 5 batches --- local/off_ingest.ts | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/local/off_ingest.ts b/local/off_ingest.ts index aa12dfc..b6d01a7 100644 --- a/local/off_ingest.ts +++ b/local/off_ingest.ts @@ -45,7 +45,8 @@ type CacheRow = { const OFF_JSONL_GZ_URL = "https://static.openfoodfacts.org/data/openfoodfacts-products.jsonl.gz"; const OUTPUT_PATH = "local/off_inventory_cache.jsonl"; -const BATCH_UPLOAD_SIZE = 500; +const BATCH_UPLOAD_SIZE = 1000; +const BATCHES_PER_CONFIRMATION = 5; // Upload 5 batches (5000 rows) before asking for confirmation function mapIngredient(node: any): Ingredient { const item: Ingredient = { @@ -235,6 +236,9 @@ async function uploadJsonlToSupabase(path: string) { let pending: any[] = []; let leftover = ""; let total = 0; + let batchCount = 0; + let uploadedBatches = 0; + try { while (true) { const read = await file.read(buf); @@ -252,17 +256,37 @@ async function uploadJsonlToSupabase(path: string) { row.last_refreshed_at = new Date().toISOString(); pending.push(row); total++; + if (pending.length >= BATCH_UPLOAD_SIZE) { + // Upload this batch const { error } = await supabase.from("inventory_cache").upsert(pending, { onConflict: "barcode" }); if (error) throw error; + + batchCount++; + uploadedBatches++; + console.log(`āœ… Uploaded batch ${batchCount} (${pending.length} rows) - Total: ${total} rows`); + + // Check if we need confirmation + if (uploadedBatches >= BATCHES_PER_CONFIRMATION) { + console.log(`\nšŸ“Š Progress: ${total} rows uploaded in ${batchCount} batches`); + const continueUpload = confirm(`Continue uploading? 
+              if (!continueUpload) {
+                console.log("❌ Upload cancelled by user");
+                return;
+              }
+              uploadedBatches = 0; // Reset counter
+            }
+
            pending = [];
           }
         } catch (_) {
-          // skip
+          // skip invalid JSON
         }
       }
       leftover = data;
     }
+
+    // Handle remaining data
     if (leftover.trim()) {
       try {
         const row = JSON.parse(leftover.trim());
@@ -271,14 +295,19 @@ async function uploadJsonlToSupabase(path: string) {
         total++;
       } catch (_) {}
     }
+
+    // Upload final batch if any
     if (pending.length) {
       const { error } = await supabase.from("inventory_cache").upsert(pending, { onConflict: "barcode" });
       if (error) throw error;
+      batchCount++;
+      console.log(`✅ Uploaded final batch ${batchCount} (${pending.length} rows)`);
     }
   } finally {
     file.close();
   }
-  console.log(`Uploaded/Upserted ${total} rows to inventory_cache`);
+
+  console.log(`\n🎉 Upload complete! Total: ${total} rows uploaded in ${batchCount} batches`);
 }

From 51b9068d7fbf53db83524e241875415f2fe64674 Mon Sep 17 00:00:00 2001
From: justanotheratom
Date: Tue, 30 Sep 2025 17:42:22 +0530
Subject: [PATCH 11/21] feat: add progress indicators for download and processing

---
 local/off_ingest.ts | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/local/off_ingest.ts b/local/off_ingest.ts
index b6d01a7..f583f75 100644
--- a/local/off_ingest.ts
+++ b/local/off_ingest.ts
@@ -139,12 +139,21 @@ function mapToCacheRow(product: any): CacheRow | null {
 }
 
 async function* iterLinesFromGzip(url: string): AsyncGenerator<string> {
+  console.log("📄 Downloading Open Food Facts data...");
   const res = await fetch(url);
   if (!res.body) throw new Error("No response body from OFF");
+
+  const contentLength = res.headers.get("content-length");
+  const totalBytes = contentLength ? parseInt(contentLength) : 0;
+  console.log(`📦 File size: ${totalBytes > 0 ? formatBytes(totalBytes) : "unknown"}`);
+
   const decompressed = res.body.pipeThrough(new DecompressionStream("gzip"));
   const textStream = decompressed.pipeThrough(new TextDecoderStream());
   const reader = textStream.getReader();
   let buf = "";
+  let lineCount = 0;
+  let lastProgressTime = Date.now();
+
   try {
     while (true) {
       const { value, done } = await reader.read();
@@ -154,23 +163,37 @@ async function* iterLinesFromGzip(url: string): AsyncGenerator<string> {
       while ((idx = buf.indexOf("\n")) !== -1) {
         const line = buf.slice(0, idx);
         buf = buf.slice(idx + 1);
+        lineCount++;
+
+        // Show progress every 50k lines or every 10 seconds
+        const now = Date.now();
+        if (lineCount % 50000 === 0 || now - lastProgressTime > 10000) {
+          console.log(`📊 Processed ${lineCount.toLocaleString()} products...`);
+          lastProgressTime = now;
+        }
+
         yield line;
       }
     }
     if (buf.length > 0) {
+      lineCount++;
      yield buf;
    }
  } finally {
    reader.releaseLock();
  }
+  console.log(`✅ Download complete! Processed ${lineCount.toLocaleString()} products`);
 }
 
 async function writeJsonl(rows: AsyncIterable<CacheRow>, outPath: string): Promise<{ count: number; totalBytes: number; nonEmpty: { brand: number; name: number; ingredients: number; images: number } }> {
+  console.log("💾 Writing transformed data to local file...");
   const file = await Deno.open(outPath, { create: true, write: true, truncate: true });
   const encoder = new TextEncoder();
   let count = 0;
   let totalBytes = 0;
   let nonBrand = 0, nonName = 0, nonIng = 0, nonImg = 0;
+  let lastProgressTime = Date.now();
+
   try {
     for await (const row of rows) {
       count++;
@@ -181,10 +204,18 @@ async function writeJsonl(rows: AsyncIterable<CacheRow>, outPath: string): Promise<{ count: number; totalBytes: number; nonEmpty: { brand: number; name: number; ingredients: number; images: number } }> {
       const json = JSON.stringify(row);
       totalBytes += encoder.encode(json).byteLength;
       await file.write(encoder.encode(json + "\n"));
+
+      // Show progress every 25k rows or every 5 seconds
+      const now = Date.now();
+      if (count % 25000 === 0 || now - lastProgressTime > 5000) {
+        console.log(`📝 Written ${count.toLocaleString()} products to local file...`);
+        lastProgressTime = now;
+      }
     }
   } finally {
     file.close();
   }
+  console.log(`✅ Local file complete! ${count.toLocaleString()} products written`);
   return { count, totalBytes, nonEmpty: { brand: nonBrand, name: nonName, ingredients: nonIng, images: nonImg } };
 }

From e4140316bd200700045e65496f1a64d4731c9f30 Mon Sep 17 00:00:00 2001
From: justanotheratom
Date: Tue, 30 Sep 2025 17:46:59 +0530
Subject: [PATCH 12/21] feat: add detailed validation statistics to track invalid/empty products

---
 local/off_ingest.ts | 58 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 50 insertions(+), 8 deletions(-)

diff --git a/local/off_ingest.ts b/local/off_ingest.ts
index f583f75..4bdfabe 100644
--- a/local/off_ingest.ts
+++ b/local/off_ingest.ts
@@ -185,7 +185,7 @@ async function* iterLinesFromGzip(url: string): AsyncGenerator<string> {
   console.log(`✅ Download complete! Processed ${lineCount.toLocaleString()} products`);
Processed ${lineCount.toLocaleString()} products`); } -async function writeJsonl(rows: AsyncIterable, outPath: string): Promise<{ count: number; totalBytes: number; nonEmpty: { brand: number; name: number; ingredients: number; images: number } }> { +async function writeJsonl(rows: AsyncIterable<{ row: CacheRow; stats: any }>, outPath: string): Promise<{ count: number; totalBytes: number; nonEmpty: { brand: number; name: number; ingredients: number; images: number }; validationStats: any }> { console.log("šŸ’¾ Writing transformed data to local file..."); const file = await Deno.open(outPath, { create: true, write: true, truncate: true }); const encoder = new TextEncoder(); @@ -193,9 +193,10 @@ async function writeJsonl(rows: AsyncIterable, outPath: string): Promi let totalBytes = 0; let nonBrand = 0, nonName = 0, nonIng = 0, nonImg = 0; let lastProgressTime = Date.now(); + let lastStats: any = null; try { - for await (const row of rows) { + for await (const { row, stats } of rows) { count++; if (row.brand) nonBrand++; if (row.name) nonName++; @@ -204,33 +205,62 @@ async function writeJsonl(rows: AsyncIterable, outPath: string): Promi const json = JSON.stringify(row); totalBytes += encoder.encode(json).byteLength; await file.write(encoder.encode(json + "\n")); + lastStats = stats; // Show progress every 25k rows or every 5 seconds const now = Date.now(); if (count % 25000 === 0 || now - lastProgressTime > 5000) { - console.log(`šŸ“ Written ${count.toLocaleString()} products to local file...`); + const validRate = ((lastStats.validProducts / lastStats.totalLines) * 100).toFixed(1); + const invalidRate = (((lastStats.emptyLines + lastStats.jsonParseErrors + lastStats.noBarcode) / lastStats.totalLines) * 100).toFixed(1); + console.log(`šŸ“ Written ${count.toLocaleString()} products (${validRate}% valid, ${invalidRate}% invalid)`); lastProgressTime = now; } } } finally { file.close(); } + + // Final validation statistics + if (lastStats) { + console.log(`\nšŸ“Š Validation Statistics:`); + console.log(` Total lines processed: ${lastStats.totalLines.toLocaleString()}`); + console.log(` Valid products: ${lastStats.validProducts.toLocaleString()} (${((lastStats.validProducts / lastStats.totalLines) * 100).toFixed(1)}%)`); + console.log(` Invalid products: ${(lastStats.emptyLines + lastStats.jsonParseErrors + lastStats.noBarcode).toLocaleString()} (${(((lastStats.emptyLines + lastStats.jsonParseErrors + lastStats.noBarcode) / lastStats.totalLines) * 100).toFixed(1)}%)`); + console.log(` - Empty lines: ${lastStats.emptyLines.toLocaleString()}`); + console.log(` - JSON parse errors: ${lastStats.jsonParseErrors.toLocaleString()}`); + console.log(` - No barcode: ${lastStats.noBarcode.toLocaleString()}`); + } + console.log(`āœ… Local file complete! 
${count.toLocaleString()} products written`); - return { count, totalBytes, nonEmpty: { brand: nonBrand, name: nonName, ingredients: nonIng, images: nonImg } }; + return { count, totalBytes, nonEmpty: { brand: nonBrand, name: nonName, ingredients: nonIng, images: nonImg }, validationStats: lastStats }; } -async function* projectRows(lines: AsyncIterable): AsyncGenerator { +async function* projectRows(lines: AsyncIterable): AsyncGenerator<{ row: CacheRow; stats: { totalLines: number; emptyLines: number; jsonParseErrors: number; noBarcode: number; validProducts: number } }> { + let totalLines = 0; + let emptyLines = 0; + let jsonParseErrors = 0; + let noBarcode = 0; + let validProducts = 0; + for await (const line of lines) { + totalLines++; const trimmed = line.trim(); - if (!trimmed) continue; + if (!trimmed) { + emptyLines++; + continue; + } + try { const product = JSON.parse(trimmed); const row = mapToCacheRow(product); if (row && row.barcode) { - yield row; + validProducts++; + yield { row, stats: { totalLines, emptyLines, jsonParseErrors, noBarcode, validProducts } }; + } else { + noBarcode++; } } catch (_) { - // skip invalid lines + jsonParseErrors++; } } } @@ -353,6 +383,18 @@ async function main() { console.log(` name: ${stats.nonEmpty.name}`); console.log(` ingredients: ${stats.nonEmpty.ingredients}`); console.log(` images: ${stats.nonEmpty.images}`); + + // Show validation summary + if (stats.validationStats) { + console.log("\nšŸ“Š Data Quality Summary:"); + const validRate = ((stats.validationStats.validProducts / stats.validationStats.totalLines) * 100).toFixed(1); + console.log(` Success rate: ${validRate}% (${stats.validationStats.validProducts.toLocaleString()} valid out of ${stats.validationStats.totalLines.toLocaleString()} total)`); + console.log(` Invalid breakdown:`); + console.log(` - Empty lines: ${stats.validationStats.emptyLines.toLocaleString()}`); + console.log(` - JSON parse errors: ${stats.validationStats.jsonParseErrors.toLocaleString()}`); + console.log(` - No barcode: ${stats.validationStats.noBarcode.toLocaleString()}`); + } + const proceed = await askUploadPermission(stats); if (!proceed) { console.log("Upload skipped."); From 162c9e7349675fde809e6a986843d6d5f0447117 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Tue, 30 Sep 2025 17:51:00 +0530 Subject: [PATCH 13/21] perf: optimize for speed with larger batches, parallel uploads, and better I/O --- local/off_ingest.ts | 48 +++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/local/off_ingest.ts b/local/off_ingest.ts index 4bdfabe..0442a60 100644 --- a/local/off_ingest.ts +++ b/local/off_ingest.ts @@ -1,5 +1,6 @@ -// deno run -A local/off_ingest.ts +// deno run -A --unstable-kv local/off_ingest.ts // Environment: Copy .env.template to .env and fill in your values +// Performance: Use --unstable-kv for better memory management import { createClient } from "npm:@supabase/supabase-js@2.39.3"; @@ -45,8 +46,9 @@ type CacheRow = { const OFF_JSONL_GZ_URL = "https://static.openfoodfacts.org/data/openfoodfacts-products.jsonl.gz"; const OUTPUT_PATH = "local/off_inventory_cache.jsonl"; -const BATCH_UPLOAD_SIZE = 1000; -const BATCHES_PER_CONFIRMATION = 5; // Upload 5 batches (5000 rows) before asking for confirmation +const BATCH_UPLOAD_SIZE = 5000; // Increased from 1000 +const BATCHES_PER_CONFIRMATION = 2; // Upload 2 batches (10k rows) before asking for confirmation +const PARALLEL_BATCHES = 3; // Process multiple batches in parallel function 
mapIngredient(node: any): Ingredient { const item: Ingredient = { @@ -284,6 +286,12 @@ async function askUploadPermission(stats: { count: number; totalBytes: number }) return !!answer; } +async function uploadBatch(supabase: any, batch: any[], batchNumber: number): Promise { + const { error } = await supabase.from("inventory_cache").upsert(batch, { onConflict: "barcode" }); + if (error) throw error; + console.log(`āœ… Uploaded batch ${batchNumber} (${batch.length} rows)`); +} + async function uploadJsonlToSupabase(path: string) { const url = Deno.env.get("SUPABASE_URL") ?? ""; const key = Deno.env.get("SUPABASE_SECRET_KEY") ?? Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? ""; @@ -292,13 +300,14 @@ async function uploadJsonlToSupabase(path: string) { const file = await Deno.open(path, { read: true }); const decoder = new TextDecoder(); - const bufSize = 1024 * 1024; + const bufSize = 2 * 1024 * 1024; // Increased buffer size const buf = new Uint8Array(bufSize); let pending: any[] = []; let leftover = ""; let total = 0; let batchCount = 0; let uploadedBatches = 0; + let pendingUploads: Promise[] = []; try { while (true) { @@ -319,16 +328,27 @@ async function uploadJsonlToSupabase(path: string) { total++; if (pending.length >= BATCH_UPLOAD_SIZE) { - // Upload this batch - const { error } = await supabase.from("inventory_cache").upsert(pending, { onConflict: "barcode" }); - if (error) throw error; - + // Start parallel upload + const batchToUpload = [...pending]; batchCount++; + const uploadPromise = uploadBatch(supabase, batchToUpload, batchCount); + pendingUploads.push(uploadPromise); + + // Wait for uploads if we have too many pending + if (pendingUploads.length >= PARALLEL_BATCHES) { + await Promise.all(pendingUploads); + pendingUploads = []; + } + uploadedBatches++; - console.log(`āœ… Uploaded batch ${batchCount} (${pending.length} rows) - Total: ${total} rows`); // Check if we need confirmation if (uploadedBatches >= BATCHES_PER_CONFIRMATION) { + // Wait for all pending uploads before asking + if (pendingUploads.length > 0) { + await Promise.all(pendingUploads); + pendingUploads = []; + } console.log(`\nšŸ“Š Progress: ${total} rows uploaded in ${batchCount} batches`); const continueUpload = confirm(`Continue uploading? 
(${total} rows uploaded so far) [y/N]`); if (!continueUpload) { @@ -359,10 +379,14 @@ async function uploadJsonlToSupabase(path: string) { // Upload final batch if any if (pending.length) { - const { error } = await supabase.from("inventory_cache").upsert(pending, { onConflict: "barcode" }); - if (error) throw error; batchCount++; - console.log(`āœ… Uploaded final batch ${batchCount} (${pending.length} rows)`); + const uploadPromise = uploadBatch(supabase, pending, batchCount); + pendingUploads.push(uploadPromise); + } + + // Wait for all remaining uploads + if (pendingUploads.length > 0) { + await Promise.all(pendingUploads); } } finally { file.close(); From a2ac1b2b11298dd9636d460fa47d29bddbf4e2ca Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Wed, 1 Oct 2025 14:59:16 +0530 Subject: [PATCH 14/21] feat: organize OpenFoodFacts scripts and add inventory cache to gitignore - Move off_ingest.ts and off_upload_batch.ts to local/openfoodfacts/ folder - Add off_inventory_cache.jsonl to .gitignore (large generated file) - Fix parallel upload logic to prevent memory exhaustion - Add deduplication to handle duplicate barcodes in batches - Successfully uploaded 3M+ products to Supabase inventory_cache --- .gitignore | 1 + deno.lock | 76 +++ local/.gitignore | 3 +- local/off_ingest.ts | 441 ------------- local/openfoodfacts/off_ingest.ts | 826 ++++++++++++++++++++++++ local/openfoodfacts/off_upload_batch.ts | 56 ++ 6 files changed, 961 insertions(+), 442 deletions(-) delete mode 100644 local/off_ingest.ts create mode 100644 local/openfoodfacts/off_ingest.ts create mode 100644 local/openfoodfacts/off_upload_batch.ts diff --git a/.gitignore b/.gitignore index 508fd10..0ddfe2e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ local/finetuning/preferencevalidatordataset/.env local/.env +local/temp/ diff --git a/deno.lock b/deno.lock index dcfa245..7901036 100644 --- a/deno.lock +++ b/deno.lock @@ -1,17 +1,93 @@ { "version": "5", "specifiers": { + "npm:@supabase/supabase-js@2.39.3": "2.39.3", "npm:@types/node@*": "24.2.0" }, "npm": { + "@supabase/functions-js@2.4.5": { + "integrity": "sha512-v5GSqb9zbosquTo6gBwIiq7W9eQ7rE5QazsK/ezNiQXdCbY+bH8D9qEaBIkhVvX4ZRW5rP03gEfw5yw9tiq4EQ==", + "dependencies": [ + "@supabase/node-fetch" + ] + }, + "@supabase/gotrue-js@2.72.0": { + "integrity": "sha512-cJSFgvZhhTEyOLAwkadKAlx0zUOzUFSpMd/42iacF1C2pm4FLnc6ICHT0D/q2gPr/tz65tu0g0s51n4od82yEQ==", + "dependencies": [ + "@supabase/node-fetch" + ] + }, + "@supabase/node-fetch@2.6.15": { + "integrity": "sha512-1ibVeYUacxWYi9i0cf5efil6adJ9WRyZBLivgjs+AUpewx1F3xPi7gLgaASI2SmIQxPoCEjAsLAzKPgMJVgOUQ==", + "dependencies": [ + "whatwg-url" + ] + }, + "@supabase/postgrest-js@1.21.3": { + "integrity": "sha512-rg3DmmZQKEVCreXq6Am29hMVe1CzemXyIWVYyyua69y6XubfP+DzGfLxME/1uvdgwqdoaPbtjBDpEBhqxq1ZwA==", + "dependencies": [ + "@supabase/node-fetch" + ] + }, + "@supabase/realtime-js@2.15.4": { + "integrity": "sha512-e/FYIWjvQJHOCNACWehnKvg26zosju3694k0NMUNb+JGLdvHJzEa29ZVVLmawd2kvx4hdbv8mxSqfttRnH3+DA==", + "dependencies": [ + "@supabase/node-fetch", + "@types/phoenix", + "@types/ws", + "ws" + ] + }, + "@supabase/storage-js@2.11.0": { + "integrity": "sha512-Y+kx/wDgd4oasAgoAq0bsbQojwQ+ejIif8uczZ9qufRHWFLMU5cODT+ApHsSrDufqUcVKt+eyxtOXSkeh2v9ww==", + "dependencies": [ + "@supabase/node-fetch" + ] + }, + "@supabase/supabase-js@2.39.3": { + "integrity": "sha512-NoltJSaJNKDJNutO5sJPAAi5RIWrn1z2XH+ig1+cHDojT6BTN7TvZPNa3Kq3gFQWfO5H1N9El/bCTZJ3iFW2kQ==", + "dependencies": [ + "@supabase/functions-js", + "@supabase/gotrue-js", + 
"@supabase/node-fetch", + "@supabase/postgrest-js", + "@supabase/realtime-js", + "@supabase/storage-js" + ] + }, "@types/node@24.2.0": { "integrity": "sha512-3xyG3pMCq3oYCNg7/ZP+E1ooTaGB4cG8JWRsqqOYQdbWNY4zbaV0Ennrd7stjiJEFZCaybcIgpTjJWHRfBSIDw==", "dependencies": [ "undici-types" ] }, + "@types/phoenix@1.6.6": { + "integrity": "sha512-PIzZZlEppgrpoT2QgbnDU+MMzuR6BbCjllj0bM70lWoejMeNJAxCchxnv7J3XFkI8MpygtRpzXrIlmWUBclP5A==" + }, + "@types/ws@8.18.1": { + "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", + "dependencies": [ + "@types/node" + ] + }, + "tr46@0.0.3": { + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, "undici-types@7.10.0": { "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==" + }, + "webidl-conversions@3.0.1": { + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "whatwg-url@5.0.0": { + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": [ + "tr46", + "webidl-conversions" + ] + }, + "ws@8.18.3": { + "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==" } }, "redirects": { diff --git a/local/.gitignore b/local/.gitignore index 0275b13..6edb89a 100644 --- a/local/.gitignore +++ b/local/.gitignore @@ -1,2 +1,3 @@ supabase-service.json -datasets/ \ No newline at end of file +datasets/ +off_inventory_cache.jsonl \ No newline at end of file diff --git a/local/off_ingest.ts b/local/off_ingest.ts deleted file mode 100644 index 0442a60..0000000 --- a/local/off_ingest.ts +++ /dev/null @@ -1,441 +0,0 @@ -// deno run -A --unstable-kv local/off_ingest.ts -// Environment: Copy .env.template to .env and fill in your values -// Performance: Use --unstable-kv for better memory management - -import { createClient } from "npm:@supabase/supabase-js@2.39.3"; - -// Load environment variables from .env file -async function loadEnv() { - try { - const envText = await Deno.readTextFile("local/.env"); - const lines = envText.split("\n"); - for (const line of lines) { - const trimmed = line.trim(); - if (trimmed && !trimmed.startsWith("#")) { - const [key, ...valueParts] = trimmed.split("="); - if (key && valueParts.length > 0) { - const value = valueParts.join("=").trim(); - Deno.env.set(key.trim(), value); - } - } - } - } catch (error) { - console.warn("āš ļø Could not load .env file:", error.message); - console.warn(" Make sure to copy .env.template to .env and fill in your values"); - } -} - -type Ingredient = { - name: string; - vegan?: boolean; - vegetarian?: boolean; - ingredients?: Ingredient[]; -}; - -type Image = { url: string }; - -type CacheRow = { - barcode: string; - data_source: string; - brand?: string; - name?: string; - ingredients: Ingredient[]; - images: Image[]; - off_last_modified_t?: number; -}; - -const OFF_JSONL_GZ_URL = "https://static.openfoodfacts.org/data/openfoodfacts-products.jsonl.gz"; -const OUTPUT_PATH = "local/off_inventory_cache.jsonl"; -const BATCH_UPLOAD_SIZE = 5000; // Increased from 1000 -const BATCHES_PER_CONFIRMATION = 2; // Upload 2 batches (10k rows) before asking for confirmation -const PARALLEL_BATCHES = 3; // Process multiple batches in parallel - -function mapIngredient(node: any): Ingredient { - const item: Ingredient = { - name: typeof node?.text === "string" ? 
node.text : undefined as unknown as string, - vegan: node?.vegan, - vegetarian: node?.vegetarian, - ingredients: [], - }; - if (Array.isArray(node?.ingredients) && node.ingredients.length > 0) { - item.ingredients = node.ingredients.filter((x: any) => x && typeof x === "object").map(mapIngredient); - } - return item; -} - -function extractDisplayImageUrls(selectedImages: any): Image[] { - if (!selectedImages || typeof selectedImages !== "object") return []; - const urls: Image[] = []; - try { - for (const value of Object.values(selectedImages as Record)) { - const display = (value as any)?.display; - if (display && typeof display === "object") { - if (typeof display.en === "string" && display.en) { - urls.push({ url: display.en }); - } else { - for (const v of Object.values(display)) { - if (typeof v === "string" && v) { - urls.push({ url: v }); - break; - } - } - } - } - } - } catch (_) { - // ignore malformed structures - } - return urls; -} - -function mapToCacheRow(product: any): CacheRow | null { - const dataSource = "openfoodfacts/v3"; - - let barcode: string | undefined; - const code = product?.code; - if (typeof code === "string" && code.trim()) { - barcode = code.trim(); - } else if (typeof code === "number") { - barcode = String(code); - } else if (typeof product?._id === "string" && product._id.trim()) { - barcode = product._id.trim(); - } - if (!barcode) return null; - - let brand: string | undefined; - if (typeof product?.brand_owner === "string" && product.brand_owner.trim()) { - brand = product.brand_owner.trim(); - } else if (typeof product?.brands === "string" && product.brands.trim()) { - brand = product.brands.split(",")[0]?.trim(); - } - - let name: string | undefined; - if (typeof product?.product_name === "string" && product.product_name.trim()) { - name = product.product_name.trim(); - } else { - for (const [k, v] of Object.entries(product ?? {})) { - if (k.startsWith("product_name_") && typeof v === "string" && (v as string).trim()) { - name = (v as string).trim(); - break; - } - } - } - - let ingredients: Ingredient[] = []; - if (Array.isArray(product?.ingredients) && product.ingredients.length > 0) { - ingredients = product.ingredients.filter((x: any) => x && typeof x === "object").map(mapIngredient); - } - - const images = extractDisplayImageUrls(product?.selected_images); - const off_last_modified_t = typeof product?.last_modified_t === "number" ? product.last_modified_t : undefined; - - return { - barcode, - data_source: dataSource, - brand, - name, - ingredients, - images, - off_last_modified_t, - }; -} - -async function* iterLinesFromGzip(url: string): AsyncGenerator { - console.log("šŸ“„ Downloading Open Food Facts data..."); - const res = await fetch(url); - if (!res.body) throw new Error("No response body from OFF"); - - const contentLength = res.headers.get("content-length"); - const totalBytes = contentLength ? parseInt(contentLength) : 0; - console.log(`šŸ“¦ File size: ${totalBytes > 0 ? formatBytes(totalBytes) : "unknown"}`); - - const decompressed = res.body.pipeThrough(new DecompressionStream("gzip")); - const textStream = decompressed.pipeThrough(new TextDecoderStream()); - const reader = textStream.getReader(); - let buf = ""; - let lineCount = 0; - let lastProgressTime = Date.now(); - - try { - while (true) { - const { value, done } = await reader.read(); - if (done) break; - buf += value ?? 
""; - let idx: number; - while ((idx = buf.indexOf("\n")) !== -1) { - const line = buf.slice(0, idx); - buf = buf.slice(idx + 1); - lineCount++; - - // Show progress every 50k lines or every 10 seconds - const now = Date.now(); - if (lineCount % 50000 === 0 || now - lastProgressTime > 10000) { - console.log(`šŸ“Š Processed ${lineCount.toLocaleString()} products...`); - lastProgressTime = now; - } - - yield line; - } - } - if (buf.length > 0) { - lineCount++; - yield buf; - } - } finally { - reader.releaseLock(); - } - console.log(`āœ… Download complete! Processed ${lineCount.toLocaleString()} products`); -} - -async function writeJsonl(rows: AsyncIterable<{ row: CacheRow; stats: any }>, outPath: string): Promise<{ count: number; totalBytes: number; nonEmpty: { brand: number; name: number; ingredients: number; images: number }; validationStats: any }> { - console.log("šŸ’¾ Writing transformed data to local file..."); - const file = await Deno.open(outPath, { create: true, write: true, truncate: true }); - const encoder = new TextEncoder(); - let count = 0; - let totalBytes = 0; - let nonBrand = 0, nonName = 0, nonIng = 0, nonImg = 0; - let lastProgressTime = Date.now(); - let lastStats: any = null; - - try { - for await (const { row, stats } of rows) { - count++; - if (row.brand) nonBrand++; - if (row.name) nonName++; - if (row.ingredients && row.ingredients.length) nonIng++; - if (row.images && row.images.length) nonImg++; - const json = JSON.stringify(row); - totalBytes += encoder.encode(json).byteLength; - await file.write(encoder.encode(json + "\n")); - lastStats = stats; - - // Show progress every 25k rows or every 5 seconds - const now = Date.now(); - if (count % 25000 === 0 || now - lastProgressTime > 5000) { - const validRate = ((lastStats.validProducts / lastStats.totalLines) * 100).toFixed(1); - const invalidRate = (((lastStats.emptyLines + lastStats.jsonParseErrors + lastStats.noBarcode) / lastStats.totalLines) * 100).toFixed(1); - console.log(`šŸ“ Written ${count.toLocaleString()} products (${validRate}% valid, ${invalidRate}% invalid)`); - lastProgressTime = now; - } - } - } finally { - file.close(); - } - - // Final validation statistics - if (lastStats) { - console.log(`\nšŸ“Š Validation Statistics:`); - console.log(` Total lines processed: ${lastStats.totalLines.toLocaleString()}`); - console.log(` Valid products: ${lastStats.validProducts.toLocaleString()} (${((lastStats.validProducts / lastStats.totalLines) * 100).toFixed(1)}%)`); - console.log(` Invalid products: ${(lastStats.emptyLines + lastStats.jsonParseErrors + lastStats.noBarcode).toLocaleString()} (${(((lastStats.emptyLines + lastStats.jsonParseErrors + lastStats.noBarcode) / lastStats.totalLines) * 100).toFixed(1)}%)`); - console.log(` - Empty lines: ${lastStats.emptyLines.toLocaleString()}`); - console.log(` - JSON parse errors: ${lastStats.jsonParseErrors.toLocaleString()}`); - console.log(` - No barcode: ${lastStats.noBarcode.toLocaleString()}`); - } - - console.log(`āœ… Local file complete! 
${count.toLocaleString()} products written`); - return { count, totalBytes, nonEmpty: { brand: nonBrand, name: nonName, ingredients: nonIng, images: nonImg }, validationStats: lastStats }; -} - -async function* projectRows(lines: AsyncIterable): AsyncGenerator<{ row: CacheRow; stats: { totalLines: number; emptyLines: number; jsonParseErrors: number; noBarcode: number; validProducts: number } }> { - let totalLines = 0; - let emptyLines = 0; - let jsonParseErrors = 0; - let noBarcode = 0; - let validProducts = 0; - - for await (const line of lines) { - totalLines++; - const trimmed = line.trim(); - if (!trimmed) { - emptyLines++; - continue; - } - - try { - const product = JSON.parse(trimmed); - const row = mapToCacheRow(product); - if (row && row.barcode) { - validProducts++; - yield { row, stats: { totalLines, emptyLines, jsonParseErrors, noBarcode, validProducts } }; - } else { - noBarcode++; - } - } catch (_) { - jsonParseErrors++; - } - } -} - -function formatBytes(bytes: number): string { - const units = ["B", "KB", "MB", "GB", "TB"] as const; - let i = 0; - let n = bytes; - while (n >= 1024 && i < units.length - 1) { - n /= 1024; - i++; - } - return `${n.toFixed(2)} ${units[i]}`; -} - -async function askUploadPermission(stats: { count: number; totalBytes: number }): Promise { - console.log("\nSummary:"); - console.log(` Rows: ${stats.count}`); - console.log(` Payload size (JSON only): ${formatBytes(stats.totalBytes)} (~${Math.round(stats.totalBytes / Math.max(1, stats.count))} B/row)`); - const answer = confirm("Upload to Supabase inventory_cache? This can take a long time. (y/N)"); - return !!answer; -} - -async function uploadBatch(supabase: any, batch: any[], batchNumber: number): Promise { - const { error } = await supabase.from("inventory_cache").upsert(batch, { onConflict: "barcode" }); - if (error) throw error; - console.log(`āœ… Uploaded batch ${batchNumber} (${batch.length} rows)`); -} - -async function uploadJsonlToSupabase(path: string) { - const url = Deno.env.get("SUPABASE_URL") ?? ""; - const key = Deno.env.get("SUPABASE_SECRET_KEY") ?? Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? 
""; - if (!url || !key) throw new Error("SUPABASE_URL and SUPABASE_SECRET_KEY (or SUPABASE_SERVICE_ROLE_KEY for legacy) must be set in environment"); - const supabase = createClient(url, key, { auth: { persistSession: false } }); - - const file = await Deno.open(path, { read: true }); - const decoder = new TextDecoder(); - const bufSize = 2 * 1024 * 1024; // Increased buffer size - const buf = new Uint8Array(bufSize); - let pending: any[] = []; - let leftover = ""; - let total = 0; - let batchCount = 0; - let uploadedBatches = 0; - let pendingUploads: Promise[] = []; - - try { - while (true) { - const read = await file.read(buf); - if (read === null) break; - const chunk = decoder.decode(buf.subarray(0, read)); - let data = leftover + chunk; - let idx: number; - while ((idx = data.indexOf("\n")) !== -1) { - const line = data.slice(0, idx); - data = data.slice(idx + 1); - if (!line) continue; - try { - const row = JSON.parse(line); - // Set last_refreshed_at during upsert - row.last_refreshed_at = new Date().toISOString(); - pending.push(row); - total++; - - if (pending.length >= BATCH_UPLOAD_SIZE) { - // Start parallel upload - const batchToUpload = [...pending]; - batchCount++; - const uploadPromise = uploadBatch(supabase, batchToUpload, batchCount); - pendingUploads.push(uploadPromise); - - // Wait for uploads if we have too many pending - if (pendingUploads.length >= PARALLEL_BATCHES) { - await Promise.all(pendingUploads); - pendingUploads = []; - } - - uploadedBatches++; - - // Check if we need confirmation - if (uploadedBatches >= BATCHES_PER_CONFIRMATION) { - // Wait for all pending uploads before asking - if (pendingUploads.length > 0) { - await Promise.all(pendingUploads); - pendingUploads = []; - } - console.log(`\nšŸ“Š Progress: ${total} rows uploaded in ${batchCount} batches`); - const continueUpload = confirm(`Continue uploading? (${total} rows uploaded so far) [y/N]`); - if (!continueUpload) { - console.log("āŒ Upload cancelled by user"); - return; - } - uploadedBatches = 0; // Reset counter - } - - pending = []; - } - } catch (_) { - // skip invalid JSON - } - } - leftover = data; - } - - // Handle remaining data - if (leftover.trim()) { - try { - const row = JSON.parse(leftover.trim()); - row.last_refreshed_at = new Date().toISOString(); - pending.push(row); - total++; - } catch (_) {} - } - - // Upload final batch if any - if (pending.length) { - batchCount++; - const uploadPromise = uploadBatch(supabase, pending, batchCount); - pendingUploads.push(uploadPromise); - } - - // Wait for all remaining uploads - if (pendingUploads.length > 0) { - await Promise.all(pendingUploads); - } - } finally { - file.close(); - } - - console.log(`\nšŸŽ‰ Upload complete! 
Total: ${total} rows uploaded in ${batchCount} batches`); -} - -async function main() { - // Load environment variables from .env file - await loadEnv(); - - console.log("Downloading OFF JSONL.gz and streaming transform..."); - const rows = projectRows(iterLinesFromGzip(OFF_JSONL_GZ_URL)); - const stats = await writeJsonl(rows, OUTPUT_PATH); - console.log("\nField coverage (non-empty counts):"); - console.log(` brand: ${stats.nonEmpty.brand}`); - console.log(` name: ${stats.nonEmpty.name}`); - console.log(` ingredients: ${stats.nonEmpty.ingredients}`); - console.log(` images: ${stats.nonEmpty.images}`); - - // Show validation summary - if (stats.validationStats) { - console.log("\nšŸ“Š Data Quality Summary:"); - const validRate = ((stats.validationStats.validProducts / stats.validationStats.totalLines) * 100).toFixed(1); - console.log(` Success rate: ${validRate}% (${stats.validationStats.validProducts.toLocaleString()} valid out of ${stats.validationStats.totalLines.toLocaleString()} total)`); - console.log(` Invalid breakdown:`); - console.log(` - Empty lines: ${stats.validationStats.emptyLines.toLocaleString()}`); - console.log(` - JSON parse errors: ${stats.validationStats.jsonParseErrors.toLocaleString()}`); - console.log(` - No barcode: ${stats.validationStats.noBarcode.toLocaleString()}`); - } - - const proceed = await askUploadPermission(stats); - if (!proceed) { - console.log("Upload skipped."); - return; - } - console.log("\nUploading to Supabase (batched upserts)..."); - const start = Date.now(); - await uploadJsonlToSupabase(OUTPUT_PATH); - const elapsed = (Date.now() - start) / 1000; - console.log(`Done in ${elapsed.toFixed(1)}s.`); -} - -if (import.meta.main) { - await main().catch((err) => { - console.error("Error:", err); - Deno.exit(1); - }); -} - - diff --git a/local/openfoodfacts/off_ingest.ts b/local/openfoodfacts/off_ingest.ts new file mode 100644 index 0000000..0cd8263 --- /dev/null +++ b/local/openfoodfacts/off_ingest.ts @@ -0,0 +1,826 @@ +// deno run -A --unstable-kv local/openfoodfacts/off_ingest.ts +// Environment: Copy .env.template to .env and fill in your values +// Performance: Use --unstable-kv for better memory management + +// Load environment variables from .env file +async function loadEnv() { + try { + const envText = await Deno.readTextFile("local/.env"); + const lines = envText.split("\n"); + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed && !trimmed.startsWith("#")) { + const [key, ...valueParts] = trimmed.split("="); + if (key && valueParts.length > 0) { + const value = valueParts.join("=").trim(); + Deno.env.set(key.trim(), value); + } + } + } + } catch (error) { + console.warn("āš ļø Could not load .env file:", error.message); + console.warn(" Make sure to copy .env.template to .env and fill in your values"); + } +} + +type Ingredient = { + name: string; + vegan?: boolean; + vegetarian?: boolean; + ingredients?: Ingredient[]; +}; + +type Image = { + url: string; + resolution?: string; + width?: number; + height?: number; +}; + +type CacheRow = { + barcode: string; + data_source: string; + brand?: string; + name?: string; + ingredients: Ingredient[]; + images: Image[]; + off_last_modified_t?: number; +}; + +const OFF_JSONL_GZ_URL = "https://static.openfoodfacts.org/data/openfoodfacts-products.jsonl.gz"; +const OUTPUT_PATH = "local/off_inventory_cache.jsonl"; +const BATCH_UPLOAD_SIZE = 1000; // Products per batch +const BATCHES_PER_CONFIRMATION = 500; // Upload 500 batches (500k rows) before asking for confirmation +const 
PARALLEL_BATCHES = 10; // Upload 10 batches in parallel = 10,000 products per batch group +const DELAY_BETWEEN_BATCH_GROUPS_MS = 500; // 500ms delay between batch groups to avoid rate limiting +const SAMPLE_SIZE = 10000; // Sample size for size estimation +const SAMPLE_LINES = 100000; // Only process first 100k lines for sampling + +function mapIngredient(node: any): Ingredient { + const item: Ingredient = { + name: typeof node?.text === "string" ? node.text : undefined as unknown as string, + vegan: node?.vegan, + vegetarian: node?.vegetarian, + ingredients: [], + }; + if (Array.isArray(node?.ingredients) && node.ingredients.length > 0) { + item.ingredients = node.ingredients.filter((x: any) => x && typeof x === "object").map(mapIngredient); + } + return item; +} + +function isValidImageUrl(url: string): boolean { + try { + const parsedUrl = new URL(url); + return parsedUrl.protocol === 'https:' && + parsedUrl.hostname === 'static.openfoodfacts.org' && + url.includes('/images/products/') && + url.endsWith('.jpg'); + } catch { + return false; + } +} + +function extractDisplayImageUrls(images: any): Image[] { + if (!images || typeof images !== "object") { + return []; + } + + const urls: Image[] = []; + const processedImages = new Set(); // Track processed image IDs to avoid duplicates + + try { + // Open Food Facts image structure: images contains both numeric keys (1,2,3,4) and language-specific keys (front_en, ingredients_fr, etc.) + // Language-specific keys reference numeric images via imgid + + // First, collect all language-specific front images + const languageKeys = ['front_en', 'front_fr', 'front_de', 'front_es', 'front_it', 'front_pt', 'front_nl', 'front_sv', 'front_da', 'front_no', 'front_fi']; + + for (const langKey of languageKeys) { + const imageRef = images[langKey]; + if (imageRef && typeof imageRef === "object" && imageRef.imgid) { + const imgId = imageRef.imgid; + if (processedImages.has(imgId)) continue; // Skip if already processed + + const imageData = images[imgId]; + if (imageData && typeof imageData === "object" && imageData.sizes) { + const sizes = imageData.sizes; + + // Collect all available sizes for this image, grouped by resolution + const imageUrls: { url: string; resolution: string; width: number; height: number }[] = []; + + if (sizes.full && sizes.full.w && sizes.full.h) { + const url = `https://static.openfoodfacts.org/images/products/${imgId}/front.${sizes.full.w}x${sizes.full.h}.jpg`; + if (isValidImageUrl(url)) { + imageUrls.push({ url, resolution: 'full', width: sizes.full.w, height: sizes.full.h }); + } + } + + if (sizes["400"] && sizes["400"].w && sizes["400"].h) { + const url = `https://static.openfoodfacts.org/images/products/${imgId}/front.400x${sizes["400"].h}.jpg`; + if (isValidImageUrl(url)) { + imageUrls.push({ url, resolution: '400px', width: sizes["400"].w, height: sizes["400"].h }); + } + } + + if (sizes["200"] && sizes["200"].w && sizes["200"].h) { + const url = `https://static.openfoodfacts.org/images/products/${imgId}/front.200x${sizes["200"].h}.jpg`; + if (isValidImageUrl(url)) { + imageUrls.push({ url, resolution: '200px', width: sizes["200"].w, height: sizes["200"].h }); + } + } + + if (sizes["100"] && sizes["100"].w && sizes["100"].h) { + const url = `https://static.openfoodfacts.org/images/products/${imgId}/front.100x${sizes["100"].h}.jpg`; + if (isValidImageUrl(url)) { + imageUrls.push({ url, resolution: '100px', width: sizes["100"].w, height: sizes["100"].h }); + } + } + + // Add all valid URLs for this image + for (const img of 
imageUrls) { + urls.push({ + url: img.url, + resolution: img.resolution, + width: img.width, + height: img.height + }); + } + + processedImages.add(imgId); + } + } + } + + // If no language-specific front images found, try any numeric image + if (urls.length === 0) { + for (const [key, imageData] of Object.entries(images)) { + if (/^\d+$/.test(key) && imageData && typeof imageData === "object" && imageData.sizes) { + const sizes = imageData.sizes; + + // Collect all available sizes for this image + if (sizes.full && sizes.full.w && sizes.full.h) { + const url = `https://static.openfoodfacts.org/images/products/${key}/front.${sizes.full.w}x${sizes.full.h}.jpg`; + if (isValidImageUrl(url)) { + urls.push({ + url, + resolution: 'full', + width: sizes.full.w, + height: sizes.full.h + }); + } + } + + if (sizes["400"] && sizes["400"].w && sizes["400"].h) { + const url = `https://static.openfoodfacts.org/images/products/${key}/front.400x${sizes["400"].h}.jpg`; + if (isValidImageUrl(url)) { + urls.push({ + url, + resolution: '400px', + width: sizes["400"].w, + height: sizes["400"].h + }); + } + } + + if (urls.length > 0) break; // Stop after finding the first valid image + } + } + } + } catch (error) { + // ignore malformed structures + } + + return urls; +} + +function mapToCacheRow(product: any): CacheRow | null { + const dataSource = "openfoodfacts/v3"; + + let barcode: string | undefined; + const code = product?.code; + if (typeof code === "string" && code.trim()) { + barcode = code.trim(); + } else if (typeof code === "number") { + barcode = String(code); + } else if (typeof product?._id === "string" && product._id.trim()) { + barcode = product._id.trim(); + } + if (!barcode) return null; + + let brand: string | undefined; + if (typeof product?.brand_owner === "string" && product.brand_owner.trim()) { + brand = product.brand_owner.trim(); + } else if (typeof product?.brands === "string" && product.brands.trim()) { + brand = product.brands.split(",")[0]?.trim(); + } + + let name: string | undefined; + if (typeof product?.product_name === "string" && product.product_name.trim()) { + name = product.product_name.trim(); + } else { + for (const [k, v] of Object.entries(product ?? {})) { + if (k.startsWith("product_name_") && typeof v === "string" && (v as string).trim()) { + name = (v as string).trim(); + break; + } + } + } + + let ingredients: Ingredient[] = []; + if (Array.isArray(product?.ingredients) && product.ingredients.length > 0) { + ingredients = product.ingredients.filter((x: any) => x && typeof x === "object").map(mapIngredient); + } + + const images = extractDisplayImageUrls(product?.images); + const off_last_modified_t = typeof product?.last_modified_t === "number" ? product.last_modified_t : undefined; + + + + return { + barcode, + data_source: dataSource, + brand, + name, + ingredients, + images, + off_last_modified_t, + }; +} + +async function* iterLinesFromGzip(url: string, showProgress: boolean = true): AsyncGenerator { + if (showProgress) { + console.log("šŸ“„ Downloading Open Food Facts data..."); + } + const res = await fetch(url); + if (!res.body) throw new Error("No response body from OFF"); + + const contentLength = res.headers.get("content-length"); + const totalBytes = contentLength ? parseInt(contentLength) : 0; + if (showProgress) { + console.log(`šŸ“¦ File size: ${totalBytes > 0 ? 
formatBytes(totalBytes) : "unknown"}`); + } + + const decompressed = res.body.pipeThrough(new DecompressionStream("gzip")); + const textStream = decompressed.pipeThrough(new TextDecoderStream()); + const reader = textStream.getReader(); + let buf = ""; + let lineCount = 0; + let lastProgressTime = Date.now(); + + try { + while (true) { + const { value, done } = await reader.read(); + if (done) break; + buf += value ?? ""; + let idx: number; + while ((idx = buf.indexOf("\n")) !== -1) { + const line = buf.slice(0, idx); + buf = buf.slice(idx + 1); + lineCount++; + + // Show progress every 50k lines or every 10 seconds (only if showProgress is true) + if (showProgress) { + const now = Date.now(); + if (lineCount % 50000 === 0 || now - lastProgressTime > 10000) { + console.log(`šŸ“Š Processed ${lineCount.toLocaleString()} products...`); + lastProgressTime = now; + } + } + + yield line; + } + } + if (buf.length > 0) { + lineCount++; + yield buf; + } + } finally { + reader.releaseLock(); + } + if (showProgress) { + console.log(`āœ… Download complete! Processed ${lineCount.toLocaleString()} products`); + } +} + +async function writeJsonl(rows: AsyncIterable<{ row: CacheRow; stats: any }>, outPath: string): Promise<{ count: number; totalBytes: number; nonEmpty: { brand: number; name: number; ingredients: number; images: number }; validationStats: any }> { + console.log("šŸ’¾ Writing transformed data to local file..."); + const file = await Deno.open(outPath, { create: true, write: true, truncate: true }); + const encoder = new TextEncoder(); + let count = 0; + let totalBytes = 0; + let nonBrand = 0, nonName = 0, nonIng = 0, nonImg = 0; + let lastProgressTime = Date.now(); + let lastStats: any = null; + + try { + for await (const { row, stats } of rows) { + count++; + if (row.brand) nonBrand++; + if (row.name) nonName++; + if (row.ingredients && row.ingredients.length) nonIng++; + if (row.images && row.images.length) nonImg++; + const json = JSON.stringify(row); + totalBytes += encoder.encode(json).byteLength; + await file.write(encoder.encode(json + "\n")); + lastStats = stats; + + // Show progress every 25k rows or every 5 seconds + const now = Date.now(); + if (count % 25000 === 0 || now - lastProgressTime > 5000) { + const validRate = ((lastStats.validProducts / lastStats.totalLines) * 100).toFixed(1); + const invalidRate = (((lastStats.emptyLines + lastStats.jsonParseErrors + lastStats.noBarcode) / lastStats.totalLines) * 100).toFixed(1); + console.log(`šŸ“ Written ${count.toLocaleString()} products (${validRate}% valid, ${invalidRate}% invalid)`); + lastProgressTime = now; + } + } + } finally { + file.close(); + } + + // Final validation statistics + if (lastStats) { + console.log(`\nšŸ“Š Validation Statistics:`); + console.log(` Total lines processed: ${lastStats.totalLines.toLocaleString()}`); + console.log(` Valid products: ${lastStats.validProducts.toLocaleString()} (${((lastStats.validProducts / lastStats.totalLines) * 100).toFixed(1)}%)`); + console.log(` Invalid products: ${(lastStats.emptyLines + lastStats.jsonParseErrors + lastStats.noBarcode).toLocaleString()} (${(((lastStats.emptyLines + lastStats.jsonParseErrors + lastStats.noBarcode) / lastStats.totalLines) * 100).toFixed(1)}%)`); + console.log(` - Empty lines: ${lastStats.emptyLines.toLocaleString()}`); + console.log(` - JSON parse errors: ${lastStats.jsonParseErrors.toLocaleString()}`); + console.log(` - No barcode: ${lastStats.noBarcode.toLocaleString()}`); + } + + console.log(`āœ… Local file complete! 
${count.toLocaleString()} products written`); + return { count, totalBytes, nonEmpty: { brand: nonBrand, name: nonName, ingredients: nonIng, images: nonImg }, validationStats: lastStats }; +} + +async function* projectRows(lines: AsyncIterable): AsyncGenerator<{ row: CacheRow; stats: { totalLines: number; emptyLines: number; jsonParseErrors: number; noBarcode: number; validProducts: number } }> { + let totalLines = 0; + let emptyLines = 0; + let jsonParseErrors = 0; + let noBarcode = 0; + let validProducts = 0; + + for await (const line of lines) { + totalLines++; + + const trimmed = line.trim(); + if (!trimmed) { + emptyLines++; + continue; + } + + try { + const product = JSON.parse(trimmed); + const row = mapToCacheRow(product); + if (row && row.barcode) { + validProducts++; + + // Progress reporting every 50k products + if (validProducts % 50000 === 0) { + const validRate = ((validProducts / totalLines) * 100).toFixed(1); + const invalidRate = (((emptyLines + jsonParseErrors + noBarcode) / totalLines) * 100).toFixed(1); + console.log(`šŸ“Š Processed ${totalLines.toLocaleString()} lines, found ${validProducts.toLocaleString()} valid products (${validRate}% valid, ${invalidRate}% invalid)...`); + } + + yield { row, stats: { totalLines, emptyLines, jsonParseErrors, noBarcode, validProducts } }; + } else { + noBarcode++; + } + } catch (_) { + jsonParseErrors++; + } + } +} + +function formatBytes(bytes: number): string { + const units = ["B", "KB", "MB", "GB", "TB"] as const; + let i = 0; + let n = bytes; + while (n >= 1024 && i < units.length - 1) { + n /= 1024; + i++; + } + return `${n.toFixed(2)} ${units[i]}`; +} + +function estimateDatabaseSize(sampleRows: CacheRow[], totalProducts: number): { + avgRowSize: number; + estimatedTableSize: number; + estimatedIndexSize: number; + estimatedTotalSize: number; +} { + if (sampleRows.length === 0) { + return { avgRowSize: 0, estimatedTableSize: 0, estimatedIndexSize: 0, estimatedTotalSize: 0 }; + } + + // Calculate average row size from sample + const sampleSizes = sampleRows.map(row => { + const json = JSON.stringify(row); + return new TextEncoder().encode(json).byteLength; + }); + + const avgRowSize = sampleSizes.reduce((sum, size) => sum + size, 0) / sampleSizes.length; + + // Estimate table size (data only) + const estimatedTableSize = avgRowSize * totalProducts; + + // Estimate index size (barcode PK + other indexes) + // Barcode index: ~20 bytes per row + overhead + const barcodeIndexSize = (20 + 8) * totalProducts; // 20 bytes key + 8 bytes pointer + const otherIndexSize = totalProducts * 16; // Additional indexes + const estimatedIndexSize = barcodeIndexSize + otherIndexSize; + + // PostgreSQL overhead (20-30% for metadata, TOAST, etc.) 
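+  // Rough heuristic, not a measured value: apply the midpoint of that range (25%)
+  // multiplicatively, i.e. estimatedTotalSize ā‰ˆ (table bytes + index bytes) * 1.25.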
+ const overhead = 0.25; + const estimatedTotalSize = (estimatedTableSize + estimatedIndexSize) * (1 + overhead); + + return { + avgRowSize: Math.round(avgRowSize), + estimatedTableSize: Math.round(estimatedTableSize), + estimatedIndexSize: Math.round(estimatedIndexSize), + estimatedTotalSize: Math.round(estimatedTotalSize) + }; +} + +async function sampleProductsForSizeEstimation(lines: AsyncIterable, sampleSize: number, maxLines: number): Promise<{ sampleRows: CacheRow[]; estimatedTotalValidProducts: number; sampleLines: number }> { + console.log(`šŸ“Š Sampling ${sampleSize.toLocaleString()} products from first ${maxLines.toLocaleString()} lines for size estimation...`); + const sampleRows: CacheRow[] = []; + let sampleLines = 0; + let validProductsInSample = 0; + let sampled = 0; + let lastProgressTime = Date.now(); + + for await (const line of lines) { + sampleLines++; + const trimmed = line.trim(); + if (!trimmed) continue; + + try { + const product = JSON.parse(trimmed); + const row = mapToCacheRow(product); + if (row && row.barcode) { + validProductsInSample++; + if (sampled < sampleSize) { + sampleRows.push(row); + sampled++; + } + } + } catch (_) { + // skip invalid lines + } + + // Show progress every 10k lines or every 5 seconds + const now = Date.now(); + if (sampleLines % 10000 === 0 || now - lastProgressTime > 5000) { + console.log(`šŸ“Š Scanned ${sampleLines.toLocaleString()} lines, found ${validProductsInSample.toLocaleString()} valid products...`); + lastProgressTime = now; + } + + // Stop after processing maxLines + if (sampleLines >= maxLines) break; + } + + // Estimate total valid products based on sample ratio + const validRatio = validProductsInSample / sampleLines; + const estimatedTotalValidProducts = Math.round(validRatio * 4046118); // Known total lines from earlier + + console.log(`āœ… Sampled ${sampled.toLocaleString()} products from ${validProductsInSample.toLocaleString()} valid products in ${sampleLines.toLocaleString()} lines`); + console.log(`šŸ“Š Estimated total valid products: ${estimatedTotalValidProducts.toLocaleString()} (${(validRatio * 100).toFixed(1)}% valid rate)`); + + return { sampleRows, estimatedTotalValidProducts, sampleLines }; +} + +async function askUploadPermission(stats: { count: number; totalBytes: number; dbEstimate?: any }): Promise { + console.log("\nSummary:"); + console.log(` Rows: ${stats.count}`); + console.log(` Payload size (JSON only): ${formatBytes(stats.totalBytes)} (~${Math.round(stats.totalBytes / Math.max(1, stats.count))} B/row)`); + + if (stats.dbEstimate) { + console.log("\nšŸ“Š Database Size Estimate:"); + console.log(` Average row size: ${formatBytes(stats.dbEstimate.avgRowSize)}`); + console.log(` Table data size: ${formatBytes(stats.dbEstimate.estimatedTableSize)}`); + console.log(` Index size: ${formatBytes(stats.dbEstimate.estimatedIndexSize)}`); + console.log(` Total estimated size: ${formatBytes(stats.dbEstimate.estimatedTotalSize)}`); + } + + const answer = confirm("Upload to Supabase inventory_cache? This can take a long time. (y/N)"); + return !!answer; +} + + +async function uploadJsonlToSupabase(path: string) { + console.log("šŸ“¤ Starting upload to Supabase..."); + + // Load env for Supabase connection + const url = Deno.env.get("SUPABASE_URL") ?? ""; + const key = Deno.env.get("SUPABASE_SECRET_KEY") ?? Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? 
""; + + if (!url || !key) { + console.error("āŒ SUPABASE_URL and SUPABASE_SECRET_KEY must be set"); + Deno.exit(1); + } + + const { createClient } = await import("npm:@supabase/supabase-js@2.39.3"); + const supabase = createClient(url, key, { auth: { persistSession: false } }); + + console.log("šŸ” Opening file:", path); + const file = await Deno.open(path, { read: true }); + console.log("āœ… File opened successfully"); + + const decoder = new TextDecoder(); + const bufSize = 64 * 1024; + const buf = new Uint8Array(bufSize); + let pending: any[] = []; + let leftover = ""; + let total = 0; + let batchCount = 0; + let uploadedBatches = 0; + let parallelUploads: Promise[] = []; // Track parallel uploads + + // Progress tracking + const startTime = Date.now(); + const progressInterval = setInterval(() => { + const elapsed = (Date.now() - startTime) / 1000; + const rate = total / elapsed; + console.log(`šŸ“Š Progress: ${total} products processed, ${batchCount} batches uploaded (${rate.toFixed(0)} products/sec)`); + }, 10000); // Every 10 seconds + + try { + while (true) { + const read = await file.read(buf); + if (read === null) { + break; + } + + const chunk = decoder.decode(buf.subarray(0, read)); + let data = leftover + chunk; + let idx: number; + let linesInChunk = 0; + while ((idx = data.indexOf("\n")) !== -1) { + linesInChunk++; + const line = data.slice(0, idx); + data = data.slice(idx + 1); + if (!line) continue; + + try { + const row = JSON.parse(line); + row.last_refreshed_at = new Date().toISOString(); + pending.push(row); + total++; + + if (pending.length >= BATCH_UPLOAD_SIZE) { + batchCount++; + const currentBatchNum = batchCount; + + // Deduplicate by barcode (keep last occurrence) to avoid "ON CONFLICT DO UPDATE command cannot affect row a second time" error + const deduped = new Map(); + for (const row of pending) { + deduped.set(row.barcode, row); + } + + // Create batch for upload + const batch = Array.from(deduped.values()).map(row => ({ + ...row, + last_refreshed_at: new Date().toISOString() + })); + + // Log if duplicates were found + const duplicateCount = pending.length - batch.length; + if (duplicateCount > 0) { + console.log(`āš ļø Removed ${duplicateCount} duplicate barcodes from batch ${currentBatchNum}`); + } + + // Clear pending immediately + pending = []; + uploadedBatches++; + + // Wait if we've reached the parallel limit BEFORE creating new promise + if (parallelUploads.length >= PARALLEL_BATCHES) { + console.log(`ā³ Waiting for ${parallelUploads.length} parallel uploads to complete...`); + await Promise.all(parallelUploads); + parallelUploads = []; + console.log(`āœ… Completed batch group at ${batchCount} batches`); + + // Add delay to avoid rate limiting + if (DELAY_BETWEEN_BATCH_GROUPS_MS > 0) { + await new Promise(resolve => setTimeout(resolve, DELAY_BETWEEN_BATCH_GROUPS_MS)); + } + } + + // Now create and add the upload promise + const uploadPromise = (async () => { + try { + const { error } = await supabase + .from('inventory_cache') + .upsert(batch, { onConflict: 'barcode' }); + + if (error) { + console.error(`āŒ Batch ${currentBatchNum} failed:`, error.message); + throw error; + } + + // Only log every 10 batches + if (currentBatchNum % 10 === 0) { + console.log(`āœ… Uploaded batch ${currentBatchNum} (${batch.length} rows)`); + } + } catch (error) { + console.error(`āŒ Upload error at batch ${currentBatchNum}:`, error); + throw error; + } + })(); + + parallelUploads.push(uploadPromise); + + // Check if we need confirmation (every 500k products) + if 
(uploadedBatches >= BATCHES_PER_CONFIRMATION) { + // Wait for any pending uploads before asking + if (parallelUploads.length > 0) { + await Promise.all(parallelUploads); + parallelUploads = []; + } + + console.log(`\nšŸ“Š Checkpoint: ${total.toLocaleString()} products uploaded in ${batchCount} batches`); + console.log(` (Uploaded ${(uploadedBatches * BATCH_UPLOAD_SIZE).toLocaleString()} products since last checkpoint)`); + const continueUpload = confirm(`Continue uploading next 500k products? [y/N]`); + if (!continueUpload) { + console.log("āŒ Upload cancelled by user"); + return; + } + uploadedBatches = 0; + console.log("āœ… Continuing upload...\n"); + } + } + } catch (error) { + console.error(`āŒ JSON parse error on line:`, error.message); + // skip invalid JSON + } + } + leftover = data; + } + + // Handle remaining data + if (leftover.trim()) { + try { + const row = JSON.parse(leftover.trim()); + row.last_refreshed_at = new Date().toISOString(); + pending.push(row); + total++; + } catch (_) { + // skip invalid JSON + } + } + + // Upload final batch if there's pending data + if (pending.length > 0) { + batchCount++; + const currentBatchNum = batchCount; + + // Deduplicate by barcode (keep last occurrence) + const deduped = new Map(); + for (const row of pending) { + deduped.set(row.barcode, row); + } + + const batch = Array.from(deduped.values()).map(row => ({ + ...row, + last_refreshed_at: new Date().toISOString() + })); + + const duplicateCount = pending.length - batch.length; + if (duplicateCount > 0) { + console.log(`āš ļø Removed ${duplicateCount} duplicate barcodes from final batch ${currentBatchNum}`); + } + + const uploadPromise = (async () => { + try { + const { error } = await supabase + .from('inventory_cache') + .upsert(batch, { onConflict: 'barcode' }); + + if (error) { + console.error(`āŒ Final batch ${currentBatchNum} failed:`, error.message); + throw error; + } + + console.log(`āœ… Uploaded final batch ${currentBatchNum} (${batch.length} rows)`); + } catch (error) { + console.error(`āŒ Upload error at final batch ${currentBatchNum}:`, error); + throw error; + } + })(); + + parallelUploads.push(uploadPromise); + } + + // Wait for all remaining parallel uploads to complete + if (parallelUploads.length > 0) { + console.log(`ā³ Waiting for final ${parallelUploads.length} uploads to complete...`); + await Promise.all(parallelUploads); + console.log(`āœ… All uploads completed!`); + } + + console.log(`āœ… Upload complete! 
${total} rows processed in ${batchCount} batches`); + + } finally { + file.close(); + clearInterval(progressInterval); + } +} + +async function main() { + // Parse command line arguments + const args = Deno.args; + const skipDownload = args.includes('--upload-only') || args.includes('-u'); + const showHelp = args.includes('--help') || args.includes('-h'); + + if (showHelp) { + console.log(` +Usage: deno run -A --unstable-kv local/openfoodfacts/off_ingest.ts [options] + +Options: + --upload-only, -u Skip download and processing, go straight to upload + --help, -h Show this help message + +Examples: + deno run -A --unstable-kv local/openfoodfacts/off_ingest.ts # Full process + deno run -A --unstable-kv local/openfoodfacts/off_ingest.ts --upload-only # Upload only + deno run -A --unstable-kv local/openfoodfacts/off_ingest.ts -u # Upload only (short) + `); + return; + } + + if (skipDownload) { + console.log("šŸš€ Starting in upload-only mode (skipping download and processing)..."); + } + + // Load environment variables from .env file + await loadEnv(); + + if (!skipDownload) { + console.log("Downloading OFF JSONL.gz and streaming transform..."); + + // First pass: Sample products for size estimation (fast) + const lines = iterLinesFromGzip(OFF_JSONL_GZ_URL, false); + const { sampleRows, estimatedTotalValidProducts } = await sampleProductsForSizeEstimation(lines, SAMPLE_SIZE, SAMPLE_LINES); + + // Estimate database size for the entire dataset + const dbEstimate = estimateDatabaseSize(sampleRows, estimatedTotalValidProducts); + console.log("\nšŸ“Š Database Size Estimate (Full Dataset):"); + console.log(` Estimated total valid products: ${estimatedTotalValidProducts.toLocaleString()}`); + console.log(` Average row size: ${formatBytes(dbEstimate.avgRowSize)}`); + console.log(` Table data size: ${formatBytes(dbEstimate.estimatedTableSize)}`); + console.log(` Index size: ${formatBytes(dbEstimate.estimatedIndexSize)}`); + console.log(` Total estimated size: ${formatBytes(dbEstimate.estimatedTotalSize)}`); + + // Second pass: Process all products + console.log("\nšŸ”„ Processing all products..."); + const rows = projectRows(iterLinesFromGzip(OFF_JSONL_GZ_URL, true)); + const stats = await writeJsonl(rows, OUTPUT_PATH); + + console.log("\nField coverage (non-empty counts):"); + console.log(` brand: ${stats.nonEmpty.brand}`); + console.log(` name: ${stats.nonEmpty.name}`); + console.log(` ingredients: ${stats.nonEmpty.ingredients}`); + console.log(` images: ${stats.nonEmpty.images}`); + + // Show validation summary + if (stats.validationStats) { + console.log("\nšŸ“Š Data Quality Summary:"); + const validRate = ((stats.validationStats.validProducts / stats.validationStats.totalLines) * 100).toFixed(1); + console.log(` Success rate: ${validRate}% (${stats.validationStats.validProducts.toLocaleString()} valid out of ${stats.validationStats.totalLines.toLocaleString()} total)`); + console.log(` Invalid breakdown:`); + console.log(` - Empty lines: ${stats.validationStats.emptyLines.toLocaleString()}`); + console.log(` - JSON parse errors: ${stats.validationStats.jsonParseErrors.toLocaleString()}`); + console.log(` - No barcode: ${stats.validationStats.noBarcode.toLocaleString()}`); + } + + const proceed = await askUploadPermission({ ...stats, dbEstimate }); + if (!proceed) { + console.log("Upload skipped."); + return; + } + } else { + // Check if the local file exists + try { + const stat = await Deno.stat(OUTPUT_PATH); + console.log(`šŸ“ Found existing file: ${OUTPUT_PATH} (${formatBytes(stat.size)})`); + 
} catch { + console.error(`āŒ No existing ${OUTPUT_PATH} file found. Run without --upload-only first.`); + return; + } + } + console.log("\nUploading to Supabase (batched upserts)..."); + const start = Date.now(); + await uploadJsonlToSupabase(OUTPUT_PATH); + const elapsed = (Date.now() - start) / 1000; + console.log(`Done in ${elapsed.toFixed(1)}s.`); +} + +if (import.meta.main) { + await main().catch((err) => { + console.error("Error:", err); + Deno.exit(1); + }); +} + + diff --git a/local/openfoodfacts/off_upload_batch.ts b/local/openfoodfacts/off_upload_batch.ts new file mode 100644 index 0000000..7d82820 --- /dev/null +++ b/local/openfoodfacts/off_upload_batch.ts @@ -0,0 +1,56 @@ +// Child process for uploading a single batch to Supabase +import { createClient } from "npm:@supabase/supabase-js@2.39.3"; + +async function uploadBatch() { + const batchFile = Deno.args[0]; + const batchNumber = Deno.args[1]; + + if (!batchFile || !batchNumber) { + console.error("Usage: deno run off_upload_batch.ts "); + Deno.exit(1); + } + + const url = Deno.env.get("SUPABASE_URL") ?? ""; + const key = Deno.env.get("SUPABASE_SECRET_KEY") ?? Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? ""; + + if (!url || !key) { + console.error("SUPABASE_URL and SUPABASE_SECRET_KEY must be set"); + Deno.exit(1); + } + + const supabase = createClient(url, key, { auth: { persistSession: false } }); + + try { + // Read batch file + const batchData = await Deno.readTextFile(batchFile); + const rows = batchData.trim().split('\n').map(line => JSON.parse(line)); + + // Upload to Supabase + const { error } = await supabase + .from('inventory_cache') + .upsert(rows, { onConflict: 'barcode' }); + + if (error) { + console.error(`āŒ Batch ${batchNumber} failed:`, error.message); + Deno.exit(1); + } + + // Only log every 10th batch to reduce output + if (parseInt(batchNumber) % 10 === 0) { + console.log(`āœ… Uploaded batch ${batchNumber} (${rows.length} rows)`); + } + + } catch (error) { + console.error(`āŒ Batch ${batchNumber} error:`, error.message); + Deno.exit(1); + } finally { + // Clean up temp file + try { + await Deno.remove(batchFile); + } catch { + // Ignore cleanup errors + } + } +} + +uploadBatch(); From 51817125a6e0bbe8394760b9a80a698b86ca28a7 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Wed, 1 Oct 2025 15:52:12 +0530 Subject: [PATCH 15/21] Remove log_inventory table and replace with inventory_cache - Removed log_inventory table definition from tables.sql - Updated get_check_history and get_list_items functions to use inventory_cache - Removed background/log_inventory endpoint - Created consolidated getProductFromCache() function as single source of truth - Updated all code to query inventory_cache instead of log_inventory - Removed redundant lookupProduct wrapper function - inventory.ts now checks cache first, falls back to fresh API fetch --- supabase/database/tables.sql | 81 ++++-------- supabase/functions/background/index.ts | 19 --- supabase/functions/ingredicheck/analyzer.ts | 61 +++------ supabase/functions/ingredicheck/inventory.ts | 127 +++++++++++++++---- 4 files changed, 142 insertions(+), 146 deletions(-) diff --git a/supabase/database/tables.sql b/supabase/database/tables.sql index 14f91a5..7e9c6e1 100644 --- a/supabase/database/tables.sql +++ b/supabase/database/tables.sql @@ -104,33 +104,6 @@ CREATE POLICY user_update_own_log_infer ON public.log_feedback -------------------------------------------------------------------------------- -create table - public.log_inventory ( - created_at timestamp 
with time zone not null default now(), - start_time timestamp with time zone, - end_time timestamp with time zone, - user_id uuid not null, - client_activity_id uuid, - barcode text not null, - data_source text not null, - name text, - brand text, - ingredients json, - images json - ) tablespace pg_default; - -alter table public.log_inventory enable row level security; - -create policy "Select for all authenticated users" on public.log_inventory - for select - using (true); - -create policy "Insert for authenticated users" on public.log_inventory - for insert - with check (auth.uid() = user_id); - --------------------------------------------------------------------------------- - create table public.inventory_traderjoes ( created_at timestamp with time zone not null default now(), @@ -305,12 +278,12 @@ BEGIN SELECT DISTINCT ON (barcode, name, brand) la.created_at, la.client_activity_id, - COALESCE(li.barcode, le.barcode) AS barcode, - COALESCE(li.name, le.name) AS name, - COALESCE(li.brand, le.brand) AS brand, - COALESCE(li.ingredients, le.ingredients) AS ingredients, + COALESCE(le.barcode, ic.barcode) AS barcode, + COALESCE(ic.name, le.name) AS name, + COALESCE(ic.brand, le.brand) AS brand, + COALESCE(ic.ingredients::json, le.ingredients) AS ingredients, COALESCE( - li.images, + ic.images::json, (SELECT json_agg(json_build_object('imageFileHash', text_val)) FROM unnest(le.images) AS dt(text_val)) ) AS images, la.response_body AS ingredient_recommendations, @@ -324,31 +297,27 @@ BEGIN ) AS favorited FROM public.log_analyzebarcode la - LEFT JOIN public.log_inventory li - ON la.client_activity_id = li.client_activity_id LEFT JOIN public.log_extract le ON la.client_activity_id = le.client_activity_id + LEFT JOIN public.inventory_cache ic + ON le.barcode = ic.barcode LEFT JOIN public.log_feedback lf ON la.client_activity_id = lf.client_activity_id WHERE la.created_at > '2024-03-15'::date AND - ( - li.client_activity_id IS NOT NULL - OR - le.client_activity_id IS NOT NULL - ) + le.client_activity_id IS NOT NULL AND ( search_query IS NULL OR - to_tsvector('english', COALESCE(li.name, le.name) || ' ' || COALESCE(li.brand, le.brand) || ' ' || COALESCE(li.ingredients::text, le.ingredients::text)) @@ plainto_tsquery('english', search_query) + to_tsvector('english', COALESCE(ic.name, le.name) || ' ' || COALESCE(ic.brand, le.brand) || ' ' || COALESCE(ic.ingredients::text, le.ingredients::text)) @@ plainto_tsquery('english', search_query) OR - COALESCE(li.name, le.name) ILIKE '%' || search_query || '%' + COALESCE(ic.name, le.name) ILIKE '%' || search_query || '%' OR - COALESCE(li.brand, le.brand) ILIKE '%' || search_query || '%' + COALESCE(ic.brand, le.brand) ILIKE '%' || search_query || '%' OR - COALESCE(li.ingredients::text, le.ingredients::text) ILIKE '%' || search_query || '%' + COALESCE(ic.ingredients::text, le.ingredients::text) ILIKE '%' || search_query || '%' ) ORDER BY barcode, name, brand, la.created_at DESC @@ -380,37 +349,33 @@ BEGIN uli.created_at, uli.list_id, uli.list_item_id, - COALESCE(li.barcode, le.barcode) AS barcode, - COALESCE(li.name, le.name) AS name, - COALESCE(li.brand, le.brand) AS brand, - COALESCE(li.ingredients, le.ingredients::json) AS ingredients, + COALESCE(le.barcode, ic.barcode) AS barcode, + COALESCE(ic.name, le.name) AS name, + COALESCE(ic.brand, le.brand) AS brand, + COALESCE(ic.ingredients::json, le.ingredients::json) AS ingredients, COALESCE( - li.images, + ic.images::json, (SELECT json_agg(json_build_object('imageFileHash', text_val)) FROM unnest(le.images) AS 
dt(text_val)) ) AS images FROM public.user_list_items uli - LEFT JOIN public.log_inventory li ON uli.list_item_id = li.client_activity_id LEFT JOIN public.log_extract le ON uli.list_item_id = le.client_activity_id + LEFT JOIN public.inventory_cache ic ON le.barcode = ic.barcode WHERE uli.list_id = input_list_id AND - ( - li.client_activity_id IS NOT NULL - OR - le.client_activity_id IS NOT NULL - ) + le.client_activity_id IS NOT NULL AND ( search_query IS NULL OR - to_tsvector('english', COALESCE(li.name, le.name) || ' ' || COALESCE(li.brand, le.brand) || ' ' || COALESCE(li.ingredients::text, le.ingredients::text)) @@ plainto_tsquery('english', search_query) + to_tsvector('english', COALESCE(ic.name, le.name) || ' ' || COALESCE(ic.brand, le.brand) || ' ' || COALESCE(ic.ingredients::text, le.ingredients::text)) @@ plainto_tsquery('english', search_query) OR - COALESCE(li.name, le.name) ILIKE '%' || search_query || '%' + COALESCE(ic.name, le.name) ILIKE '%' || search_query || '%' OR - COALESCE(li.brand, le.brand) ILIKE '%' || search_query || '%' + COALESCE(ic.brand, le.brand) ILIKE '%' || search_query || '%' OR - COALESCE(li.ingredients::text, le.ingredients::text) ILIKE '%' || search_query || '%' + COALESCE(ic.ingredients::text, le.ingredients::text) ILIKE '%' || search_query || '%' ) ORDER BY uli.created_at DESC; diff --git a/supabase/functions/background/index.ts b/supabase/functions/background/index.ts index b7219d7..32da2bd 100644 --- a/supabase/functions/background/index.ts +++ b/supabase/functions/background/index.ts @@ -45,25 +45,6 @@ router } ctx.response.status = 201 }) - .post('/background/log_inventory', async (ctx) => { - const body = ctx.request.body({ type: 'json', limit: 0 }) - const body_json = await body.value - const user_id = await KitchenSink.getUserId(ctx) - const entry = { - ...body_json, - user_id: user_id, - } - const result = await ctx.state.supabaseClient - .from('log_inventory') - .insert(entry) - if (result.error) { - console.log('supabaseClient.from(log_inventory).insert() failed: ', result.error) - ctx.response.status = 500 - ctx.response.body = result.error - return - } - ctx.response.status = 201 - }) .post('/background/log_llmcalls', async (ctx) => { const body = ctx.request.body({ type: 'json', limit: 0 }) const body_json = await body.value diff --git a/supabase/functions/ingredicheck/analyzer.ts b/supabase/functions/ingredicheck/analyzer.ts index cddb36e..cfe5113 100644 --- a/supabase/functions/ingredicheck/analyzer.ts +++ b/supabase/functions/ingredicheck/analyzer.ts @@ -72,7 +72,7 @@ export async function streamInventoryAndAnalysis(ctx: Context) { return; } - const inventoryResult = await Inventory.fetchProduct({ + const inventoryResult = await Inventory.getProductFromCache({ supabaseClient: ctx.state.supabaseClient, barcode, clientActivityId, @@ -162,7 +162,23 @@ export async function performAnalysis( ctx.state.clientActivityId = requestBody.clientActivityId; - const product = productOverride ?? await lookupProduct(ctx, requestBody); + let product: DB.Product; + + if (productOverride) { + product = productOverride; + } else { + const result = await Inventory.getProductFromCache({ + supabaseClient: ctx.state.supabaseClient, + barcode: requestBody.barcode, + clientActivityId: ctx.state.clientActivityId, + }); + + if (result.status !== 200 || !result.product) { + throw new Error(result.error ?? 
"Product not found"); + } + + product = result.product; + } const hasValidPreferences = requestBody.userPreferenceText && requestBody.userPreferenceText.trim() !== "" && @@ -214,44 +230,3 @@ export async function logAnalysisResult( console.error("Failed to log analyze barcode event", error); } } - -async function lookupProduct( - ctx: Context, - requestBody: AnalysisRequest, -): Promise { - if (requestBody.barcode !== undefined) { - const result = await ctx.state.supabaseClient - .from("log_inventory") - .select() - .eq("barcode", requestBody.barcode) - .order("created_at", { ascending: false }) - .limit(1) - .single(); - - if (result.error) { - throw result.error; - } - - return result.data as DB.Product; - } - - const result = await ctx.state.supabaseClient - .from("log_extract") - .select() - .eq("client_activity_id", ctx.state.clientActivityId) - .order("created_at", { ascending: false }) - .limit(1) - .single(); - - if (result.error) { - throw result.error; - } - - return { - barcode: result.data.barcode, - brand: result.data.brand, - name: result.data.name, - ingredients: result.data.ingredients ?? [], - images: [], - }; -} diff --git a/supabase/functions/ingredicheck/inventory.ts b/supabase/functions/ingredicheck/inventory.ts index 5c188a5..5d577e6 100644 --- a/supabase/functions/ingredicheck/inventory.ts +++ b/supabase/functions/ingredicheck/inventory.ts @@ -13,21 +13,93 @@ type InventoryFetchResult = { error?: string; }; +type InventoryCacheOptions = { + supabaseClient: any; + barcode?: string; + clientActivityId?: string; +}; + +type InventoryCacheResult = { + status: number; + product: DB.Product | null; + error?: string; +}; + +/** + * Queries the inventory_cache for a product by barcode. + * If no barcode is provided, falls back to log_extract by clientActivityId. + */ +export async function getProductFromCache( + options: InventoryCacheOptions, +): Promise { + const { supabaseClient, barcode, clientActivityId } = options; + + // Query inventory_cache if barcode is provided + if (barcode !== undefined) { + const result = await supabaseClient + .from("inventory_cache") + .select() + .eq("barcode", barcode) + .single(); + + if (result.error) { + return { + status: 404, + product: null, + error: result.error.message ?? "Product not found in cache.", + }; + } + + return { + status: 200, + product: result.data as DB.Product, + }; + } + + // Fallback to log_extract if no barcode provided + if (clientActivityId !== undefined) { + const result = await supabaseClient + .from("log_extract") + .select() + .eq("client_activity_id", clientActivityId) + .order("created_at", { ascending: false }) + .limit(1) + .single(); + + if (result.error) { + return { + status: 404, + product: null, + error: result.error.message ?? "Product not found in extract log.", + }; + } + + return { + status: 200, + product: { + barcode: result.data.barcode, + brand: result.data.brand, + name: result.data.name, + ingredients: result.data.ingredients ?? 
[], + images: [], + }, + }; + } + + return { + status: 400, + product: null, + error: "Either barcode or clientActivityId must be provided.", + }; +} + export async function fetchProduct( options: InventoryFetchOptions, ): Promise { - const { supabaseClient, barcode, clientActivityId } = options; + const { barcode } = options; let product: DB.Product | null = null; let errorMessage: string | undefined; - - const log_json: Record = { - start_time: new Date(), - barcode: barcode, - data_source: "openfoodfacts/v3", - client_activity_id: clientActivityId, - }; - let status = 200; try { @@ -44,7 +116,6 @@ export async function fetchProduct( errorMessage = data.status_verbose || "Product not found."; } else { product = processOpenFoodFactsProductData(barcode, data.product); - Object.assign(log_json, product); } } catch (error) { status = 500; @@ -52,17 +123,6 @@ export async function fetchProduct( console.error(`Failed to fetch product ${barcode}: ${errorMessage}`); } - log_json.end_time = new Date(); - log_json.response_status = status; - if (errorMessage) { - log_json.error = errorMessage; - } - - await supabaseClient.functions.invoke("background/log_inventory", { - body: log_json, - method: "POST", - }); - return { status, product, @@ -75,18 +135,33 @@ export async function get( barcode: string, clientActivityId: string | null, ) { - const result = await fetchProduct({ + // First, try to get product from cache + const cacheResult = await getProductFromCache({ + supabaseClient: ctx.state.supabaseClient, + barcode, + clientActivityId: clientActivityId ?? undefined, + }); + + // If found in cache, return it + if (cacheResult.status === 200 && cacheResult.product) { + ctx.response.status = 200; + ctx.response.body = cacheResult.product; + return; + } + + // If not in cache, fetch from OpenFoodFacts (fetchProduct is still available as fallback) + const fetchResult = await fetchProduct({ supabaseClient: ctx.state.supabaseClient, barcode, clientActivityId, }); - ctx.response.status = result.status; - if (result.status === 200 && result.product) { - ctx.response.body = result.product; + ctx.response.status = fetchResult.status; + if (fetchResult.status === 200 && fetchResult.product) { + ctx.response.body = fetchResult.product; } else { ctx.response.body = { - error: result.error ?? "Unexpected inventory error.", + error: fetchResult.error ?? 
"Unexpected inventory error.", }; } } From 111a945a2f622ca2059f4fff1536ba092386e4d6 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Wed, 1 Oct 2025 16:04:58 +0530 Subject: [PATCH 16/21] Remove OpenFoodFacts API fallback, make inventory endpoint cache-only - Removed fetchProduct() function and OpenFoodFacts API integration - Removed processOpenFoodFactsProductData() and helper functions - Simplified get() endpoint to only read from inventory_cache - Reduced file from 252 lines to 104 lines (67% reduction) - Now returns 404 if product not found in cache --- supabase/functions/ingredicheck/inventory.ts | 158 +------------------ 1 file changed, 5 insertions(+), 153 deletions(-) diff --git a/supabase/functions/ingredicheck/inventory.ts b/supabase/functions/ingredicheck/inventory.ts index 5d577e6..6330cda 100644 --- a/supabase/functions/ingredicheck/inventory.ts +++ b/supabase/functions/ingredicheck/inventory.ts @@ -1,18 +1,6 @@ import { Context } from "https://deno.land/x/oak@v12.6.0/mod.ts"; import * as DB from "../shared/db.ts"; -type InventoryFetchOptions = { - supabaseClient: any; - barcode: string; - clientActivityId?: string | null; -}; - -type InventoryFetchResult = { - status: number; - product: DB.Product | null; - error?: string; -}; - type InventoryCacheOptions = { supabaseClient: any; barcode?: string; @@ -93,159 +81,23 @@ export async function getProductFromCache( }; } -export async function fetchProduct( - options: InventoryFetchOptions, -): Promise { - const { barcode } = options; - - let product: DB.Product | null = null; - let errorMessage: string | undefined; - let status = 200; - - try { - const url = - `https://world.openfoodfacts.org/api/v3/product/${barcode}.json`; - const response = await fetch(url); - const data = await response.json(); - - if (data.status === "failure") { - console.log( - `Unexpected product details: ${JSON.stringify(data, null, 2)}`, - ); - status = 404; - errorMessage = data.status_verbose || "Product not found."; - } else { - product = processOpenFoodFactsProductData(barcode, data.product); - } - } catch (error) { - status = 500; - errorMessage = (error as Error).message; - console.error(`Failed to fetch product ${barcode}: ${errorMessage}`); - } - - return { - status, - product, - error: errorMessage, - }; -} - export async function get( ctx: Context, barcode: string, clientActivityId: string | null, ) { - // First, try to get product from cache - const cacheResult = await getProductFromCache({ + const result = await getProductFromCache({ supabaseClient: ctx.state.supabaseClient, barcode, clientActivityId: clientActivityId ?? undefined, }); - // If found in cache, return it - if (cacheResult.status === 200 && cacheResult.product) { - ctx.response.status = 200; - ctx.response.body = cacheResult.product; - return; - } - - // If not in cache, fetch from OpenFoodFacts (fetchProduct is still available as fallback) - const fetchResult = await fetchProduct({ - supabaseClient: ctx.state.supabaseClient, - barcode, - clientActivityId, - }); - - ctx.response.status = fetchResult.status; - if (fetchResult.status === 200 && fetchResult.product) { - ctx.response.body = fetchResult.product; + ctx.response.status = result.status; + if (result.status === 200 && result.product) { + ctx.response.body = result.product; } else { ctx.response.body = { - error: fetchResult.error ?? "Unexpected inventory error.", - }; - } -} - -type SelectedImages = { - [key: string]: { - display: { - [key: string]: string; + error: result.error ?? 
"Product not found in cache.", }; - }; -}; - -type ImageUrl = { - url: string; -}; - -function extractDisplayImageUrls(selectedImages?: SelectedImages): ImageUrl[] { - if (selectedImages) { - return Object.values(selectedImages).flatMap((image) => { - if (image.display?.en) { - return [{ - url: image.display.en, - }]; - } - return []; - }); - } - return []; -} - -function processOpenFoodFactsProductData( - barcode: string, - product: any, -): DB.Product { - let brand: string | undefined = undefined; - let name: string | undefined = undefined; - let ingredients: any[] = []; - - if (product.brand_owner) { - brand = product.brand_owner; } - - if (product.product_name) { - name = product.product_name; - } - - if (product.ingredients) { - ingredients = product.ingredients.map((i: any) => { - return { - name: i.text, - vegan: i.vegan, - vegetarian: i.vegetarian, - ingredients: i.ingredients?.map((i2: any) => { - return { - name: i2.text, - vegan: i2.vegan, - vegetarian: i2.vegetarian, - ingredients: i2.ingredients?.map((i3: any) => { - return { - name: i3.text, - vegan: i3.vegan, - vegetarian: i3.vegetarian, - ingredients: [], - }; - }) ?? [], - }; - }) ?? [], - }; - }); - } - - const images = extractDisplayImageUrls(product.selected_images); - - // Workaround for known issues with OpenFoodFacts data - if (barcode === "0096619362776") { - // Label says 'Contains No Animal Rennet', but ingredient list has 'Animal Rennet'. - ingredients = ingredients.filter((i) => i.name !== "Animal Rennet"); - } - - return { - barcode: barcode, - brand: brand, - name: name, - ingredients: ingredients, - images: images, - }; } From d4487cf52df615173cc226d7f420f3f9af01692c Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Wed, 1 Oct 2025 16:35:27 +0530 Subject: [PATCH 17/21] Fix Open Food Facts image URL generation - Add barcodeToPath() function to convert barcodes to proper OFF path format (e.g., 088/491/237/3946) - Fix image URL format: full size uses imgId.jpg instead of imgId.WxH.jpg - Add validateImageUrlExists() function for optional HEAD request validation (currently disabled) - Update extractDisplayImageUrls() to use correct barcode paths in URLs - All image URLs now follow format: /images/products/{barcodePath}/{imgId}.{size}.jpg --- local/openfoodfacts/off_ingest.ts | 57 +++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/local/openfoodfacts/off_ingest.ts b/local/openfoodfacts/off_ingest.ts index 0cd8263..2e9040d 100644 --- a/local/openfoodfacts/off_ingest.ts +++ b/local/openfoodfacts/off_ingest.ts @@ -18,7 +18,7 @@ async function loadEnv() { } } } catch (error) { - console.warn("āš ļø Could not load .env file:", error.message); + console.warn("āš ļø Could not load .env file:", (error as Error).message); console.warn(" Make sure to copy .env.template to .env and fill in your values"); } } @@ -81,13 +81,42 @@ function isValidImageUrl(url: string): boolean { } } -function extractDisplayImageUrls(images: any): Image[] { +async function validateImageUrlExists(url: string): Promise { + try { + const response = await fetch(url, { method: 'HEAD' }); + return response.status === 200; + } catch { + return false; + } +} + +// Convert barcode to Open Food Facts path format +// Example: "3017620422003" -> "301/762/042/2003" +// Example: "1" -> "1" +function barcodeToPath(barcode: string): string { + // Short barcodes (8 digits or fewer) are used as-is + if (barcode.length <= 8) { + return barcode; + } + // Longer barcodes are padded to at least 13 digits and split + 
const code = barcode.padStart(13, '0'); + // Split into segments of 3 digits, except the last part + const segments: string[] = []; + for (let i = 0; i < code.length - 4; i += 3) { + segments.push(code.slice(i, i + 3)); + } + segments.push(code.slice(code.length - 4)); // Last 4 digits + return segments.join('/'); +} + +function extractDisplayImageUrls(images: any, barcode: string): Image[] { if (!images || typeof images !== "object") { return []; } const urls: Image[] = []; const processedImages = new Set(); // Track processed image IDs to avoid duplicates + const barcodePath = barcodeToPath(barcode); try { // Open Food Facts image structure: images contains both numeric keys (1,2,3,4) and language-specific keys (front_en, ingredients_fr, etc.) @@ -99,7 +128,7 @@ function extractDisplayImageUrls(images: any): Image[] { for (const langKey of languageKeys) { const imageRef = images[langKey]; if (imageRef && typeof imageRef === "object" && imageRef.imgid) { - const imgId = imageRef.imgid; + const imgId = String(imageRef.imgid); // Ensure imgid is a string if (processedImages.has(imgId)) continue; // Skip if already processed const imageData = images[imgId]; @@ -110,28 +139,28 @@ function extractDisplayImageUrls(images: any): Image[] { const imageUrls: { url: string; resolution: string; width: number; height: number }[] = []; if (sizes.full && sizes.full.w && sizes.full.h) { - const url = `https://static.openfoodfacts.org/images/products/${imgId}/front.${sizes.full.w}x${sizes.full.h}.jpg`; + const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${imgId}.jpg`; if (isValidImageUrl(url)) { imageUrls.push({ url, resolution: 'full', width: sizes.full.w, height: sizes.full.h }); } } if (sizes["400"] && sizes["400"].w && sizes["400"].h) { - const url = `https://static.openfoodfacts.org/images/products/${imgId}/front.400x${sizes["400"].h}.jpg`; + const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${imgId}.400.jpg`; if (isValidImageUrl(url)) { imageUrls.push({ url, resolution: '400px', width: sizes["400"].w, height: sizes["400"].h }); } } if (sizes["200"] && sizes["200"].w && sizes["200"].h) { - const url = `https://static.openfoodfacts.org/images/products/${imgId}/front.200x${sizes["200"].h}.jpg`; + const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${imgId}.200.jpg`; if (isValidImageUrl(url)) { imageUrls.push({ url, resolution: '200px', width: sizes["200"].w, height: sizes["200"].h }); } } if (sizes["100"] && sizes["100"].w && sizes["100"].h) { - const url = `https://static.openfoodfacts.org/images/products/${imgId}/front.100x${sizes["100"].h}.jpg`; + const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${imgId}.100.jpg`; if (isValidImageUrl(url)) { imageUrls.push({ url, resolution: '100px', width: sizes["100"].w, height: sizes["100"].h }); } @@ -155,12 +184,12 @@ function extractDisplayImageUrls(images: any): Image[] { // If no language-specific front images found, try any numeric image if (urls.length === 0) { for (const [key, imageData] of Object.entries(images)) { - if (/^\d+$/.test(key) && imageData && typeof imageData === "object" && imageData.sizes) { - const sizes = imageData.sizes; + if (/^\d+$/.test(key) && imageData && typeof imageData === "object" && (imageData as any).sizes) { + const sizes = (imageData as any).sizes; // Collect all available sizes for this image if (sizes.full && sizes.full.w && sizes.full.h) { - const url = 
`https://static.openfoodfacts.org/images/products/${key}/front.${sizes.full.w}x${sizes.full.h}.jpg`; + const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${key}.jpg`; if (isValidImageUrl(url)) { urls.push({ url, @@ -172,7 +201,7 @@ function extractDisplayImageUrls(images: any): Image[] { } if (sizes["400"] && sizes["400"].w && sizes["400"].h) { - const url = `https://static.openfoodfacts.org/images/products/${key}/front.400x${sizes["400"].h}.jpg`; + const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${key}.400.jpg`; if (isValidImageUrl(url)) { urls.push({ url, @@ -187,7 +216,7 @@ function extractDisplayImageUrls(images: any): Image[] { } } } - } catch (error) { + } catch (_error) { // ignore malformed structures } @@ -232,7 +261,7 @@ function mapToCacheRow(product: any): CacheRow | null { ingredients = product.ingredients.filter((x: any) => x && typeof x === "object").map(mapIngredient); } - const images = extractDisplayImageUrls(product?.images); + const images = extractDisplayImageUrls(product?.images, barcode); const off_last_modified_t = typeof product?.last_modified_t === "number" ? product.last_modified_t : undefined; @@ -651,7 +680,7 @@ async function uploadJsonlToSupabase(path: string) { } } } catch (error) { - console.error(`āŒ JSON parse error on line:`, error.message); + console.error(`āŒ JSON parse error on line:`, (error as Error).message); // skip invalid JSON } } From 42866198a50a3e1463905b927e807ad4636e0e79 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Wed, 1 Oct 2025 22:49:40 +0530 Subject: [PATCH 18/21] TEMP: Modify upload to only update images column - Only upsert barcode and images fields during upload - Remove updated_at field from upserts (column may not exist) - Add ignoreDuplicates: false to ensure existing rows are updated - Improve error logging with JSON.stringify for better debugging - This is a temporary change to fix image URLs without reprocessing all data --- local/openfoodfacts/off_ingest.ts | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/local/openfoodfacts/off_ingest.ts b/local/openfoodfacts/off_ingest.ts index 2e9040d..4c1f401 100644 --- a/local/openfoodfacts/off_ingest.ts +++ b/local/openfoodfacts/off_ingest.ts @@ -538,7 +538,7 @@ async function askUploadPermission(stats: { count: number; totalBytes: number; d async function uploadJsonlToSupabase(path: string) { - console.log("šŸ“¤ Starting upload to Supabase..."); + console.log("šŸ“¤ Starting upload to Supabase (TEMPORARY: only updating images column)..."); // Load env for Supabase connection const url = Deno.env.get("SUPABASE_URL") ?? 
""; @@ -639,18 +639,24 @@ async function uploadJsonlToSupabase(path: string) { // Now create and add the upload promise const uploadPromise = (async () => { try { + // TEMPORARY: Only update images column + const imageUpdates = batch.map(row => ({ + barcode: row.barcode, + images: row.images + })); + const { error } = await supabase .from('inventory_cache') - .upsert(batch, { onConflict: 'barcode' }); + .upsert(imageUpdates, { onConflict: 'barcode', ignoreDuplicates: false }); if (error) { - console.error(`āŒ Batch ${currentBatchNum} failed:`, error.message); + console.error(`āŒ Batch ${currentBatchNum} failed:`, JSON.stringify(error)); throw error; } // Only log every 10 batches if (currentBatchNum % 10 === 0) { - console.log(`āœ… Uploaded batch ${currentBatchNum} (${batch.length} rows)`); + console.log(`āœ… Updated images for batch ${currentBatchNum} (${batch.length} rows)`); } } catch (error) { console.error(`āŒ Upload error at batch ${currentBatchNum}:`, error); @@ -722,16 +728,22 @@ async function uploadJsonlToSupabase(path: string) { const uploadPromise = (async () => { try { + // TEMPORARY: Only update images column + const imageUpdates = batch.map(row => ({ + barcode: row.barcode, + images: row.images + })); + const { error } = await supabase .from('inventory_cache') - .upsert(batch, { onConflict: 'barcode' }); + .upsert(imageUpdates, { onConflict: 'barcode', ignoreDuplicates: false }); if (error) { - console.error(`āŒ Final batch ${currentBatchNum} failed:`, error.message); + console.error(`āŒ Final batch ${currentBatchNum} failed:`, JSON.stringify(error)); throw error; } - console.log(`āœ… Uploaded final batch ${currentBatchNum} (${batch.length} rows)`); + console.log(`āœ… Updated images for final batch ${currentBatchNum} (${batch.length} rows)`); } catch (error) { console.error(`āŒ Upload error at final batch ${currentBatchNum}:`, error); throw error; @@ -748,7 +760,7 @@ async function uploadJsonlToSupabase(path: string) { console.log(`āœ… All uploads completed!`); } - console.log(`āœ… Upload complete! ${total} rows processed in ${batchCount} batches`); + console.log(`āœ… Image update complete! 
${total} rows processed in ${batchCount} batches`); } finally { file.close(); @@ -838,7 +850,7 @@ Examples: return; } } - console.log("\nUploading to Supabase (batched upserts)..."); + console.log("\nUploading to Supabase (TEMPORARY: only updating images column)..."); const start = Date.now(); await uploadJsonlToSupabase(OUTPUT_PATH); const elapsed = (Date.now() - start) / 1000; From 2468a517f5c4926909173850f631218f79dcdd10 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Wed, 1 Oct 2025 22:52:17 +0530 Subject: [PATCH 19/21] Handle barcode matching with or without leading zeros - Add barcode_matches() SQL function that pads upward only to avoid false matches - Update inventory query to try multiple barcode format variants (EAN-8, UPC-A, EAN-13, ITF-14) - Apply barcode_matches() to JOINs in get_check_history() and get_list_items() - Prevents 8-digit barcodes from matching unrelated 13-digit barcodes - Fixes case where barcode 884912373946 should match 0884912373946 --- supabase/database/tables.sql | 56 +++++++++++++++++++- supabase/functions/ingredicheck/inventory.ts | 39 +++++++++++++- 2 files changed, 91 insertions(+), 4 deletions(-) diff --git a/supabase/database/tables.sql b/supabase/database/tables.sql index 7e9c6e1..bd404bb 100644 --- a/supabase/database/tables.sql +++ b/supabase/database/tables.sql @@ -40,6 +40,58 @@ create trigger trg_inventory_cache_updated_at before update on public.inventory_cache for each row execute function set_inventory_cache_updated_at(); +-- Function to match barcodes with or without leading zeros +-- Only pads UPWARD to avoid false matches between different barcode types +-- Based on inventory stats: 93% are 13-digit, 5% are 8-digit +-- Examples: "884912373946" (12) matches "0884912373946" (13) āœ“ +-- "12345678" (8) does NOT match "0000012345678" (13) āœ— +create or replace function barcode_matches(barcode1 text, barcode2 text) +returns boolean as $$ +declare + len1 int; + len2 int; + min_len int; +begin + if barcode1 is null or barcode2 is null then + return false; + end if; + + -- Direct match (fastest check) + if barcode1 = barcode2 then + return true; + end if; + + len1 := length(barcode1); + len2 := length(barcode2); + min_len := least(len1, len2); + + -- Only pad to lengths equal to or greater than the shorter barcode + -- This prevents 8-digit codes from matching unrelated 13-digit codes + + -- Try 8 digits (EAN-8) only if shortest is <= 8 + if min_len <= 8 and lpad(barcode1, 8, '0') = lpad(barcode2, 8, '0') then + return true; + end if; + + -- Try 12 digits (UPC-A) only if shortest is <= 12 + if min_len <= 12 and lpad(barcode1, 12, '0') = lpad(barcode2, 12, '0') then + return true; + end if; + + -- Try 13 digits (EAN-13) only if shortest is <= 13 + if min_len <= 13 and lpad(barcode1, 13, '0') = lpad(barcode2, 13, '0') then + return true; + end if; + + -- Try 14 digits (ITF-14) only if shortest is <= 14 + if min_len <= 14 and lpad(barcode1, 14, '0') = lpad(barcode2, 14, '0') then + return true; + end if; + + return false; +end; +$$ language plpgsql immutable; + -------------------------------------------------------------------------------- create table @@ -300,7 +352,7 @@ BEGIN LEFT JOIN public.log_extract le ON la.client_activity_id = le.client_activity_id LEFT JOIN public.inventory_cache ic - ON le.barcode = ic.barcode + ON barcode_matches(le.barcode, ic.barcode) LEFT JOIN public.log_feedback lf ON la.client_activity_id = lf.client_activity_id WHERE @@ -360,7 +412,7 @@ BEGIN FROM public.user_list_items uli LEFT JOIN public.log_extract le ON 
uli.list_item_id = le.client_activity_id - LEFT JOIN public.inventory_cache ic ON le.barcode = ic.barcode + LEFT JOIN public.inventory_cache ic ON barcode_matches(le.barcode, ic.barcode) WHERE uli.list_id = input_list_id AND diff --git a/supabase/functions/ingredicheck/inventory.ts b/supabase/functions/ingredicheck/inventory.ts index 6330cda..34c1043 100644 --- a/supabase/functions/ingredicheck/inventory.ts +++ b/supabase/functions/ingredicheck/inventory.ts @@ -24,11 +24,38 @@ export async function getProductFromCache( // Query inventory_cache if barcode is provided if (barcode !== undefined) { + // Try to match barcodes with or without leading zeros + // Only pad UPWARD to avoid false matches between different barcode types + const variants = [barcode]; // Always include original + const len = barcode.length; + + if (len <= 8) { + // EAN-8 format (5% of inventory) - only pad to 8 + variants.push(barcode.padStart(8, '0')); + } else if (len <= 12) { + // UPC-A format - pad to 12, 13, 14 + variants.push(barcode.padStart(12, '0')); + variants.push(barcode.padStart(13, '0')); // UPC-A → EAN-13 conversion + variants.push(barcode.padStart(14, '0')); + } else if (len === 13) { + // EAN-13 format (93% of inventory) - pad to 13, 14 + variants.push(barcode.padStart(13, '0')); + variants.push(barcode.padStart(14, '0')); + } else { + // 14+ digits - only pad to 14 + variants.push(barcode.padStart(14, '0')); + } + + // Remove duplicates and create OR condition + const uniqueVariants = [...new Set(variants)]; + const orCondition = uniqueVariants.map(v => `barcode.eq.${v}`).join(','); + const result = await supabaseClient .from("inventory_cache") .select() - .eq("barcode", barcode) - .single(); + .or(orCondition) + .limit(1) + .maybeSingle(); if (result.error) { return { @@ -38,6 +65,14 @@ export async function getProductFromCache( }; } + if (!result.data) { + return { + status: 404, + product: null, + error: "Product not found in cache.", + }; + } + return { status: 200, product: result.data as DB.Product, From 3b418e9f7d8de7226c1f32a00fb5c28ffb8a2c26 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Thu, 2 Oct 2025 08:34:45 +0530 Subject: [PATCH 20/21] Revert to full upserts with all columns - Remove temporary image-only update logic - Restore full batch upserts with all product data - Will populate fresh inventory_cache table with: - Fixed image URLs with correct barcode paths - All product metadata (name, brand, ingredients, etc.) - Proper last_refreshed_at timestamps --- local/openfoodfacts/off_ingest.ts | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/local/openfoodfacts/off_ingest.ts b/local/openfoodfacts/off_ingest.ts index 4c1f401..2e9040d 100644 --- a/local/openfoodfacts/off_ingest.ts +++ b/local/openfoodfacts/off_ingest.ts @@ -538,7 +538,7 @@ async function askUploadPermission(stats: { count: number; totalBytes: number; d async function uploadJsonlToSupabase(path: string) { - console.log("šŸ“¤ Starting upload to Supabase (TEMPORARY: only updating images column)..."); + console.log("šŸ“¤ Starting upload to Supabase..."); // Load env for Supabase connection const url = Deno.env.get("SUPABASE_URL") ?? 
""; @@ -639,24 +639,18 @@ async function uploadJsonlToSupabase(path: string) { // Now create and add the upload promise const uploadPromise = (async () => { try { - // TEMPORARY: Only update images column - const imageUpdates = batch.map(row => ({ - barcode: row.barcode, - images: row.images - })); - const { error } = await supabase .from('inventory_cache') - .upsert(imageUpdates, { onConflict: 'barcode', ignoreDuplicates: false }); + .upsert(batch, { onConflict: 'barcode' }); if (error) { - console.error(`āŒ Batch ${currentBatchNum} failed:`, JSON.stringify(error)); + console.error(`āŒ Batch ${currentBatchNum} failed:`, error.message); throw error; } // Only log every 10 batches if (currentBatchNum % 10 === 0) { - console.log(`āœ… Updated images for batch ${currentBatchNum} (${batch.length} rows)`); + console.log(`āœ… Uploaded batch ${currentBatchNum} (${batch.length} rows)`); } } catch (error) { console.error(`āŒ Upload error at batch ${currentBatchNum}:`, error); @@ -728,22 +722,16 @@ async function uploadJsonlToSupabase(path: string) { const uploadPromise = (async () => { try { - // TEMPORARY: Only update images column - const imageUpdates = batch.map(row => ({ - barcode: row.barcode, - images: row.images - })); - const { error } = await supabase .from('inventory_cache') - .upsert(imageUpdates, { onConflict: 'barcode', ignoreDuplicates: false }); + .upsert(batch, { onConflict: 'barcode' }); if (error) { - console.error(`āŒ Final batch ${currentBatchNum} failed:`, JSON.stringify(error)); + console.error(`āŒ Final batch ${currentBatchNum} failed:`, error.message); throw error; } - console.log(`āœ… Updated images for final batch ${currentBatchNum} (${batch.length} rows)`); + console.log(`āœ… Uploaded final batch ${currentBatchNum} (${batch.length} rows)`); } catch (error) { console.error(`āŒ Upload error at final batch ${currentBatchNum}:`, error); throw error; @@ -760,7 +748,7 @@ async function uploadJsonlToSupabase(path: string) { console.log(`āœ… All uploads completed!`); } - console.log(`āœ… Image update complete! ${total} rows processed in ${batchCount} batches`); + console.log(`āœ… Upload complete! ${total} rows processed in ${batchCount} batches`); } finally { file.close(); @@ -850,7 +838,7 @@ Examples: return; } } - console.log("\nUploading to Supabase (TEMPORARY: only updating images column)..."); + console.log("\nUploading to Supabase (batched upserts)..."); const start = Date.now(); await uploadJsonlToSupabase(OUTPUT_PATH); const elapsed = (Date.now() - start) / 1000; From 9ac3c8df366a9ecfed20dcf90623681f48949776 Mon Sep 17 00:00:00 2001 From: justanotheratom Date: Thu, 2 Oct 2025 10:18:56 +0530 Subject: [PATCH 21/21] refactor: store raw image metadata instead of constructed URLs - Change ImageMetadata type to minimal structure (type, language, imgid, sizes[]) - Extract all image types: front, ingredients, nutrition, packaging - Extract images for all languages (en, fr, de, es, it, pt, nl, pl, ru, ja, zh, etc.) 
- Remove URL construction logic (moved to runtime in inventory.ts) - Remove unused helper functions: isValidImageUrl, validateImageUrlExists, barcodeToPath - Add comprehensive documentation about image storage strategy - Store only language keys, drop numeric keys to reduce storage by ~60% Image URLs will be constructed at runtime with smart selection: - Prefer English, fallback to other languages - Prefer 400px (medium), fallback to next available size - Return one image per type (front, ingredients, nutrition, packaging) --- local/openfoodfacts/off_ingest.ts | 205 ++++++++++-------------------- 1 file changed, 65 insertions(+), 140 deletions(-) diff --git a/local/openfoodfacts/off_ingest.ts b/local/openfoodfacts/off_ingest.ts index 2e9040d..cd88ce4 100644 --- a/local/openfoodfacts/off_ingest.ts +++ b/local/openfoodfacts/off_ingest.ts @@ -1,6 +1,15 @@ // deno run -A --unstable-kv local/openfoodfacts/off_ingest.ts // Environment: Copy .env.template to .env and fill in your values // Performance: Use --unstable-kv for better memory management +// +// IMAGE STORAGE STRATEGY: +// This script extracts raw image metadata (language keys only) from Open Food Facts +// and stores it in the database. Image URLs are NOT constructed here. +// At runtime, server code will: +// - Select appropriate images based on type (front, ingredients, nutrition, packaging) +// - Prefer English, fallback to other languages +// - Prefer medium resolution (400px), fallback to next available size +// - Construct URLs using: https://static.openfoodfacts.org/images/products/{barcodePath}/{imgId}.{size}.jpg // Load environment variables from .env file async function loadEnv() { @@ -30,11 +39,11 @@ type Ingredient = { ingredients?: Ingredient[]; }; -type Image = { - url: string; - resolution?: string; - width?: number; - height?: number; +type ImageMetadata = { + type: 'front' | 'ingredients' | 'nutrition' | 'packaging'; + language: string; // e.g., 'en', 'fr', 'de' + imgid: string; // numeric image ID (e.g., '1', '2', '3') + sizes: ('full' | '400' | '200' | '100')[]; // available sizes }; type CacheRow = { @@ -43,7 +52,7 @@ type CacheRow = { brand?: string; name?: string; ingredients: Ingredient[]; - images: Image[]; + images: ImageMetadata[]; off_last_modified_t?: number; }; @@ -69,158 +78,74 @@ function mapIngredient(node: any): Ingredient { return item; } -function isValidImageUrl(url: string): boolean { - try { - const parsedUrl = new URL(url); - return parsedUrl.protocol === 'https:' && - parsedUrl.hostname === 'static.openfoodfacts.org' && - url.includes('/images/products/') && - url.endsWith('.jpg'); - } catch { - return false; - } -} - -async function validateImageUrlExists(url: string): Promise { - try { - const response = await fetch(url, { method: 'HEAD' }); - return response.status === 200; - } catch { - return false; - } -} - -// Convert barcode to Open Food Facts path format -// Example: "3017620422003" -> "301/762/042/2003" -// Example: "1" -> "1" -function barcodeToPath(barcode: string): string { - // Short barcodes (8 digits or fewer) are used as-is - if (barcode.length <= 8) { - return barcode; - } - // Longer barcodes are padded to at least 13 digits and split - const code = barcode.padStart(13, '0'); - // Split into segments of 3 digits, except the last part - const segments: string[] = []; - for (let i = 0; i < code.length - 4; i += 3) { - segments.push(code.slice(i, i + 3)); - } - segments.push(code.slice(code.length - 4)); // Last 4 digits - return segments.join('/'); -} - -function 
extractDisplayImageUrls(images: any, barcode: string): Image[] { +/** + * Extract image metadata from Open Food Facts product data. + * Stores only language keys (front_en, ingredients_fr, etc.) - drops numeric keys. + * URL construction happens at runtime in server code. + * + * RUNTIME URL CONSTRUCTION (for server code): + * + * function barcodeToPath(barcode: string): string { + * if (barcode.length <= 8) return barcode; + * const code = barcode.padStart(13, '0'); + * const segments: string[] = []; + * for (let i = 0; i < code.length - 4; i += 3) { + * segments.push(code.slice(i, i + 3)); + * } + * segments.push(code.slice(code.length - 4)); + * return segments.join('/'); + * } + * + * function constructImageUrl(barcode: string, imgId: string, size?: '400' | '200' | '100'): string { + * const path = barcodeToPath(barcode); + * const sizeStr = size ? `.${size}` : ''; + * return `https://static.openfoodfacts.org/images/products/${path}/${imgId}${sizeStr}.jpg`; + * } + */ +function extractDisplayImageUrls(images: any, _barcode: string): ImageMetadata[] { if (!images || typeof images !== "object") { return []; } - const urls: Image[] = []; - const processedImages = new Set(); // Track processed image IDs to avoid duplicates - const barcodePath = barcodeToPath(barcode); + const metadata: ImageMetadata[] = []; try { - // Open Food Facts image structure: images contains both numeric keys (1,2,3,4) and language-specific keys (front_en, ingredients_fr, etc.) - // Language-specific keys reference numeric images via imgid + // Image types we care about + const imageTypes = ['front', 'ingredients', 'nutrition', 'packaging']; - // First, collect all language-specific front images - const languageKeys = ['front_en', 'front_fr', 'front_de', 'front_es', 'front_it', 'front_pt', 'front_nl', 'front_sv', 'front_da', 'front_no', 'front_fi']; + // Common languages (ordered by priority) + const languages = ['en', 'fr', 'de', 'es', 'it', 'pt', 'nl', 'pl', 'ru', 'ja', 'zh', 'sv', 'da', 'no', 'fi']; - for (const langKey of languageKeys) { - const imageRef = images[langKey]; - if (imageRef && typeof imageRef === "object" && imageRef.imgid) { - const imgId = String(imageRef.imgid); // Ensure imgid is a string - if (processedImages.has(imgId)) continue; // Skip if already processed + // Extract all language-specific keys for each image type + for (const imageType of imageTypes) { + for (const lang of languages) { + const key = `${imageType}_${lang}`; + const imageRef = images[key]; - const imageData = images[imgId]; - if (imageData && typeof imageData === "object" && imageData.sizes) { - const sizes = imageData.sizes; - - // Collect all available sizes for this image, grouped by resolution - const imageUrls: { url: string; resolution: string; width: number; height: number }[] = []; - - if (sizes.full && sizes.full.w && sizes.full.h) { - const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${imgId}.jpg`; - if (isValidImageUrl(url)) { - imageUrls.push({ url, resolution: 'full', width: sizes.full.w, height: sizes.full.h }); - } - } - - if (sizes["400"] && sizes["400"].w && sizes["400"].h) { - const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${imgId}.400.jpg`; - if (isValidImageUrl(url)) { - imageUrls.push({ url, resolution: '400px', width: sizes["400"].w, height: sizes["400"].h }); - } - } + if (imageRef && typeof imageRef === "object" && imageRef.imgid && imageRef.sizes) { + // Collect which sizes are available + const availableSizes: ('full' | '400' | '200' | 
'100')[] = []; + if (imageRef.sizes.full) availableSizes.push('full'); + if (imageRef.sizes["400"]) availableSizes.push('400'); + if (imageRef.sizes["200"]) availableSizes.push('200'); + if (imageRef.sizes["100"]) availableSizes.push('100'); - if (sizes["200"] && sizes["200"].w && sizes["200"].h) { - const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${imgId}.200.jpg`; - if (isValidImageUrl(url)) { - imageUrls.push({ url, resolution: '200px', width: sizes["200"].w, height: sizes["200"].h }); - } - } - - if (sizes["100"] && sizes["100"].w && sizes["100"].h) { - const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${imgId}.100.jpg`; - if (isValidImageUrl(url)) { - imageUrls.push({ url, resolution: '100px', width: sizes["100"].w, height: sizes["100"].h }); - } - } - - // Add all valid URLs for this image - for (const img of imageUrls) { - urls.push({ - url: img.url, - resolution: img.resolution, - width: img.width, - height: img.height + if (availableSizes.length > 0) { + metadata.push({ + type: imageType as 'front' | 'ingredients' | 'nutrition' | 'packaging', + language: lang, + imgid: String(imageRef.imgid), + sizes: availableSizes }); } - - processedImages.add(imgId); - } - } - } - - // If no language-specific front images found, try any numeric image - if (urls.length === 0) { - for (const [key, imageData] of Object.entries(images)) { - if (/^\d+$/.test(key) && imageData && typeof imageData === "object" && (imageData as any).sizes) { - const sizes = (imageData as any).sizes; - - // Collect all available sizes for this image - if (sizes.full && sizes.full.w && sizes.full.h) { - const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${key}.jpg`; - if (isValidImageUrl(url)) { - urls.push({ - url, - resolution: 'full', - width: sizes.full.w, - height: sizes.full.h - }); - } - } - - if (sizes["400"] && sizes["400"].w && sizes["400"].h) { - const url = `https://static.openfoodfacts.org/images/products/${barcodePath}/${key}.400.jpg`; - if (isValidImageUrl(url)) { - urls.push({ - url, - resolution: '400px', - width: sizes["400"].w, - height: sizes["400"].h - }); - } - } - - if (urls.length > 0) break; // Stop after finding the first valid image } } } } catch (_error) { - // ignore malformed structures + // Ignore malformed structures } - return urls; + return metadata; } function mapToCacheRow(product: any): CacheRow | null {
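+// Maps one raw OFF product to a CacheRow; it appears to return null when no usable barcode is present.
+// Brand/name selection and ingredient mapping continue below, with images taken from the metadata extracted above.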