graph TD
    %% Semantic cache lookup flow: a new query is embedded, compared against
    %% cached entries by cosine similarity, and then either answered from the
    %% cache or forwarded to the LLM, whose response is stored for later reuse.
    %% Multi-line labels use <br/> because a backslash-n escape inside a label
    %% is not rendered as a line break by every Mermaid version.

    %% Lookup: embed the query and search the cache.
    Q["New Query"] --> E["Embed Query<br/>(sentence-transformers)"]
    E --> S["Cosine Similarity Search<br/>vs cache entries"]

    %% Decision: the two edges split on the same 0.93 similarity threshold.
    S -->|sim > 0.93| HIT["Cache HIT<br/>Return stored response"]
    S -->|sim ≤ 0.93| MISS["Cache MISS"]

    %% Miss path: call the LLM, then store the response with its embedding.
    MISS --> LLM["Call LLM API"]
    LLM --> STORE["Store response + embedding<br/>in cache"]

    %% Both paths converge on the same response node.
    STORE --> RESP["Return response"]
    HIT --> RESP

    %% Colour-code the hit, miss and LLM nodes; all share stroke and text colour.
    style HIT fill:#00C9A7,stroke:#1C355E,color:#1C355E
    style MISS fill:#FF7A5C,stroke:#1C355E,color:#1C355E
    style LLM fill:#9B8EC0,stroke:#1C355E,color:#1C355E



