"""Select the most semantically similar stored topic for a query topic.

Uses LangChain's SemanticSimilarityExampleSelector with Google Generative AI
embeddings and an in-memory FAISS vector store. Requires GOOGLE_API_KEY in the
environment (or a .env file) and a data/topics.json file with a "topics" list.
"""
import json
import os
from datetime import datetime

from dotenv import load_dotenv

# Load environment variables (e.g. GOOGLE_API_KEY) from a local .env file.
load_dotenv()

from langchain_community.vectorstores import FAISS
from langchain_core.example_selectors import (
    SemanticSimilarityExampleSelector,
)
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Load topics from data file
with open("data/topics.json", encoding="utf-8") as f:
    data = json.load(f)

# Make sure each example is a dict with "topic" key (wrap as dict if plain string)
def format_examples(examples):
    formatted = []
    for ex in examples:
        if isinstance(ex, str):
            formatted.append({"topic": ex})
        elif isinstance(ex, dict) and "topic" in ex:
            formatted.append({"topic": ex["topic"]})
        else:
            formatted.append({"topic": str(ex)})
    return formatted

# topics.json should have a top-level "topics" key
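# Illustrative shape: {"topics": ["a plain-string topic", {"topic": "a dict-style topic"}, ...]}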
examples = data.get("topics", [])
formatted_examples = format_examples(examples)

start_time = datetime.now()
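# Build the selector: this embeds every formatted example with the
# text-embedding-004 model and indexes the vectors in an in-memory FAISS store.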
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples=formatted_examples,
    embeddings=GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=os.getenv("GOOGLE_API_KEY"),
    ),
    vectorstore_cls=FAISS,
    k=1,
    input_keys=["topic"],
)

# Example call to selector (for demonstration; remove in production)
result = example_selector.select_examples(
    {"topic": "people who are terminally ill and suffering greatly should have the right to end their own life if they so desire."}
)
print(result)
end_time = datetime.now()
print(f"Time taken: {(end_time - start_time).total_seconds()} seconds")