
import os
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Filesystem layout: inputs and outputs all live under sue_core/sue_modules.
_modules_root = os.path.join("sue_core", "sue_modules")
data_dir = os.path.join(_modules_root, "sue_news_index")  # article *.json inputs
index_file = os.path.join(_modules_root, "vector.index")  # FAISS index output
metadata_file = os.path.join(_modules_root, "vector_metadata.json")  # metadata output

# Sentence-embedding model used to turn article text into vectors.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Flat L2 index sized for the MiniLM embedding width, plus the list that
# collects one metadata record per indexed article.
dimension = 384
index = faiss.IndexFlatL2(dimension)
metadata = []

def load_data():
    """Embed every article JSON found in ``data_dir`` and record its metadata.

    Each ``*.json`` file is read as UTF-8 and must contain an object with
    ``title``, ``summary``, ``source`` and ``link`` keys. The text
    ``"<title>. <summary>"`` is embedded with the module-level ``model``.
    For every embedded article a ``{"title", "source", "link"}`` record is
    appended to the module-level ``metadata`` list, in the same order as the
    rows of the returned array.

    Files that cannot be read, parsed, or that lack a required key are
    skipped with a printed warning instead of aborting the whole run.

    Returns:
        A float32 array with one row per indexed article. When ``data_dir``
        is missing or holds no usable article the array has zero rows, so
        ``len(result) == 0``.
    """
    try:
        # Sorted so the row order (and matching metadata order) is
        # reproducible; os.listdir returns entries in arbitrary order.
        file_names = sorted(os.listdir(data_dir))
    except FileNotFoundError:
        return np.empty((0, dimension), dtype="float32")

    texts = []
    entries = []
    for file_name in file_names:
        if not file_name.endswith(".json"):
            continue
        path = os.path.join(data_dir, file_name)
        try:
            # Explicit UTF-8: the platform default encoding may differ.
            with open(path, "r", encoding="utf-8") as f:
                article = json.load(f)
            text = f"{article['title']}. {article['summary']}"
            entry = {
                "title": article['title'],
                "source": article['source'],
                "link": article['link'],
            }
        except (OSError, ValueError, KeyError, TypeError) as exc:
            # ValueError covers JSONDecodeError and UnicodeDecodeError;
            # TypeError covers a JSON document that is not an object.
            print(f"Skipping {path}: {exc!r}")
            continue
        texts.append(text)
        entries.append(entry)

    if not texts:
        return np.empty((0, dimension), dtype="float32")

    # Encode in one batched call rather than one model call per article.
    embeddings = np.asarray(model.encode(texts), dtype="float32")
    # Only publish metadata once encoding succeeded, so it always lines up
    # with the rows of the returned array.
    metadata.extend(entries)
    return embeddings

# Embed the articles, then persist the FAISS index together with the
# metadata list that describes its rows.
data_vectors = load_data()
has_vectors = len(data_vectors) > 0
if not has_vectors:
    print("No data found to index.")
else:
    index.add(data_vectors)
    faiss.write_index(index, index_file)
    with open(metadata_file, "w") as meta_fh:
        json.dump(metadata, meta_fh, indent=2)
    print(f"Vector memory index created with {len(metadata)} entries.")
