import csv
import os
import psycopg2

# Function to parse genus accuracies from a CSV file
def parse_genus_accuracies(file_path):
    accuracies = {}
    with open(file_path, 'r') as file:
        reader = list(csv.reader(file))
        total_rows = len(reader)
        for row in reader[1:total_rows-3]:  # Skip header and last 3 rows
            name = row[0].strip()  # Genus name
            f1_score = round(float(row[3].strip()), 2)  # Assuming F1-score is in the 4th column
            accuracies[name] = f1_score
    return accuracies

# Function to get image counts for genera from the database
def get_genus_image_counts():
    image_counts = {}
    # Database connection details
    conn = psycopg2.connect(
        host="sp2.cs.vt.edu",
        database="pelagic",
        user="francesco",
        password="pelagicMaster"
    )
    cur = conn.cursor()
    
    # Query to get genus and their image counts
    cur.execute("""
        SELECT genus, COUNT(*)
        FROM training_copy
        WHERE genus IS NOT NULL
        GROUP BY genus
    """)
    
    rows = cur.fetchall()
    for row in rows:
        genus = row[0].strip()
        image_counts[genus] = row[1]
    
    cur.close()
    conn.close()
    
    return image_counts

# File paths
genus_metrics_file = '/home/spr/SDv4/metrics/classification_report.csv'

# Parse genus accuracies
genus_accuracies = parse_genus_accuracies(genus_metrics_file)

# Get image counts for genera from the database
genus_image_counts = get_genus_image_counts()

# Prepare data for CSV output, but only for genera present in the classification report
csv_data = []
for genus, accuracy in genus_accuracies.items():
    image_count = genus_image_counts.get(genus, 0)  # Use the correct image count from the database
    csv_data.append([genus, image_count, accuracy])

# Sort the data alphabetically by genus name
csv_data.sort(key=lambda x: x[0])

# Write CSV file
csv_file_path = 'sd_table_genus.csv'
with open(csv_file_path, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    # Write CSV headers
    writer.writerow(['Genus', 'Images', 'Accuracy'])
    # Write data to CSV
    writer.writerows(csv_data)

print(f"CSV file '{csv_file_path}' has been created.")
