import os import requests import pandas as pd import psycopg2 from io import StringIO from datetime import datetime DATABASE_URL = os.getenv("DATABASE_URL") # EPA resmi dataset URL CSV_URL = "https://www.fueleconomy.gov/feg/epadata/vehicles.csv" print("CSV indiriliyor...") response = requests.get(CSV_URL) response.raise_for_status() print("CSV memory'e alınıyor...") df = pd.read_csv(StringIO(response.text), low_memory=False) print("Filtreleme başlıyor...") # 2000 sonrası df = df[df["year"] >= 2000] df = df[[ "make", "model", "year", "fuelType1", "trany", "cylinders", "comb08" ]] df = df.dropna(subset=["make", "model", "year"]) df = df.drop_duplicates(subset=["make", "model", "year", "fuelType1", "trany"]) # MPG -> L/100km df["avgConsumption"] = df["comb08"].apply( lambda mpg: 235.214 / mpg if mpg and mpg > 0 else None ) df["createdAt"] = datetime.utcnow() df = df.rename(columns={ "make": "brand", "fuelType1": "fuelType", "trany": "transmission", "comb08": "combinationMpg" }) df = df[[ "brand", "model", "year", "fuelType", "transmission", "cylinders", "combinationMpg", "avgConsumption", "createdAt" ]] print("DB bağlantısı kuruluyor...") conn = psycopg2.connect(DATABASE_URL) cur = conn.cursor() print("Eski veriler temizleniyor...") cur.execute('TRUNCATE TABLE "CarLibrary";') conn.commit() print("COPY başlıyor...") buffer = StringIO() df.to_csv(buffer, index=False, header=False) buffer.seek(0) cur.copy_expert( """ COPY "CarLibrary" (brand, model, year, fuelType, transmission, cylinders, combinationMpg, avgConsumption, createdAt) FROM STDIN WITH CSV """, buffer ) conn.commit() cur.close() conn.close() print("🚀 IMPORT TAMAMLANDI.")