Files
aracvericek/import.py
mstfyldz 35c7bac556 main
2026-02-22 23:23:57 +03:00

92 lines
1.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
import requests
import pandas as pd
import psycopg2
from io import StringIO
from datetime import datetime
DATABASE_URL = os.getenv("DATABASE_URL")
# EPA resmi dataset URL
CSV_URL = "https://www.fueleconomy.gov/feg/epadata/vehicles.csv"
print("CSV indiriliyor...")
response = requests.get(CSV_URL)
response.raise_for_status()
print("CSV memory'e alınıyor...")
df = pd.read_csv(StringIO(response.text), low_memory=False)
print("Filtreleme başlıyor...")
# 2000 sonrası
df = df[df["year"] >= 2000]
df = df[[
"make",
"model",
"year",
"fuelType1",
"trany",
"cylinders",
"comb08"
]]
df = df.dropna(subset=["make", "model", "year"])
df = df.drop_duplicates(subset=["make", "model", "year", "fuelType1", "trany"])
# MPG -> L/100km
df["avgConsumption"] = df["comb08"].apply(
lambda mpg: 235.214 / mpg if mpg and mpg > 0 else None
)
df["createdAt"] = datetime.utcnow()
df = df.rename(columns={
"make": "brand",
"fuelType1": "fuelType",
"trany": "transmission",
"comb08": "combinationMpg"
})
df = df[[
"brand",
"model",
"year",
"fuelType",
"transmission",
"cylinders",
"combinationMpg",
"avgConsumption",
"createdAt"
]]
print("DB bağlantısı kuruluyor...")
conn = psycopg2.connect(DATABASE_URL)
cur = conn.cursor()
print("Eski veriler temizleniyor...")
cur.execute('TRUNCATE TABLE "CarLibrary";')
conn.commit()
print("COPY başlıyor...")
buffer = StringIO()
df.to_csv(buffer, index=False, header=False)
buffer.seek(0)
cur.copy_expert(
"""
COPY "CarLibrary"
(brand, model, year, fuelType, transmission, cylinders,
combinationMpg, avgConsumption, createdAt)
FROM STDIN WITH CSV
""",
buffer
)
conn.commit()
cur.close()
conn.close()
print("🚀 IMPORT TAMAMLANDI.")