from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import AllowAny
from serp.models import Keyword, Groups, Competitors
from django.http import JsonResponse
import requests
import json
import os
from django.conf import settings
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from datetime import date
import time
import gc
API_KEY = "67d1a58f740bc0b8bfd018ac"
URL = "https://api.scrapingdog.com/google/"
KEYWORD_THREAD_LIMIT = 5 
# RAPID_API_KEY = "f6d117df33msh3da512288c98c0cp17197ajsn973cb4ee4f5f"


def save_json_file(data, filename="data.json"):
    # Define the file path inside Django's media directory
    file_path = os.path.join(settings.MEDIA_ROOT, filename)

    # Ensure the directory exists
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # Write data to JSON file
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(data, json_file, indent=4)

    return file_path
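
# Illustrative usage (assumes settings.MEDIA_ROOT is configured, e.g. "/srv/app/media";
# the filename mirrors the pattern used in process_keyword below):
#   save_json_file({"ok": True}, "jsonData_3_17.json")
#   -> writes /srv/app/media/jsonData_3_17.json and returns that path.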


def get_latest_rank_trend(rank_history):
    """
    Compares the two latest entries in rank_history and returns 'up', 'down', or 'same'.
    Assumes a lower rank is better.
    """
    try:
        if not isinstance(rank_history, list) or len(rank_history) < 2:
            return "NA"

        # Sort rank history by year, month, date
        sorted_history = sorted(rank_history, key=lambda x: (x["year"], x["month"], x["date"]))

        # Get the last two records (latest two dates)
        latest_two = sorted_history[-2:]

        current = latest_two[1]["rank"]
        previous = latest_two[0]["rank"]

        if current < previous:
            return "up"
        elif current > previous:
            return "down"
        else:
            return "same"
    except Exception as e:
        print(str(e))
        return "NA"


def remove_duplicate_ranks_by_date(rank_history):
    unique = {}
    for entry in rank_history:
        key = (entry["year"], entry["month"], entry["date"])
        unique[key] = entry
    return list(unique.values())
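
# Illustrative behaviour: entries sharing the same (year, month, date) key collapse
# to the one seen last, so re-checking a keyword on the same day overwrites that
# day's earlier rank instead of appending a duplicate:
#   remove_duplicate_ranks_by_date([
#       {"year": 2024, "month": 5, "date": 2, "rank": 8},
#       {"year": 2024, "month": 5, "date": 2, "rank": 9},
#   ])  -> [{"year": 2024, "month": 5, "date": 2, "rank": 9}]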


# def get_traffic_monthly_avg(site_url, max_attempts=3):
#     url = "https://ahrefs2.p.rapidapi.com/traffic"
#     headers = {"x-rapidapi-key": RAPID_API_KEY, "x-rapidapi-host": "ahrefs2.p.rapidapi.com"}
#     params = {"url": site_url, "mode": "exact"}

#     for attempt in range(1, max_attempts + 1):
#         try:
#             response = requests.get(url, headers=headers, params=params, timeout=60)
#             response.raise_for_status()
#             data = response.json()
#             print(data.get("trafficMonthlyAvg", 0))
#             return data.get("trafficMonthlyAvg", 0)
#         except requests.exceptions.RequestException as e:
#             print(f"[Attempt {attempt}] Request failed for {site_url}: {e}")
#             if attempt < max_attempts:
#                 time.sleep(2 * attempt)  # exponential backoff
#         except ValueError as e:
#             print(f"[Attempt {attempt}] Invalid JSON response for {site_url}: {e}")
#             break  # No point retrying if response is not JSON

#     return 0


def process_keyword(keyword, group_id):
    """Process a single keyword ranking check."""
    if keyword.track_status != "INIT":  # Skip if it's already scheduled, completed, or failed
        return

    # Mark the keyword as scheduled (SCHD) before processing
    Keyword.objects.filter(id=keyword.id).update(track_status="SCHD")

    print(f"Started processing keyword {keyword.id} - '{keyword.keyword}' - {keyword.isocode}")

    params = {
        "api_key": API_KEY,
        "query": keyword.keyword,
        "results": 100,
        "country": keyword.isocode,
        "page": 0,
        "advance_search": "false",
    }

    try:
        today = date.today()
        response = requests.get(URL, params=params, timeout=10)
        print("Request finished")
        response.raise_for_status()
        data = response.json()
        save_json_file(data, f"jsonData_{group_id}_{keyword.id}.json")
        print("Organic results:", data.get("organic_results"))

        found = False
        for entry in data.get("organic_results", []):
            site_avg_traffic = 0  # Unused for now; left over from the commented-out get_traffic_monthly_avg lookup
            if keyword.site_url in entry.get("link", ""):
                position = entry.get("rank")
                ranked_url = entry.get("link")
                if position is not None and ranked_url is not None:
                    rank_history = keyword.rank_history
                    rank_history.append(
                        {
                            "date": today.day,
                            "month": today.month,
                            "year": today.year,
                            "rank": position,
                        }
                    )
                    rank_history = remove_duplicate_ranks_by_date(rank_history)
                    rank_trend_now = get_latest_rank_trend(rank_history)
                    # site_avg_traffic = get_traffic_monthly_avg(ranked_url)
                    # Update the keyword status to COMP after processing
                    Keyword.objects.filter(id=keyword.id).update(
                        rank_history=rank_history,
                        ranknow=position,
                        ranked_url=ranked_url,
                        rank_trend=rank_trend_now,
                        track_status="COMP",
                    )
                    Competitors.objects.filter(fk_group_id=int(keyword.fk_group_id)).update(track_status="INIT")
                    print(f"Keyword {keyword.id} updated successfully: rank {position}")
                    found = True
                    break

        if not found:
            # No ranking found in the results; still mark the keyword as completed (COMP)
            Keyword.objects.filter(id=keyword.id).update(track_status="COMP")

    except Exception as e:
        print(f"[Keyword Error] {keyword.id} - {e}")
        # On any failure (request error, JSON parsing, or DB update), mark the keyword as FAIL
        Keyword.objects.filter(id=keyword.id).update(track_status="FAIL")


# def process_group(group, sort_order):
#     """Process a single group and its keywords sequentially."""
#     group_id = group.id
#     print("Group id", group_id)
#     # Mark the group as scheduled (SCHD)
#     Groups.objects.filter(id=group_id).update(track_status="SCHD")

#     # Get keywords with INIT status and randomize their order
#     sort_order = "created_date" if int(sort_order) % 2 == 1 else "-created_date"
#     keywords = Keyword.objects.filter(fk_group_id=group_id, track_status="INIT").order_by(sort_order)
#     print("All matching keywords:", keywords)

#     if not keywords:
#         # If no keywords to process, mark the group as completed
#         uptrend_keywords = Keyword.objects.filter(fk_group_id=group_id, rank_trend="up").count()
#         downtrend_keywords = Keyword.objects.filter(fk_group_id=group_id, rank_trend="down").count()
#         Groups.objects.filter(id=group_id).update(track_status="COMP", uptrend_cnt=uptrend_keywords, downtrend_cnt=downtrend_keywords)
#         return

#     # Process the keywords concurrently
#     with ThreadPoolExecutor(max_workers=KEYWORD_THREAD_LIMIT) as executor:
#         futures = [executor.submit(process_keyword, keyword, group_id) for keyword in keywords]
#         for future in as_completed(futures):
#             try:
#                 future.result()  # Wait for all futures to complete
#             except Exception as e:
#                 print(f"[Keyword Error] {e}")

#     # Once all keywords are processed, mark group as completed (COMP)
#     uptrend_keywords = Keyword.objects.filter(fk_group_id=group_id, rank_trend="up").count()
#     downtrend_keywords = Keyword.objects.filter(fk_group_id=group_id, rank_trend="down").count()
#     Groups.objects.filter(id=group_id).update(track_status="COMP", uptrend_cnt=uptrend_keywords, downtrend_cnt=downtrend_keywords)


def chunkify(iterable, size):
    """Yield successive chunks from a list."""
    for i in range(0, len(iterable), size):
        yield iterable[i:i + size]
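
# Illustrative behaviour: chunkify(list(range(25)), 10) yields three lists of lengths
# 10, 10 and 5, so each ThreadPoolExecutor in process_group below is only ever given
# at most BATCH_SIZE keyword jobs at a time.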
        
        
def process_group(group, sort_order):
    """Process a single group and its keywords in batches."""
    group_id = group.id
    print("Group id", group_id)
    # Mark the group as scheduled (SCHD)
    Groups.objects.filter(id=group_id).update(track_status="SCHD")
    sort_order = "created_date" if int(sort_order) % 2 == 1 else "-created_date"
    keywords = list(Keyword.objects.filter(fk_group_id=group_id, track_status="INIT").order_by(sort_order))
    print("Total keywords to process:", len(keywords))
    if not keywords:
        uptrend_keywords = Keyword.objects.filter(fk_group_id=group_id, rank_trend="up").count()
        downtrend_keywords = Keyword.objects.filter(fk_group_id=group_id, rank_trend="down").count()
        Groups.objects.filter(id=group_id).update(track_status="COMP", uptrend_cnt=uptrend_keywords, downtrend_cnt=downtrend_keywords)
        return
    BATCH_SIZE = 10  # You can tweak this depending on your memory usage
    for keyword_batch in chunkify(keywords, BATCH_SIZE):
        with ThreadPoolExecutor(max_workers=KEYWORD_THREAD_LIMIT) as executor:
            futures = [executor.submit(process_keyword, keyword, group_id) for keyword in keyword_batch]
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"[Keyword Error] {e}")
        # Free memory held by the finished batch before moving on to the next one
        del keyword_batch
        gc.collect()
    # Mark group as completed after all batches
    uptrend_keywords = Keyword.objects.filter(fk_group_id=group_id, rank_trend="up").count()
    downtrend_keywords = Keyword.objects.filter(fk_group_id=group_id, rank_trend="down").count()
    Groups.objects.filter(id=group_id).update(track_status="COMP", uptrend_cnt=uptrend_keywords, downtrend_cnt=downtrend_keywords)

@api_view(["GET"])
@permission_classes((AllowAny,))
def keywordRanker(request, sort_order):
    """Cron job to process one group at a time using track_status."""
    print(f"cron started--->")
    # Step 1: Prioritize group that is already in progress (SCHD)
    group = Groups.objects.filter(track_status="SCHD").first()
    if not group:
        # Step 2: If no group is currently processing, pick INIT group
        group = Groups.objects.filter(track_status="INIT").first()
        print(f"No group processing.")
        if group:
            Groups.objects.filter(id=group.id).update(track_status="SCHD")
            
    if not group:
        print(f"Current group: {group}")
        return JsonResponse({"status": "true", "message": "No groups to process"})

    try:
        process_group(group, sort_order)  # Process this group one by one
    except Exception as e:
        print(f"[Group Error] {group.id} - {e}")
        # If something goes wrong with a group, mark it as failed
        Groups.objects.filter(id=group.id).update(track_status="FAIL")
    return JsonResponse({"status": "true", "message": "Group processing completed"})
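
# Hypothetical URL wiring (the project's urls.py is not shown here), assuming the cron
# calls this view with an incrementing sort_order so the keyword ordering alternates:
#   path("cron/keyword-ranker/<int:sort_order>/", keywordRanker)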



# @api_view(["GET"])
# @permission_classes((AllowAny,))
# def keywordRanker(request, sort_order):
#     """
#     Cron job to process *all* groups in INIT or SCHD state in one go.
#     """
#     print("cron started ---> processing all groups")

#     # 1. Grab every group that still needs work
#     groups = Groups.objects.filter(track_status__in=["INIT", "SCHD"]).order_by("id")

#     if not groups.exists():
#         return JsonResponse({"status": "true", "message": "No groups to process"})

#     for group in groups:
#         # If it's still INIT, mark it SCHD so process_group sees it as in-progress
#         if group.track_status == "INIT":
#             Groups.objects.filter(id=group.id).update(track_status="SCHD")

#         try:
#             print(f"Processing group {group.id} (status={group.track_status})")
#             process_group(group, sort_order)
#         except Exception as e:
#             print(f"[Group Error] {group.id} - {e}")
#             Groups.objects.filter(id=group.id).update(track_status="FAIL")

#     return JsonResponse({"status": "true", "message": "All groups processed"})
