from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
import time
import re
import math
import requests
from django.conf import settings 
from datetime import timedelta
from django.utils.timezone import now

def normalize_jd_experience(text):
    """
    Normalize a free-form JD experience string into (min_years, max_years).

    Recognized formats (unit words "years/year/yrs/yr" are stripped first):
        "2-4 years"       -> (2.0, 4.0)
        "2 to 4 years"    -> (2.0, 4.0)   # also en/em dash separators
        "2+ years"        -> (2.0, math.inf)
        "over 3 years"    -> (3.0, math.inf)   # also "more than"
        "up to 5 years"   -> (0.0, 5.0)        # also "within", "upto"
        "3 years"         -> (3.0, 4.0)        # single value N -> (N, N+1)
        "2.4 years"       -> (2.0, 3.0)        # fractional single values floor

    Returns:
        (min_years, max_years) as floats (max may be math.inf), or
        (None, None) when the text is empty or contains no number.
    """
    if not text:
        return None, None

    text = text.lower().strip()
    # Drop unit words so only the numeric expression remains.
    text = re.sub(r"(years|year|yrs|yr)", "", text).strip()

    # 1. Explicit range: "2-4", "2 to 4", "2 – 4"
    match = re.search(r"(\d+(\.\d+)?)\s*(?:to\b|[-\u2013\u2014])\s*(\d+(\.\d+)?)", text)
    if match:
        return float(match.group(1)), float(match.group(3))

    # 2. Open-ended minimum: "2+"
    match = re.search(r"(\d+(\.\d+)?)\s*\+", text)
    if match:
        return float(match.group(1)), math.inf

    # 3. Open-ended minimum: "over 3" / "more than 3"
    match = re.search(r"(over|more than)\s*(\d+(\.\d+)?)", text)
    if match:
        return float(match.group(2)), math.inf

    # 4. Capped maximum: "within 5" / "up to 5"
    match = re.search(r"(within|upto|up to)\s*(\d+(\.\d+)?)", text)
    if match:
        return 0.0, float(match.group(2))

    # 5. Single value: floor it and allow one extra year of slack.
    match = re.search(r"(\d+(\.\d+)?)", text)
    if match:
        min_val = float(int(float(match.group(1))))
        return min_val, min_val + 1

    return None, None

######################### AUTO RECRUITER #######################
# def run_scraper(username, password):

#     with sync_playwright() as p:

#         browser = p.chromium.launch(headless=False)
#         page = browser.new_page()

#         page.goto(
#             "https://auto-recruiter.itconnectus.com/authlogin",
#             wait_until="networkidle"
#         )

#         # Fill login form
#         page.fill("#email-login", username)
#         page.fill("#password-login", password)

#         page.get_by_role("button", name="Sign In").click()

#         try:
#             # Wait for element that appears after login
#             page.wait_for_selector("text=Dashboard", timeout=10000)

#             login_status = "Login Successful"

#         except PlaywrightTimeout:
#             login_status = "Login Failed"

#         browser.close()

#     return login_status


######################### LINKEDIN #######################
# def run_scraper(username, password):

#     with sync_playwright() as p:

#         browser = p.chromium.launch(headless=False)
#         page = browser.new_page()

#         page.goto(
#             "https://www.linkedin.com/login-cap",
#             wait_until="networkidle"
#         )

#         # Fill login form
#         page.fill("#username", username)
#         page.fill("#password", password)

#         page.get_by_role("button", name="Sign in").click()

#         try:
#             # Wait for element that appears after login
#             page.wait_for_selector("text=Dashboard", timeout=10000)

#             login_status = "Login Successful"

#         except PlaywrightTimeout:
#             login_status = "Login Failed"

#         browser.close()

#     return login_status

######################### FOUNDIT #######################
def run_scraper_foundit(jd_data, candidate_data, token):
    """
    Scrape candidate resumes from recruiter.foundit.sg for one JD.

    Opens a persistent Chromium profile (so a prior manual login is reused),
    builds a boolean keyword query from the JD's skill lists, applies
    min/max experience filters, then walks the search-result pages. For up
    to 5 new candidates it downloads the resume, posts it to the backend
    similarity-check endpoint, confirms the upload, and triggers a
    matched-profile regeneration for the JD.

    Args:
        jd_data: JD object exposing ``search_pattern`` (dict with keys
            "primary_skills", "secondary_skills", "tools_and_frameworks"),
            ``years_of_experience`` (str) and ``jd_id``.
        candidate_data: dict mapping candidate email -> last-updated
            datetime; candidates refreshed within 180 days are skipped.
        token: Authorization header value for the backend API calls.

    Returns:
        "Extracted Successfully" or "Extraction Failed".
    """
    with sync_playwright() as p:

        # Persistent context keeps cookies between runs so a manual login
        # survives; headless=False because login may need a human.
        browser = p.chromium.launch_persistent_context(
            user_data_dir="profile_data",
            headless=False
        )

        page = browser.pages[0]

        try:
            page.goto(
                "https://recruiter.foundit.sg/edge/recruiter-dashboard",
                wait_until="domcontentloaded"
            )

            # If the login form appears, wait (up to 10 min) for the user to
            # log in manually; otherwise we are already on the dashboard.
            try:
                page.wait_for_selector("#login_username", timeout=5000)
                page.wait_for_url("**/recruiter-dashboard**", timeout=600000)
            except PlaywrightTimeout:
                page.wait_for_url("**/recruiter-dashboard**", timeout=600000)

            page.wait_for_load_state("networkidle")
            page.wait_for_selector("text=Search Candidates", timeout=15000)
            page.locator("text=Search Candidates").click()
            page.locator(".qs_advance_search button:has-text('Advance Search')").click()

            page.wait_for_selector(".SearchForm_label-inputTuning", timeout=15000)
            page.locator(".SearchForm_label-inputTuning").click()

            page.wait_for_selector(".public-DraftEditor-content", timeout=15000)
            editor = page.locator(".public-DraftEditor-content")
            editor.click()

            keyword_query = ""

            jd_search_patterns = jd_data.search_pattern
            primary = jd_search_patterns["primary_skills"]
            secondary = jd_search_patterns["secondary_skills"]
            tools = jd_search_patterns["tools_and_frameworks"]

            # De-duplicate skills across the three buckets.
            secondary = [s for s in secondary if s not in primary]
            tools = [t for t in tools if t not in primary and t not in secondary]

            # Build: "p1" AND "p2" AND ("s1", "s2", ...) capped at max_len chars.
            max_len = 300
            primary_part = " AND ".join(f'"{p}"' for p in primary)
            query = primary_part + " AND ("
            current_len = len(query)

            items = secondary + tools
            selected_items = []

            for skill in items:
                formatted = f'"{skill}"'

                addition = formatted if not selected_items else ", " + formatted

                # +1 reserves room for the closing parenthesis.
                if current_len + len(addition) + 1 > max_len:
                    break

                selected_items.append(formatted)
                current_len += len(addition)

            secondary_tools_group = ", ".join(selected_items)
            keyword_query = f'{primary_part} AND ({secondary_tools_group})'

            editor.type(keyword_query)

            jd_exp = jd_data.years_of_experience
            if jd_exp != "Not Specified":
                jd_min, jd_max = normalize_jd_experience(jd_exp)
                # Guard against math.inf (returned for "N+" / "over N"):
                # math.floor/ceil raise OverflowError on infinity.
                jd_min = math.floor(jd_min) if jd_min is not None and math.isfinite(jd_min) else None
                jd_max = math.ceil(jd_max) if jd_max is not None and math.isfinite(jd_max) else None

                # The dropdown sits in the parent of the filter-title node.
                exp_min_container = page.locator(
                    ".filters__container__filter__title:has-text('Experience (Minimum)')"
                ).locator("..")

                exp_min_container.locator(".dwn_arrow").click()
                exp_min_container.locator(".filters__container__range-dropdown__options").wait_for(timeout=15000)
                min_options = page.locator(".filters__container__range-dropdown__option")
                # Unknown / out-of-range minimum -> pick the lowest option.
                if jd_min is None or jd_min < 0 or jd_min > 50:
                    min_options.first.click()
                else:
                    page.locator(
                        f".filters__container__range-dropdown__option:text-is('{jd_min} Years')"
                    ).click()


                exp_max_container = page.locator(
                    ".filters__container__filter__title:has-text('Experience (Maximum)')"
                ).locator("..")

                exp_max_container.locator(".dwn_arrow").click()
                exp_max_container.locator(".filters__container__range-dropdown__options").wait_for(timeout=15000)
                max_options = page.locator(".filters__container__range-dropdown__option")
                # Unknown / unbounded maximum -> pick the highest option.
                if jd_max is None or jd_max < 0 or jd_max > 50:
                    max_options.last.click()
                else:
                    page.locator(
                        f".filters__container__range-dropdown__option:text-is('{jd_max} Years')"
                    ).click()

            page.locator(".Search_LeftStickyFoot-filters button:has-text('Search Candidates')").click()

            page.wait_for_url("**/search-results**")
            page.wait_for_selector(".p-card_body_rt_profileContainer", timeout=30000)

            processed_profiles = 0

            while processed_profiles < 5:
                # Results lazy-load on scroll: keep scrolling until the
                # card count stops growing.
                previous_count = 0
                while True:
                    cards = page.locator(".p-card_body_rt_profileContainer")
                    current_count = cards.count()

                    if current_count == previous_count:
                        print("All candidates loaded on this page")
                        break

                    previous_count = current_count
                    page.mouse.wheel(0, 1800)
                    page.wait_for_timeout(2000)

                cards = page.locator(".p-card_body_rt_profileContainer")
                card_count = cards.count()

                for i in range(card_count):

                    if processed_profiles >= 5:
                        break

                    card = cards.nth(i)

                    try:
                        resume_button = card.locator("button:has-text('Resume')")
                        resume_button.wait_for(timeout=5000)
                    except Exception:
                        # Card without a resume button -> nothing to fetch.
                        continue

                    try:
                        # The resume opens in a popup tab.
                        with page.expect_popup() as popup_info:
                            resume_button.click()

                        resume_page = popup_info.value
                        resume_page.wait_for_load_state()
                        resume_page.wait_for_timeout(2000)

                    except Exception:
                        print("Popup failed")
                        continue

                    try:
                        email_value = (
                            resume_page
                            .locator(".rec-cdp_contactValueBold")
                            .last
                            .locator("span")
                            .inner_text()
                            .strip()
                            .lower()
                        )
                    except Exception:
                        resume_page.close()
                        continue

                    # Masked emails ("ab...@x.com") cannot be matched -> skip.
                    if "..." in email_value:
                        resume_page.close()
                        continue

                    # Skip candidates whose record was refreshed recently.
                    if email_value in candidate_data:
                        updated_dt = candidate_data[email_value]
                        six_months_ago = now() - timedelta(days=180)

                        if updated_dt >= six_months_ago:
                            resume_page.close()
                            continue

                    try:
                        with resume_page.expect_download() as download_info:
                            resume_page.locator(
                                ".rec-cdp_attatchedResume_links button:has-text('Download')"
                            ).click()

                        download = download_info.value
                        file_path = download.path()

                    except Exception:
                        resume_page.close()
                        continue

                    candidate_resume_upload_api_url = (
                        f"{settings.DJANGO_ORIGIN_LINK}"
                        "upload_candidate_resume/check_resume_similarity/"
                    )

                    headers = {"Authorization": token}

                    with open(file_path, "rb") as f:
                        files = {"file": (download.suggested_filename, f)}

                        response = requests.post(
                            candidate_resume_upload_api_url,
                            files=files,
                            headers=headers
                        )

                    if response.status_code != 200:
                        print("Upload check failed:", response.text)
                        resume_page.close()
                        continue

                    data = response.json()

                    candidate_exists = data.get("candidate_exists")
                    has_existing_resume = data.get("has_existing_resume")
                    temp_file_name = data.get("temp_file_name")
                    db_data = data.get("db_data")
                    action_required = data.get("action_required")

                    # "new" creates a candidate record; "update" replaces the
                    # resume of an existing one.
                    if not candidate_exists or not has_existing_resume or action_required == "new":
                        choice_value = "new"
                    else:
                        choice_value = "update"

                    confirm_upload_api_url = (
                        f"{settings.DJANGO_ORIGIN_LINK}"
                        "upload_candidate_resume/upload_resume_confirm/"
                    )

                    confirm_payload = {
                        "temp_file_name": temp_file_name,
                        "choice": choice_value,
                        "db_data": db_data,
                        "source": "foundit"
                    }

                    confirm_response = requests.post(
                        confirm_upload_api_url,
                        json=confirm_payload,
                        headers=headers
                    )

                    if confirm_response.status_code not in [200, 201]:
                        print("Resume save failed")
                    else:
                        # Saved OK -> re-run JD matching over all params.
                        matched_profile_reset_api_url = (
                            f"{settings.DJANGO_ORIGIN_LINK}matched-profiles/"
                        )

                        matched_payload = {
                            "jd_id": jd_data.jd_id,
                            "regen_filter": True,
                            "eligible_search_params": [
                                "years_of_experience",
                                "keywords",
                                "primary_skills",
                                "responsibilities",
                                "tools_and_frameworks",
                                "secondary_skills",
                                "domain_requirements",
                                "education",
                                "location",
                            ]
                        }

                        requests.post(
                            matched_profile_reset_api_url,
                            json=matched_payload,
                            headers=headers
                        )

                        # Only successfully saved resumes count toward the cap.
                        processed_profiles += 1

                    resume_page.close()

                if processed_profiles < 5:

                    next_btn = page.locator("button[aria-label='Next Page']").first

                    if next_btn.count() == 0 or next_btn.is_disabled():
                        print("No more pages available")
                        break

                    next_btn.click()

                    page.wait_for_function(
                        "() => document.querySelectorAll('.p-card_body_rt_profileContainer').length > 0"
                    )

                    page.wait_for_timeout(3000)

            time.sleep(30)
            response_status = "Extracted Successfully"

        except PlaywrightTimeout:

            print("Timeout occurred during process")
            response_status = "Extraction Failed"

        except Exception as e:

            print("Unexpected error:", str(e))
            response_status = "Extraction Failed"

        finally:
            # Always release the browser, even if something other than
            # Exception (e.g. KeyboardInterrupt) escapes the handlers above.
            browser.close()

    return response_status

def run_scraper_naukri(jd_data, candidate_data, token):
    """
    Scrape candidate resumes from recruit.naukri.com (Resdex) for one JD.

    Opens a persistent Chromium profile (so a prior manual login is reused),
    builds a boolean keyword query from the JD's skill lists, fills the
    min/max experience inputs, then walks the search-result pages. For up
    to 5 new candidates it downloads the resume, posts it to the backend
    similarity-check endpoint, confirms the upload, and triggers a
    matched-profile regeneration for the JD.

    Args:
        jd_data: JD object exposing ``search_pattern`` (dict with keys
            "primary_skills", "secondary_skills", "tools_and_frameworks"),
            ``years_of_experience`` (str) and ``jd_id``.
        candidate_data: dict mapping candidate email -> last-updated
            datetime; candidates refreshed within 180 days are skipped.
        token: Authorization header value for the backend API calls.

    Returns:
        "Extracted Successfully" or "Extraction Failed".
    """
    with sync_playwright() as p:

        # Persistent context keeps cookies between runs so a manual login
        # survives; headless=False because login may need a human.
        browser = p.chromium.launch_persistent_context(
            user_data_dir="profile_data",
            headless=False
        )

        page = browser.pages[0]

        try:
            page.goto(
                "https://recruit.naukri.com/",
                wait_until="domcontentloaded"
            )

            login_button = page.get_by_role("button", name="Register/Log in")

            # Either way, wait (up to 10 min) until we land back on the
            # recruiter domain — manual login may be required.
            if login_button.count() > 0:
                login_button.click()
                page.wait_for_url("**/recruit.naukri**", timeout=600000)
            else:
                page.wait_for_url("**/recruit.naukri**", timeout=600000)

            page.wait_for_load_state("networkidle")

            # NOTE(review): .xzaIN / .I3sTS / .rL5xY / .LS1P2 look like
            # generated CSS-module class names — fragile across site deploys.
            resdex_menu = page.locator(".xzaIN:has-text('Resdex')")
            resdex_menu.wait_for(timeout=10000)
            resdex_menu.hover()

            page.wait_for_timeout(5000)
            page.wait_for_selector(".I3sTS:has-text('Search Resumes')", timeout=5000)
            page.locator(".I3sTS:has-text('Search Resumes')").click()

            page.wait_for_selector(".ts-slider__button", timeout=15000)
            page.locator(".ts-slider__button").first.click()

            page.wait_for_selector("input[name='boolKeywords']", timeout=15000)
            editor = page.locator("input[name='boolKeywords']")
            editor.click()

            keyword_query = ""

            jd_search_patterns = jd_data.search_pattern
            primary = jd_search_patterns["primary_skills"]
            secondary = jd_search_patterns["secondary_skills"]
            tools = jd_search_patterns["tools_and_frameworks"]

            # De-duplicate skills across the three buckets.
            secondary = [s for s in secondary if s not in primary]
            tools = [t for t in tools if t not in primary and t not in secondary]

            # Build: "p1" AND "p2" AND ("s1", "s2", ...) capped at max_len chars.
            max_len = 300
            primary_part = " AND ".join(f'"{p}"' for p in primary)
            query = primary_part + " AND ("
            current_len = len(query)

            items = secondary + tools
            selected_items = []

            for skill in items:
                formatted = f'"{skill}"'

                addition = formatted if not selected_items else ", " + formatted

                # +1 reserves room for the closing parenthesis.
                if current_len + len(addition) + 1 > max_len:
                    break

                selected_items.append(formatted)
                current_len += len(addition)

            secondary_tools_group = ", ".join(selected_items)
            keyword_query = f'{primary_part} AND ({secondary_tools_group})'

            editor.type(keyword_query)

            jd_exp = jd_data.years_of_experience
            if jd_exp != "Not Specified":
                jd_min, jd_max = normalize_jd_experience(jd_exp)
                # Guard against math.inf (returned for "N+" / "over N"):
                # math.floor/ceil raise OverflowError on infinity.
                jd_min = math.floor(jd_min) if jd_min is not None and math.isfinite(jd_min) else None
                jd_max = math.ceil(jd_max) if jd_max is not None and math.isfinite(jd_max) else None

                # Only fill a bound when we actually have one; typing "None"
                # into the numeric inputs would corrupt the filter.
                if jd_min is not None:
                    exp_min_container = page.locator(
                        "input[name='minExp']"
                    )
                    exp_min_container.click()
                    exp_min_container.type(str(jd_min), delay=100)

                if jd_max is not None:
                    exp_max_container = page.locator(
                        "input[name='maxExp']"
                    )
                    exp_max_container.click()
                    exp_max_container.type(str(jd_max), delay=100)

            page.locator("#adv-search-btn").click()

            page.wait_for_url("**/search**")
            page.wait_for_selector(".candidate-headline", timeout=30000)

            processed_profiles = 0

            while processed_profiles < 5:
                cards = page.locator(".candidate-headline")
                card_count = cards.count()

                for i in range(card_count):

                    if processed_profiles >= 5:
                        break

                    card = cards.nth(i)

                    try:
                        resume_button = card.locator("a")
                        resume_button.wait_for(timeout=5000)
                    except Exception:
                        # Card without a profile link -> nothing to fetch.
                        continue

                    try:
                        # The profile opens in a popup tab.
                        with page.expect_popup() as popup_info:
                            resume_button.click()

                        resume_page = popup_info.value
                        resume_page.wait_for_load_state()
                        resume_page.wait_for_timeout(2000)

                    except Exception:
                        print("Popup failed")
                        continue

                    try:
                        email_value = (
                            resume_page
                            .locator(".rL5xY")
                            .first
                            .locator("span")
                            .inner_text()
                            .strip()
                            .lower()
                        )
                    except Exception:
                        resume_page.close()
                        continue

                    # Masked emails ("ab...@x.com") cannot be matched -> skip.
                    if "..." in email_value:
                        resume_page.close()
                        continue

                    # Skip candidates whose record was refreshed recently.
                    if email_value in candidate_data:
                        updated_dt = candidate_data[email_value]
                        six_months_ago = now() - timedelta(days=180)

                        if updated_dt >= six_months_ago:
                            resume_page.close()
                            continue

                    try:
                        with resume_page.expect_download() as download_info:
                            resume_page.locator(
                                ".LS1P2"
                            ).first.click()

                        download = download_info.value
                        file_path = download.path()

                    except Exception:
                        resume_page.close()
                        continue

                    candidate_resume_upload_api_url = (
                        f"{settings.DJANGO_ORIGIN_LINK}"
                        "upload_candidate_resume/check_resume_similarity/"
                    )

                    headers = {"Authorization": token}

                    with open(file_path, "rb") as f:
                        files = {"file": (download.suggested_filename, f)}

                        response = requests.post(
                            candidate_resume_upload_api_url,
                            files=files,
                            headers=headers
                        )

                    if response.status_code != 200:
                        print("Upload check failed:", response.text)
                        resume_page.close()
                        continue

                    data = response.json()

                    candidate_exists = data.get("candidate_exists")
                    has_existing_resume = data.get("has_existing_resume")
                    temp_file_name = data.get("temp_file_name")
                    db_data = data.get("db_data")
                    action_required = data.get("action_required")

                    # "new" creates a candidate record; "update" replaces the
                    # resume of an existing one.
                    if not candidate_exists or not has_existing_resume or action_required == "new":
                        choice_value = "new"
                    else:
                        choice_value = "update"

                    confirm_upload_api_url = (
                        f"{settings.DJANGO_ORIGIN_LINK}"
                        "upload_candidate_resume/upload_resume_confirm/"
                    )

                    confirm_payload = {
                        "temp_file_name": temp_file_name,
                        "choice": choice_value,
                        "db_data": db_data,
                        "source": "naukri"
                    }

                    confirm_response = requests.post(
                        confirm_upload_api_url,
                        json=confirm_payload,
                        headers=headers
                    )

                    if confirm_response.status_code not in [200, 201]:
                        print("Resume save failed")
                    else:
                        # Saved OK -> re-run JD matching over all params.
                        matched_profile_reset_api_url = (
                            f"{settings.DJANGO_ORIGIN_LINK}matched-profiles/"
                        )

                        matched_payload = {
                            "jd_id": jd_data.jd_id,
                            "regen_filter": True,
                            "eligible_search_params": [
                                "years_of_experience",
                                "keywords",
                                "primary_skills",
                                "responsibilities",
                                "tools_and_frameworks",
                                "secondary_skills",
                                "domain_requirements",
                                "education",
                                "location",
                            ]
                        }

                        requests.post(
                            matched_profile_reset_api_url,
                            json=matched_payload,
                            headers=headers
                        )

                        # Only successfully saved resumes count toward the cap.
                        processed_profiles += 1

                    resume_page.close()

                if processed_profiles < 5:

                    next_btn = page.locator("button[data-testid='next-page']")

                    if next_btn.count() == 0 or next_btn.is_disabled():
                        print("No more pages available")
                        break

                    next_btn.click()

                    page.wait_for_function(
                        "() => document.querySelectorAll('.candidate-headline').length > 0"
                    )

                    page.wait_for_timeout(3000)

            response_status = "Extracted Successfully"

        except PlaywrightTimeout:

            print("Timeout occurred during process")
            response_status = "Extraction Failed"

        except Exception as e:

            print("Unexpected error:", str(e))
            response_status = "Extraction Failed"

        finally:
            # Grace period before teardown (matches original behavior on
            # both success and failure paths), then always release the
            # browser — even if a non-Exception escapes the handlers above.
            time.sleep(30)
            browser.close()

    return response_status
