"""PDF Generator — 1-Page ATS-Friendly Resume.

Uses fpdf2. Auto-sizes fonts to fit everything on exactly 1 page.
ATS-safe: single column, standard fonts, no images/tables.
"""

import logging
import re
import tempfile
import unicodedata
from typing import Optional

from fpdf import FPDF

logger = logging.getLogger(__name__)

# Page geometry in millimetres (215.9 x 279.4 mm is US Letter, 8.5 x 11 in).
PAGE_W = 215.9
PAGE_H = 279.4
MARGIN_L = 15
MARGIN_R = 15
MARGIN_T = 12
MARGIN_B = 12
USABLE_W = PAGE_W - MARGIN_L - MARGIN_R

# Explicit single-character substitutions applied before the generic
# diacritic stripping in _sanitize_text (covers punctuation, invisible
# characters and letters such as eszett that NFKD does not decompose).
_ASCII_REPLACEMENTS = {
    '\u2013': '-',    # en-dash
    '\u2014': '-',    # em-dash
    '\u2018': "'",    # left single quote
    '\u2019': "'",    # right single quote
    '\u201c': '"',    # left double quote
    '\u201d': '"',    # right double quote
    '\u2022': '-',    # bullet
    '\u2023': '-',    # triangular bullet
    '\u2027': '-',    # hyphenation point
    '\u2043': '-',    # hyphen bullet
    '\u25aa': '-',    # small black square
    '\u25cf': '-',    # black circle
    '\u25cb': '-',    # white circle
    '\u25e6': '-',    # white bullet
    '\u2026': '...',  # ellipsis
    '\u00b7': '-',    # middle dot
    '\u2219': '-',    # bullet operator
    '\u00a0': ' ',    # non-breaking space
    '\u200b': '',     # zero-width space
    '\u200e': '',     # left-to-right mark
    '\u200f': '',     # right-to-left mark
    '\ufeff': '',     # BOM
    '\u00e9': 'e',    # e-acute
    '\u00e8': 'e',    # e-grave
    '\u00f1': 'n',    # n-tilde
    '\u00fc': 'u',    # u-umlaut
    '\u00e4': 'a',    # a-umlaut
    '\u00f6': 'o',    # o-umlaut
    '\u00df': 'ss',   # eszett
}
_ASCII_TABLE = str.maketrans(_ASCII_REPLACEMENTS)

# Recognised section headings (lower-case). Order matters: the first
# keyword that matches a line wins.
_SECTION_KEYWORDS = (
    'professional summary', 'summary', 'profile', 'objective',
    'experience', 'work experience', 'professional experience',
    'skills', 'technical skills', 'core competencies',
    'education', 'academic background',
    'certifications', 'certificates',
    'projects', 'key projects',
    'awards', 'achievements',
    'languages', 'volunteer', 'publications', 'research',
)

# A candidate name line must NOT match this pattern.
_NOT_NAME_RE = re.compile(r'[@|•\-·]|phone|email|linkedin', re.IGNORECASE)
# A line matching this pattern in the preamble is treated as contact info.
_CONTACT_RE = re.compile(
    r'[@|•\-·]|phone|email|linkedin|github|\d{3}[-.\s]?\d{3}', re.IGNORECASE
)
_BULLET_RE = re.compile(r'^[\-\*]\s')
_BULLET_PREFIX_RE = re.compile(r'^[\-\*]\s*')
_SUBHEADER_RE = re.compile(r'^[A-Z].*\|')

# Only the first few lines of the CV are inspected for name/contact details.
_PREAMBLE_LINES = 4

# Font auto-shrink parameters used by generate_pdf.
_MAX_SHRINK_STEPS = 12
_SHRINK_PER_STEP = 0.05
_MIN_FONT_PT = 5.5


def _sanitize_text(text: str) -> str:
    """Return *text* reduced to plain ASCII for PDF compatibility.

    Known typographic characters are mapped to ASCII equivalents, remaining
    accented letters lose their diacritics (NFKD decomposition with the
    combining marks dropped), and anything still outside ASCII becomes '?'.
    """
    text = text.translate(_ASCII_TABLE)
    # Decompose so that e.g. 'á' becomes 'a' + combining accent, then drop
    # the combining marks; otherwise the accent would turn into a stray '?'.
    decomposed = unicodedata.normalize('NFKD', text)
    text = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Replace any remaining non-ASCII that would break the font
    return text.encode('ascii', 'replace').decode('ascii')


def _match_section_header(stripped: str):
    """Return (title, inline_content) if *stripped* is a section heading.

    A heading is a line equal to a known keyword (case-insensitive, trailing
    colons ignored) or starting with "<keyword>:". ``inline_content`` is the
    text following the first colon on the line, or '' when there is none.
    Returns None when the line is not a heading.
    """
    lowered = stripped.lower().rstrip(':')
    for kw in _SECTION_KEYWORDS:
        if lowered == kw or lowered.startswith(kw + ':'):
            _, _, remainder = stripped.partition(':')
            return kw.title(), remainder.strip()
    return None


def _parse_cv_text(cv_text: str) -> dict:
    """Parse CV text into name, contact, and ordered sections.

    Returns a dict with:
      'name'     - the first preamble line that does not look like contact
                   info ('' if none was found),
      'contact'  - contact lines from the preamble joined with ' | ',
      'sections' - list of (header, content) tuples in document order.

    NOTE(review): lines that appear before the first recognised section
    header (other than the name and contact lines) are collected but then
    discarded when the first header starts; a CV without any recognised
    header therefore yields no sections.
    """
    cv_text = _sanitize_text(cv_text)
    lines = cv_text.strip().split('\n')

    sections = []
    current_header = None
    current_lines = []
    name_line = None
    contact_lines = []

    for i, line in enumerate(lines):
        stripped = line.strip()

        if not stripped:
            # Preserve blank lines inside a section; ignore them elsewhere.
            if current_header:
                current_lines.append('')
            continue

        header = _match_section_header(stripped)
        if header is not None:
            title, inline = header
            if current_header:
                sections.append(
                    (current_header, '\n'.join(current_lines).strip())
                )
            current_header = title
            current_lines = [inline] if inline else []
            continue

        if i < _PREAMBLE_LINES and not current_header:
            if not name_line and not _NOT_NAME_RE.search(stripped):
                name_line = stripped
                continue
            if _CONTACT_RE.search(stripped):
                contact_lines.append(stripped)
                continue

        if current_header or name_line:
            current_lines.append(stripped)
        else:
            name_line = stripped

    # The trailing section is only kept when it has at least one line.
    if current_header and current_lines:
        sections.append((current_header, '\n'.join(current_lines).strip()))

    return {
        'name': name_line or '',
        'contact': ' | '.join(contact_lines),
        'sections': sections,
    }


class ResumePDF(FPDF):
    """Single-column resume renderer built on FPDF.

    ``font_sizes`` maps the keys 'name', 'contact', 'header', 'body' and
    'bullet' to point sizes; defaults are used when it is omitted.
    """

    def __init__(self, font_sizes=None):
        # Create the page with the same dimensions the layout constants
        # assume; FPDF's default page format would not match USABLE_W.
        super().__init__(format=(PAGE_W, PAGE_H))
        self.font_sizes = font_sizes or {
            'name': 16, 'contact': 8, 'header': 10, 'body': 9, 'bullet': 9
        }

    def _add_name(self, name):
        """Render the candidate name centred in bold; no-op if empty."""
        if not name:
            return
        self.set_font('Helvetica', 'B', self.font_sizes['name'])
        self.set_text_color(31, 73, 125)
        self.cell(USABLE_W, self.font_sizes['name'] * 0.5, name,
                  align='C', new_x="LMARGIN", new_y="NEXT")
        self.ln(1)

    def _add_contact(self, contact):
        """Render the contact line centred in grey; no-op if empty."""
        if not contact:
            return
        self.set_font('Helvetica', '', self.font_sizes['contact'])
        self.set_text_color(100, 100, 100)
        self.cell(USABLE_W, self.font_sizes['contact'] * 0.45, contact,
                  align='C', new_x="LMARGIN", new_y="NEXT")
        self.ln(2)

    def _add_section_header(self, title):
        """Render an upper-cased section title followed by a thin rule."""
        self.ln(1.5)
        self.set_font('Helvetica', 'B', self.font_sizes['header'])
        self.set_text_color(31, 73, 125)
        self.cell(USABLE_W, self.font_sizes['header'] * 0.45, title.upper(),
                  new_x="LMARGIN", new_y="NEXT")
        y = self.get_y()
        self.set_draw_color(180, 180, 180)
        self.line(MARGIN_L, y, PAGE_W - MARGIN_R, y)
        self.ln(1.5)

    def _add_body_text(self, text):
        """Render a wrapped paragraph in the body font."""
        self.set_font('Helvetica', '', self.font_sizes['body'])
        self.set_text_color(50, 50, 50)
        self.multi_cell(USABLE_W, self.font_sizes['body'] * 0.45, text)
        self.ln(0.5)

    def _add_bullet(self, text):
        """Render an indented, wrapped bullet item prefixed with '- '."""
        self.set_font('Helvetica', '', self.font_sizes['bullet'])
        self.set_text_color(50, 50, 50)
        indent = 4
        self.set_x(MARGIN_L + indent)
        self.multi_cell(USABLE_W - indent, self.font_sizes['bullet'] * 0.42,
                        f"- {text}")
        self.ln(0.3)

    def _add_subheader(self, text):
        """Render a bold sub-heading (e.g. a role/company line)."""
        self.set_font('Helvetica', 'B', self.font_sizes['body'])
        self.set_text_color(50, 50, 50)
        self.multi_cell(USABLE_W, self.font_sizes['body'] * 0.45, text)
        self.ln(0.3)

    def _add_section_content(self, content):
        """Render section text line by line.

        Lines starting with '- ' or '* ' become bullets; lines that start
        with a capital letter and contain '|', or short lines ending in ':',
        become sub-headings; everything else is body text. Blank lines are
        skipped.
        """
        for line in content.split('\n'):
            stripped = line.strip()
            if not stripped:
                continue
            if _BULLET_RE.match(stripped):
                self._add_bullet(_BULLET_PREFIX_RE.sub('', stripped))
            elif _SUBHEADER_RE.match(stripped) or (
                stripped.endswith(':') and len(stripped) < 80
            ):
                self._add_subheader(stripped.rstrip(':'))
            else:
                self._add_body_text(stripped)


def _build_pdf(parsed, font_sizes):
    """Render *parsed* CV data with *font_sizes*.

    Returns a ``(pdf_bytes, page_count)`` tuple so the caller can decide
    whether the result fits on a single page.
    """
    pdf = ResumePDF(font_sizes=font_sizes)
    # Configure margins before the first page is created so the initial
    # cursor position already honours them.
    pdf.set_margins(MARGIN_L, MARGIN_T, MARGIN_R)
    pdf.set_auto_page_break(auto=True, margin=MARGIN_B)
    pdf.add_page()
    pdf.set_y(MARGIN_T)

    pdf._add_name(parsed['name'])
    pdf._add_contact(parsed['contact'])
    for header, content in parsed['sections']:
        pdf._add_section_header(header)
        pdf._add_section_content(content)

    # output() yields a bytearray in fpdf2; normalise to immutable bytes.
    return bytes(pdf.output()), pdf.pages_count


def generate_pdf(cv_text: str) -> bytes:
    """Generate 1-page ATS PDF. Auto-shrinks fonts if needed.

    Fonts are scaled down in 5% steps (never below the minimum point size)
    until the document fits on one page. If it still does not fit after the
    last step, the smallest attempt is returned and a warning is logged.
    """
    parsed = _parse_cv_text(cv_text)
    base_sizes = {
        'name': 16, 'contact': 8, 'header': 10, 'body': 9.5, 'bullet': 9.5
    }

    pdf_bytes = b''
    for shrink_step in range(_MAX_SHRINK_STEPS):
        factor = 1.0 - (shrink_step * _SHRINK_PER_STEP)
        sizes = {
            key: max(size * factor, _MIN_FONT_PT)
            for key, size in base_sizes.items()
        }
        pdf_bytes, page_count = _build_pdf(parsed, sizes)
        if page_count <= 1:
            logger.info("PDF: 1 page (factor=%.2f, body=%.1fpt)",
                        factor, sizes['body'])
            return pdf_bytes

    logger.warning("PDF: couldn't fit 1 page at min font")
    return pdf_bytes


def save_pdf_to_file(cv_text: str, output_path: Optional[str] = None) -> str:
    """Render *cv_text* to a PDF file and return the file's path.

    When *output_path* is None a new temporary file (prefix 'ats_cv_',
    suffix '.pdf') is created; the caller is responsible for deleting it.
    """
    # Generate first so a rendering failure does not leave an empty file.
    pdf_bytes = generate_pdf(cv_text)

    if output_path is None:
        # NamedTemporaryFile creates the file atomically, unlike the
        # deprecated, race-prone tempfile.mktemp().
        with tempfile.NamedTemporaryFile(
            suffix='.pdf', prefix='ats_cv_', delete=False
        ) as f:
            f.write(pdf_bytes)
            return f.name

    with open(output_path, 'wb') as f:
        f.write(pdf_bytes)
    return output_path


# Backward compat
def save_docx_to_file(cv_text: str, output_path: Optional[str] = None) -> str:
    """Deprecated alias kept for old callers; writes a PDF, not a DOCX."""
    return save_pdf_to_file(cv_text, output_path)