How do I convert a docx file to PDF with a Python function (Windows)?

Esteban Chornet picture Esteban Chornet · Feb 18, 2019 · Viewed 9.2k times · Source

I am working in an environment with a Python function that converts docx files to PDF. I'm using Postman to send the document as base64. The function then rebuilds the docx file on disk (everything works up to this point), but when it tries to convert the docx file to PDF, an error shows up. I suspect this is because I do not have Office installed in my environment. How can I fix this without having Office? Thanks.

import sys
import os
import comtypes.client
import pythoncom
import uuid
import requests
from docx import Document
import base64
from os import listdir
from os.path import isfile, join
import azure.functions as func

def main(req: func.HttpRequest) -> func.HttpResponse:
    """Decode a base64 .docx from the request and convert it to PDF via Word.

    The JSON request body must contain a ``base`` key holding the
    base64-encoded contents of a .docx file. The document is written to
    ``prueba.docx`` and converted to ``prueba.pdf`` in the same directory.

    The conversion drives Microsoft Word through COM
    (``Word.Application``), so Word must be installed on the machine that
    runs this function.

    :param req: incoming HTTP request with a JSON body ``{"base": "<base64>"}``
    :return: HTTP 200 response naming the PDF file that was written
    """
    bytesDoc = req.get_json()['base']

    path = '/users/echornet/pruebas/'
    # Word resolves paths on its own, so hand it normalised absolute
    # paths rather than forward-slash or relative ones.
    docx_file = os.path.abspath(path + 'prueba.docx')
    out_file = os.path.abspath(path + 'prueba.pdf')

    # The context manager guarantees the file is flushed and closed before
    # Word tries to open it.
    with open(docx_file, 'wb') as newFile:
        newFile.write(base64.b64decode(bytesDoc))

    wdFormatPDF = 17

    word = comtypes.client.CreateObject('Word.Application')
    try:
        # Documents.Open expects a file name string, not a Python file object.
        doc = word.Documents.Open(docx_file)
        try:
            doc.SaveAs(out_file, FileFormat=wdFormatPDF)
        finally:
            doc.Close()
    finally:
        # Always shut Word down; otherwise every call leaves a WINWORD.EXE
        # process running in the background.
        word.Quit()

    return func.HttpResponse('PDF written to ' + out_file, status_code=200)

This is the error I'm getting. The docx file is created from the base64 data, but the conversion to PDF does not happen.

System.Private.CoreLib: Exception while executing function: Functions.FunConverter. System.Private.CoreLib: Result: Failure Exception: AttributeError: module 'comtypes.gen.Word' has no attribute '_Application' Stack: File "C:\PruebaFunction\ConvEnv\lib\site-packages\azure\functions_worker\dispatcher.py", line 288, in _handle__invocation_request self.run_sync_func, invocation_id, fi.func, args) File "C:\Users\echornet\AppData\Local\Programs\Python\Python36\lib\concurrent\futures\thread.py", line 55, in run result = self.fn(*self.args, **self.kwargs) File "C:\PruebaFunction\ConvEnv\lib\site-packages\azure\functions_worker\dispatcher.py", line 347, in __run_sync_func return func(**params) File "C:\PruebaFunction\FunConverter__init.py", line 32, in main word = comtypes.client.CreateObject('Word.Application') File "C:\PruebaFunction\ConvEnv\lib\site-packages\comtypes\client__init__.py", line 250, in CreateObject return _manage(obj, clsid, interface=interface) File "C:\PruebaFunction\ConvEnv\lib\site-packages\comtypes\client__init__.py", line 188, in _manage obj = GetBestInterface(obj) File "C:\PruebaFunction\ConvEnv\lib\site-packages\comtypes\client__init__.py", line 112, in GetBestInterface interface = getattr(mod, itf_name)

Answer

PandaO picture PandaO · Feb 18, 2019

You can try the win32com library to do that:

# -*- encoding: utf-8 -*-
import  os
from win32com import client
# pip install pywin32  (provides the win32com package)
def doc2pdf(doc_name, pdf_name):
    """
    Convert a Word document to PDF by automating Microsoft Word over COM.

    An existing file at ``pdf_name`` is deleted before the conversion.
    Microsoft Word must be installed, because the conversion is done by a
    ``Word.Application`` COM instance.

    :param doc_name: path of the Word file to convert
    :param pdf_name: path of the PDF file to write
    :return: ``pdf_name`` on success, ``1`` if any step failed
    """
    word = None
    try:
        word = client.DispatchEx("Word.Application")
        # Word resolves relative paths against its own working directory,
        # so always pass absolute paths.
        doc_path = os.path.abspath(doc_name)
        pdf_path = os.path.abspath(pdf_name)
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        worddoc = word.Documents.Open(doc_path, ReadOnly=1)
        try:
            worddoc.SaveAs(pdf_path, FileFormat=17)  # 17 == wdFormatPDF
        finally:
            # Close the document even when SaveAs fails.
            worddoc.Close()
        return pdf_name
    except Exception:
        # Keep the original contract: failures are reported as 1. Unlike a
        # bare ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        return 1
    finally:
        if word is not None:
            try:
                # DispatchEx starts a dedicated Word process; quit it so
                # WINWORD.EXE instances do not accumulate.
                word.Quit()
            except Exception:
                # Best-effort cleanup; the conversion result is already decided.
                pass
if __name__ == '__main__':
    # Example run: convert f:/test.doc into f:/test.pdf.
    doc2pdf("f:/test.doc", "f:/test.pdf")