Logo Search packages:      
Sourcecode: zope-attachmentfield version File versions  Download package

AttachmentHandler.py

# -*- coding: utf-8 -*-
## AttachmentField
## Copyright (C)2006 Ingeniweb

## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; see the file COPYING. If not, write to the
## Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
AttachmentField
"""
__version__ = "$Revision: 25863 $"
# $Source: /cvsroot/ingeniweb/PloneSubscription/SubscriptionTool.py,v $
# $Id: AttachmentHandler.py 25863 2006-07-07 14:47:14Z manuco $
__docformat__ = 'restructuredtext'


from Acquisition import Implicit
from Globals import Persistent
from Globals import MessageDialog, DTMLFile      # fakes a method from a DTML file
from AccessControl import ClassSecurityInfo
from Products.Archetypes import Field
import App.Common

import striphtml

import tempfile
import string
import cgi
import re
import sys
import os
import os.path

from global_symbols import *

# Placeholder assigned to the AbstractHandler attributes that subclasses are
# expected to override; also used as the getattr() default in registerHandler().
INVALID_VALUE = "******** INVALID VALUE *********"

# HTML stripping variables
# Splits a document into three groups: everything up to and including the
# opening <body ...> tag, the body content, and the closing </body> tag with
# whatever follows it. Not referenced in the visible part of this file.
header_re = re.compile("(.*?<body[^>]*>)(.*?)(</body>.*)", re.S | re.I)


# Registry of (class name, handler instance) tuples, filled by registerHandler()
__HANDLERS__ = []


def registerHandler(handler_class):
    """
    registerHandler(handler_class) => register a handler class
    for a specific mime type or such.

    handler_class is an AbstractHandler-derived class. Unless the class sets
    __CHECK_INTERFACE__ to a false value, every attribute named in
    AbstractHandler.__must_derive__ must differ from the value found on
    AbstractHandler itself; otherwise the problem is logged and the class is
    NOT registered.

    On success one instance of handler_class is created and appended to
    __HANDLERS__ as a (class name, instance) tuple. Always returns None.
    """
    # Ensure handler_class conformance with the model
    if handler_class.__CHECK_INTERFACE__:
        for attr_name in AbstractHandler.__must_derive__:
            derived = getattr(handler_class, attr_name, INVALID_VALUE)
            inherited = getattr(AbstractHandler, attr_name, INVALID_VALUE)
            if derived == inherited:
                # Works for properties.
                # The exception is raised only to be logged right away
                # (presumably LogException() reports the exception being handled).
                try:
                    raise NotImplementedError(
                        "Attribute '%s' of class '%s' must be derived. This plugin won't be enabled." % (
                            attr_name, handler_class.__name__))
                except NotImplementedError:
                    LogException()
                    return None
            # There's nothing to enforce method-overriding yet.

    # Instantiate the class
    handler = handler_class()

    # Always define both attributes: the conversion methods read
    # handler.index_program / handler.preview_program unconditionally, so
    # leaving one unset would surface later as an AttributeError.
    handler.index_program = None
    handler.preview_program = None

    if handler.is_external_conv:
        # Compute the converter and previewer paths
        index_path = handler.getIndexPath(None, None)
        preview_path = handler.getPreviewPath(None, None)
        if index_path:
            handler.index_program = getConverterProgram(handler)
        if preview_path:
            # NOTE(review): getConverterProgram() only reads handler.index_path,
            # so the previewer is resolved from the index program path as well.
            handler.preview_program = getConverterProgram(handler)

    # Store the instance under its class name
    class_name = handler_class.__name__
    __HANDLERS__.append((class_name, handler, ))
    Log(LOG_DEBUG, "Registered '%s' class." % (class_name, ))


def getAttachmentHandler(contentType, field, instance):
    """
    getAttachmentHandler(contentType, field, instance) => handler instance or None

    Walk the registered handlers in registration order and return the first
    one whose getContentTypes(field, instance) contains contentType.

    When nothing matches, return the handler registered under the class name
    'DummyAttachment'. If no such handler has been registered either, a
    warning is logged and None is returned (this used to fail with an
    unbound-variable error).
    """
    dummy = None
    for class_name, candidate in __HANDLERS__:
        if contentType in candidate.getContentTypes(field, instance, ):
            Log(LOG_DEBUG, "We use", candidate)
            return candidate
        if class_name == 'DummyAttachment':
            # Remember the fallback while we keep looking for a real match
            dummy = candidate

    if dummy is None:
        Log(LOG_WARNING, "No handler matches and no dummy handler is registered for", contentType)
        return None

    # No match. Have to return the dummy handler
    Log(LOG_DEBUG, "No match, we fall back to the dummy handler", dummy)
    return dummy



class AbstractHandler(Implicit, ):
    """
    CODING INFORMATION: If you create additional methods or properties in AbstractHandler that must
    be derived for the class to work, put them in the __must_derive__ tuple so that they will be checked
    at class registration. This will ensure better coding quality.

    Please note that ZAbstractAttachment-derived objects are instantiated every time a file is uploaded.
    So you're guaranteed that the indexed file won't change during the class's lifetime.

    You can store additional files inside a ZAA with the addRelatedFile() method.
    If you want to get it back, use getRelatedFile() with the file's identifier.

    In many methods, 'field' and 'instance' parameters are passed.
    If you're working outside Archetypes you can pass None as 'field' and 'self' as 'instance' in
    most of the methods.
    """
    # Names of the attributes that subclasses must override; registerHandler()
    # refuses a class that leaves any of them at the AbstractHandler value.
    __must_derive__ = (
        "converter_type",                       # String corresponding to config.py types
        "icon_file",                            # (string) of the icon name IN A PLONE SKIN.
        "small_icon_file",                      # (string) of the SMALL icon name IN A PLONE SKIN.
        "content_types",                        # List (of strings) of content_types supported by the class

        "is_external_conv",                     # Is the converter using an external program?
        "is_working",                           # Is the converter producing anything meaningful?
        "index_path",                           # Index program relative path
        "index_arguments",                      # Converter program args. File name will be '%s'.
        "index_encoding",                       # Encoding of the converter OUTPUT (input is most likely binary data) or None

        "preview_path",                         # Preview program path
        "preview_arguments",                    # Preview arguments or None to disable preview
        "preview_format",                       # Previewer output format: 'text', 'html' or 'pre' (text with fixed width)
        "preview_encoding",                     # Encoding of the previewer OUTPUT (input is most likely binary data) or None
        )


    # Attachment properties (MUST BE DERIVED)
    converter_type = INVALID_VALUE
    icon_file = INVALID_VALUE                   # Icon file (NOTE(review): described above as a skin icon name - confirm which)
    small_icon_file = INVALID_VALUE
    content_types = INVALID_VALUE               # Supported content-types (tuple of strings)

    is_external_conv = INVALID_VALUE
    index_path = INVALID_VALUE
    index_arguments = INVALID_VALUE
    index_encoding = INVALID_VALUE

    preview_path = INVALID_VALUE
    preview_arguments = INVALID_VALUE
    preview_encoding = INVALID_VALUE
    preview_format = INVALID_VALUE

    program_found = False               # Set to True by getConverterProgram() when the program is located
    error = False ## if true, gui will display it.


    __CHECK_INTERFACE__ = 1             # When true, registerHandler() checks __must_derive__

    #                                                                   #
    #              Overridable interfaces for those methods             #
    #                                                                   #

00183     def initHandler(self, field, instance):
        """Initialize handler data if needed."""
        return

    def getConverterType(self, field, instance):
        """Return self.converter_type. 'field' and 'instance' are unused here; subclasses may use them."""
        return self.converter_type

    def getIconFile(self, field, instance):
        """Return self.icon_file. 'field' and 'instance' are unused here."""
        return self.icon_file

    def getSmallIconFile(self, field, instance):
        """Return self.small_icon_file. 'field' and 'instance' are unused here."""
        return self.small_icon_file

    def getContentTypes(self, field, instance):
        """Return self.content_types; getAttachmentHandler() tests membership in this value."""
        return self.content_types

    def getIndexPath(self, field, instance):
        """Return self.index_path. registerHandler() calls this with field=None, instance=None."""
        return self.index_path

    def getIndexArguments(self, field, instance):
        """Return self.index_arguments. 'field' and 'instance' are unused here."""
        return self.index_arguments

    def getIndexEncoding(self, field, instance):
        """Return self.index_encoding. 'field' and 'instance' are unused here."""
        return self.index_encoding

    def getPreviewPath(self, field, instance):
        """Return self.preview_path. registerHandler() calls this with field=None, instance=None."""
        return self.preview_path

    def getPreviewArguments(self, field, instance):
        """Return self.preview_arguments. 'field' and 'instance' are unused here."""
        return self.preview_arguments

    def getPreviewEncoding(self, field, instance):
        """Return self.preview_encoding. 'field' and 'instance' are unused here."""
        return self.preview_encoding

    def getPreviewFormat(self, field, instance):
        """Return self.preview_format. 'field' and 'instance' are unused here."""
        return self.preview_format

00220     def getIndexableValue(self, field, instance):
        """
        getIndexableValue(self, field, instance) => (possibliy big) string
        Return the ZCatalog-indexable string for that type.
        """
        Log(LOG_DEBUG, "converting field", field.getName(), self.index_arguments, self.__class__.__name__)
        index = self._convert(
            field,
            instance,
            self.index_program,
            self.index_arguments,
            self.index_encoding,
            )

        # Convert indexer output to plain "optimized" text
        index = striphtml.strip(index)
        words = []
        for w in string.split(index):
            stripped = string.lower(string.strip(w))
            if not stripped in words:
                words.append(stripped)
        words.sort
        return string.join(words, " ")

00244     def convertStringToIndex(self, content, content_type, instance):
        """
        convertStringToIndex(self, content, content_type, instance) => Utility to convert a string to HTML
        using the converter stuff.
        """
        return self._convertString(
            content,
            "<string>",
            self.index_program,
            self.index_arguments,
            self.index_encoding,
            )

00257     def convertStringToPreview(self, content, content_type, instance):
        """
        convertStringToPreview(self, content, content_type, instance) => Utility to convert a string to HTML
        using the converter stuff.
        """
        preview = self._convertString(
            content,
            "<string>",
            self.preview_program,
            self.preview_arguments,
            self.preview_encoding,
            )
        return self._convertOutput(preview, self.preview_format)

00271     def _convert(self, field, instance, program, arguments, encoding):
        """
        _convert(self, field, instance, program, arguments, encoding) => call a converter with arguments. Return an unicode string.

        Won't convert if 'None' is provided as the converter or argument parameter -> This is the way for
        a plugin to inhibit conversion.
        """
        # Perform conversion
        uustring = self._convertString(
            str(field.get(instance)),
            field.getFilename(instance),
            program,
            arguments,
            encoding,
            )

        # Return an encoded string
        return Field.encode(uustring, instance)


00291     def unicode2string(self, str, instance):
        """Careful unicode converter"""
        return Field.encode(str, instance)

00295     def string2unicode(self, str, instance):
        """Careful unicode converter"""
        return Field.decode(str, instance)

00299     def _convertString(self, content, filename, program, arguments, encoding, ):
        """
        Utility method to convert stuff.
        This method will ALWAYS return an unicode string
        Encoding can be a string, a list, or None.
        """
        # Basic checks.
        index = ""
        if arguments is None:
            return ""
        if not program:
            return ""

        f, fn = tempfile.mkstemp()
        try:
            # Write attachment in a temporary file
            os.close(f)
            f = open(fn, "w+b")
            f.write(content)
            f.close()

            # Call converter in the right directory
            Log(LOG_DEBUG, "Calling converter for", filename, "in directory", tempfile.tempdir)
            index = self.callConverter(
                program,
                arguments % (fn, ),
                )

        finally:
            # Delete attachment file
            if os.path.isfile(fn):
                os.unlink(fn)
                Log(LOG_DEBUG, "Removed attachment file '%s'" % (fn,))

        # Deal with various encodings
        if encoding:
            # Convert from encoded string to unicode
            if type(encoding) in (type(''), type(u''), ):
                Log(LOG_DEBUG, "Encoding", encoding)
                index = index.decode(encoding, "replace")

            elif type(encoding) in (type(None),):
                pass

            elif type(encoding) in (type([]), type(()), ):
                for enc in encoding:
                    try:
                        Log(LOG_DEBUG, "Trying encoding", enc)
                        index = index.decode(enc, "replace")
                        break

                    except UnicodeError:
                        Log(LOG_DEBUG, "Encoding", enc, "failed.")
                        pass

        # Return the string
        return index


    #                                                                   #
    #                         HTML PREVIEW SUPPORT                      #
    #                                                                   #

    # Pre-compiled patterns used by _cleanHTML() and _html_to_text() below.
    # Matches a whole quoted style attribute (style="..." or style='...').
    _strip_style = re.compile(r"""style\s*=\s*["'][^"']*["']""", re.I | re.S)
    # Detects the start of an opening body tag (whitespace allowed after '<').
    _has_body_start = re.compile(r"""<\s*body""", re.I)
    # Matches everything up to and including the first opening <body ...> tag.
    _strip_body_start = re.compile(r""".*?<body[^>]*>""", re.I | re.S)
    # Detects the start of a closing body tag.
    _has_body_end = re.compile(r"""</\s*body""", re.I)
    # Matches the closing </body> tag and everything that follows it.
    _strip_body_end = re.compile(r"""</\s*body\s*>.*""", re.I | re.S)
    # Matches any single <...> tag-like span.
    _strip_tags = re.compile(r"""<[^>]+>""", re.I | re.S)


    def _html_to_text(self, html):
        """
        _html_to_text(self, html) => text
        Crude HTML to text conversion: every <...> span matched by
        _strip_tags is removed, the rest is left as it is.
        """
        Log(LOG_DEBUG, "Stripping html tags")
        without_tags = re.sub(self._strip_tags, '', html)
        Log(LOG_DEBUG, "done.")
        return without_tags

    def _cleanHTML(self, text):
        """
        _cleanHTML(self, text) => text
        Regexp-based HTML clean-up: drop style attributes, then keep only
        what lies between the opening and the closing 'body' tags (tags
        excluded). The result is stripped of surrounding whitespace.
        """
        Log(LOG_DEBUG, "Stripping style...")
        cleaned = self._strip_style.sub('', text, )

        # Each stripping regexp is only applied after a cheap probe confirmed
        # the tag is there: _strip_body_start may go in infinite loop if there
        # is no body attribute (in certain conditions).
        body_steps = (
            (self._has_body_start, self._strip_body_start, "Stripping start tag..."),
            (self._has_body_end, self._strip_body_end, "Stripping end tag..."),
            )
        for probe, stripper, message in body_steps:
            if probe.search(cleaned) is not None:
                Log(LOG_DEBUG, message)
                cleaned = stripper.sub('', cleaned, 1)

        Log(LOG_DEBUG, "done")
        return cleaned.strip()

    def _convertOutput(self, preview, preview_format):
        """
        _convertOutput(self, preview, preview_format) => Convert preview to the right format,

        depending on the previewer output
        """
        if preview_format == 'text':
            # If the converter outputs plain text, we convert it into HTML
            preview = self.textToHTML(preview)

        elif preview_format == "html":
            # We just have to try to strip buggy/unuseful HTML
            preview = self._cleanHTML(preview)

        elif preview_format == "pre":
            preview = "<pre>%s</pre>" % (preview,)

        return preview

    def getPreview(self, field, instance):
        """
        getPreview(self, field, instance) => string or None

        Return the HTML preview (generating it if it's not already done) for this attachement.
        If the attachment is not previewable, or if there's a problem in the preview,
        return None.
        """
        # Check if we can preview
        if self.preview_arguments is None:
            return None

        # Call the converter with the proper arguments
        preview = self._convert(
            field,
            instance,
            self.preview_program,
            self.preview_arguments,
            self.preview_encoding,
            )
        Log(LOG_DEBUG, "Getting preview for file", field.getFilename(instance))

        # Return the previewable string
        return self._convertOutput(preview, self.preview_format)

    def getSmallPreview(self,):
        """
        getSmallPreview(self,) => string or None

        Default behaviour : if the preview string is shorter than MAX_PREVIEW_SIZE, return it, else return None.
        You can override this, of course.
        """
        ret = self.preview()
        if not ret:
            return None
        if len(ret) < MAX_PREVIEW_SIZE:
            return ret
        return None


    #                                                                   #
    #                           UTILITY METHODS                         #
    #                                                                   #
    #   Those methods can be called from your products to make your     #
    #   work easier when creating plugins.                              #
    #                                                                   #


    def callConverter(self, program_path, arguments = '', report_errors = 1):
        """
        callConverter(self, program_path, arguments = '', report_errors = 1) => string
        Run "<program_path> <arguments>" through the shell from inside a
        freshly created temporary directory and return what the program
        wrote on its standard output.

        If report_errors is true, the program's stderr is redirected to a
        temporary file whose content is used for error reporting; that file
        is deleted afterwards.

        Raises RuntimeError when the program returned nothing. When it
        returned something AND reported errors, a warning is logged and the
        output is returned anyway.
        """
        cmd = "%s %s" % (program_path, arguments,)
        Log(LOG_DEBUG, "Converting file using '%s' program and '%s' arguments" % (
            program_path,
            arguments, ))
        idx = ""
        errors = ""

        # Manage file for error reporting
        if report_errors:
            err_fd, errfile = tempfile.mkstemp()
            os.close(err_fd)
            # Quoted so that a temporary path containing spaces still works
            cmd = '%s 2> "%s"' % (cmd, errfile, )
        else:
            errfile = None

        # Actually execute command.
        # NOTE: os.chdir() changes the working directory of the whole process.
        curdir = os.getcwd()
        tmpdir = tempfile.mkdtemp()
        try:
            os.chdir(tmpdir)
            Log(LOG_DEBUG, "We work in", os.getcwd())
            stream = os.popen(cmd, "r")
            try:
                idx = stream.read()
            finally:
                # Closing waits for the program, so stderr is complete below
                stream.close()

        finally:
            # Go back to the previous dir
            os.chdir(curdir)

            # Parse, then delete, the error file
            if errfile:
                try:
                    err_stream = open(errfile, "r")
                except (IOError, OSError):
                    Log(LOG_NOTICE, "Unable to open error file '%s'" % (errfile, ))
                else:
                    try:
                        errors = err_stream.read()
                    finally:
                        err_stream.close()
                try:
                    os.unlink(errfile)
                except OSError:
                    Log(LOG_NOTICE, "Unable to remove error file '%s'" % (errfile, ))

            # Remove the temporary directory and whatever the program left in it
            try:
                for root, dirs, files in os.walk(tmpdir, topdown=False):
                    for name in files:
                        os.remove(os.path.join(root, name))
                    for name in dirs:
                        os.rmdir(os.path.join(root, name))
                os.rmdir(tmpdir)

            except OSError:
                LogException()
                Log(LOG_NOTICE, "Could not remove temporary stuff in", tmpdir)

        # Report errors
        if not idx and not errors:
            raise RuntimeError("'%s' returned nothing. No error reported by plugin. Indexing cancelled." % (cmd, ))
        elif not idx:
            raise RuntimeError("'%s' returned nothing. Error reported by plugin: '%s'. Indexing cancelled." % (cmd, errors, ))
        elif errors:
            Log(LOG_WARNING, "'%s' returned error while indexing: '%s'. Indexing done anyway." % (cmd, errors, ))

        Log(LOG_DEBUG, "Conversion done.")

        return idx


    def textToHTML(self, text):
        """
        textToHTML(self, text) => string

        Convert a plain-text string into pretty HTML, keeping you away from the need
        to use dirty '<pre>' tags and quoting the string.
        """
        # HTML-Quote the string
        text = cgi.escape(text)

        # Convert double linefeeds into paragraphs
        # XXX TODO

        # Convert double spaces into paragraphs
        text = re.sub("  ", "&nbsp;", text, )

        # Convert simple linefeeds into line breaks
        text = re.sub("\n", "<br />\n", text, )

        # Return this pretty converted string
        text = string.strip(text)
        return text


    def getGUIIndexProgramCommand(self, field, instance):
        if not self.is_working:
            return "<em>unavailable</em>"
        if not self.is_external_conv:
            return "<em>internal</em>"
        elif self.program_found == False:
            return self.index_path + " " + self.index_arguments + "<br/><strong>not found</strong>"
        else:
            return self.index_path + " " + self.index_arguments

    def getGUIPreviewProgramCommand(self, field, instance):
        if not self.is_working:
            return "<em>unavailable</em>"
        if not self.is_external_conv:
            return "<em>internal</em>"
        elif self.program_found == False:
            return self.preview_path + " " + self.preview_arguments + "<br/><strong>not found</strong>"
        else:
            return self.preview_path + " " + self.preview_arguments

    def getError(self, field, instance):
        if self.error:
            return "error"
        return ""

#                                                                       #
#                       External programs interface                     #
#                                                                       #

PACKAGE_HOME = App.Common.package_home(globals())
if sys.platform == 'win32':
    # Windows platform
    def getConverterProgram(converter):
        """
        getConverterProgram(converter) => quoted program path, "type" or None
        (Windows variant)

        Locate the program named by converter.index_path under
        PACKAGE_HOME/converters/<converter_type>/win32/.

        - Returns None when the converter is not external or has no path.
        - Returns "type" as is when the path is the 'type' command, which is
          not looked up on disk.
        - Otherwise returns the full path wrapped in double quotes and sets
          converter.program_found; if the file does not exist, a warning is
          logged, converter.error is set and None is returned.
        """
        conv_type = converter.converter_type
        conv_path = converter.index_path

        # Same guard as the Unix variant: nothing to locate for internal converters
        if not converter.is_external_conv:
            return None
        if conv_path is None:
            return None
        if conv_path.lower() == "type":
            # Not looked up on disk, so flag it as found for the GUI
            converter.program_found = True
            return "type"

        program_file = os.path.join(PACKAGE_HOME, "converters", conv_type, "win32", conv_path, )
        # Quoted because the package path may contain spaces
        program = '"' + program_file + '"'
        if os.path.isfile(program_file):
            Log(LOG_DEBUG, "Using '%s' program to convert %s attachments." % (program, conv_type))
            converter.program_found = True
            return program

        Log(
            LOG_WARNING,
            "Converter program '%s' not found for '%s' attachments! Indexing and preview won't work." % (
            program, conv_type,
            ),
            )
        converter.error = True
        return None

else:
    def getConverterProgram(converter):
        """
        getConverterProgram(converter) => full program path or None
        (Unix variant)

        Resolve converter.index_path to a full path with the 'which' command.

        - Returns None when the converter is not external.
        - Raises RuntimeError when the converter is external but its path is
          None or "(internal)".
        - On success sets converter.program_found and returns the path.
        - When 'which' fails or prints nothing, a warning is logged,
          converter.error is set and None is returned.
        """
        conv_type = converter.converter_type
        conv_path = converter.index_path

        if not converter.is_external_conv:
            return None         # In case we don't need a program

        if "(internal)" == conv_path or conv_path is None:
            raise RuntimeError("converter path is invalid, but external conv has been set.")

        # Unix platform
        import commands

        # Try to find the real full path of the program.
        # The exit status must be checked: stderr is merged into the output,
        # so a 'which' failure message would otherwise be mistaken for a path.
        status, output = commands.getstatusoutput("which %s" % (conv_path, ))
        program = output.strip()
        if status != 0 or not program:
            Log(
                LOG_WARNING,
                "Converter program '%s' not found for '%s' attachments! Indexing and preview won't work." % (
                    conv_path, conv_type,
                    ),
                )
            converter.error = True
            return None

        Log(LOG_DEBUG, "Using '%s' program to convert %s attachments." % (program, conv_type))
        converter.program_found = True
        return program

Generated by  Doxygen 1.6.0   Back to index