Logo Search packages:      
Sourcecode: zope-attachmentfield version File versions  Download package


# -*- coding: utf-8 -*-
## AttachmentField
## Copyright (C)2006 Ingeniweb

## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; see the file COPYING. If not, write to the
## Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
__version__ = "$Revision: 25863 $"
# $Source: /cvsroot/ingeniweb/PloneSubscription/SubscriptionTool.py,v $
# $Id: AttachmentHandler.py 25863 2006-07-07 14:47:14Z manuco $
__docformat__ = 'restructuredtext'

from Acquisition import Implicit
from Globals import Persistent
from Globals import MessageDialog, DTMLFile      # fakes a method from a DTML file
from AccessControl import ClassSecurityInfo
from Products.Archetypes import Field
import App.Common

import striphtml

import tempfile
import string
import cgi
import re
import sys
import os
import os.path

from global_symbols import *

# Placeholder used as the default of the AbstractHandler attributes that
# subclasses have to override; registerHandler() compares against it.
INVALID_VALUE = "******** INVALID VALUE *********"

# HTML stripping: three groups -- everything up to and including the opening
# <body ...> tag, the body content, and everything from </body> onwards.
header_re = re.compile(
    "(.*?<body[^>]*>)(.*?)(</body>.*)",
    re.DOTALL | re.IGNORECASE,
)

# Registry of (class name, handler instance) tuples filled by registerHandler()
__HANDLERS__ = []

def registerHandler(handler_class):
    """
    registerHandler(handler_class) => register a handler class
    for a specific mime type or such.

    handler_class is an AbstractHandler-derived class. When its
    __CHECK_INTERFACE__ flag is true, every attribute named in
    AbstractHandler.__must_derive__ must differ from the AbstractHandler
    default, otherwise NotImplementedError is raised and nothing is registered.

    The class is instantiated once; the instance receives 'index_program' and
    'preview_program' attributes and is appended to __HANDLERS__ together with
    the class name.
    """
    # Ensure handler_class conformance with the model
    if handler_class.__CHECK_INTERFACE__:
        for attr_name in AbstractHandler.__must_derive__:
            inherited = getattr(AbstractHandler, attr_name, INVALID_VALUE)
            if getattr(handler_class, attr_name, INVALID_VALUE) == inherited:
                # Works for properties
                raise NotImplementedError(
                    "Attribute '%s' of class '%s' must be derived. This plugin won't be enabled." % (
                        attr_name, handler_class.__name__))
            # There's nothing to enforce method-overriding, but, one day I'll put some code snippet here to do so ;-)

    # Instantiate the class
    handler = handler_class()

    # Both programs default to None so the attributes always exist, even for
    # an external converter that declares no index or preview path.
    index_program = None
    preview_program = None
    if handler.is_external_conv:
        # Compute the converter and previewer paths.
        # NOTE(review): the arguments of both getConverterProgram() calls and the
        # 'else' branch were missing from the listing this file was recovered
        # from. getConverterProgram() takes the handler itself and resolves its
        # index_path, so both lookups resolve the same program -- confirm
        # against the upstream file.
        if handler.getIndexPath(None, None):
            index_program = getConverterProgram(handler)
        if handler.getPreviewPath(None, None):
            preview_program = getConverterProgram(handler)
    handler.index_program = index_program
    handler.preview_program = preview_program

    # Store the handler
    class_name = handler_class.__name__
    __HANDLERS__.append((class_name, handler))
    Log(LOG_DEBUG, "Registered '%s' class." % (class_name, ))

def getAttachmentHandler(contentType, field, instance):
    """
    getAttachmentHandler(contentType, field, instance) => handler instance or None

    Return the first registered handler whose getContentTypes(field, instance)
    contains contentType. When none matches, return the handler registered
    under the class name 'DummyAttachment', or None if no such handler has
    been registered.
    """
    dummy = None
    for class_name, candidate in __HANDLERS__:
        if contentType in candidate.getContentTypes(field, instance):
            Log(LOG_DEBUG, "We use", candidate)
            return candidate
        if class_name == 'DummyAttachment':
            # Remembered as the fallback; keep scanning for a real match
            dummy = candidate

    # No match. Have to return the dummy handler
    Log(LOG_DEBUG, "No match, we fall back to the dummy handler", dummy)
    return dummy

class AbstractHandler(Implicit):
    """
    Base class of the attachment handlers passed to registerHandler().

    CODING INFORMATION : If you create additional methods or properties in ZAbstractAttachment that must
    be derived for the class to work, put them in the __must_derive__ tuple so that they will be checked
    at class registration. This will ensure better coding quality.
    (NOTE(review): 'ZAbstractAttachment' / 'ZAA' below look like a former name of this class -- confirm.)

    Please note that ZAbstractAttachment-derived objects are instantiated every time a file is uploaded.
    So you're guaranteed that the indexed file won't change during the class's lifetime.

    You can store additional files inside a ZAA with the addRelatedFile() method.
    If you want to get it back, use getRelatedFile() with the file's identifier.

    In many methods, 'field' and 'instance' parameters are passed.
    If you're working outside an Archetypes you can pass None as 'field' and 'self' as 'instance' in
    most of the methods.
    """
    # List of methods and properties that must be derived in subclasses
    __must_derive__ = (
        "converter_type",                       # String corresponding to config.py types
        "icon_file",                            # (string) of the icon name IN A PLONE SKIN.
        "small_icon_file",                      # (string) of the SMALL icon name IN A PLONE SKIN.
        "content_types",                        # List (of strings) of content_types supported by the class

        "is_external_conv",                     # is the converter using an external program ?
        "is_working",                           # is the converter producing anything meaningful ?
        "index_path",                           # Index program relative path
        "index_arguments",                      # Converter program args. File name will be '%s'.
        "index_encoding",                       # Encoding managed by the encoder (output only - input is nonsense as it's most likely binary data!) or None

        "preview_arguments",                    # Preview arguments or None to disable preview
        "preview_format",                       # Previewer output format : 'text' (default) or 'html' or 'pre' (text with fixed width)
        "preview_encoding",                     # Encoding managed by the encoder (output only - input is nonsense as it's most likely binary data!) or None
        )

    # attachment properties (MUST BE DERIVED)
    converter_type = INVALID_VALUE
    icon_file = INVALID_VALUE                   # Icon file (as an instantiated Image object)
    small_icon_file = INVALID_VALUE
    content_types = INVALID_VALUE               # Supported content-types (tuple of strings)

    is_external_conv = INVALID_VALUE
    index_path = INVALID_VALUE
    index_arguments = INVALID_VALUE
    index_encoding = INVALID_VALUE

    preview_path = INVALID_VALUE
    preview_arguments = INVALID_VALUE
    preview_encoding = INVALID_VALUE
    preview_format = INVALID_VALUE

    program_found = False               # set to True by getConverterProgram() when the program exists
    error = False ## if true, gui will display it.

    __CHECK_INTERFACE__ = 1             # Special attribute to enforce IF checking

    #                                                                   #
    #              Overridable interfaces for those methods             #
    #                                                                   #

00183     def initHandler(self, field, instance):
        """Initialize handler data if needed."""

    def getConverterType(self, field, instance):
        """Return the 'converter_type' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        converter_type = self.converter_type
        return converter_type

    def getIconFile(self, field, instance):
        """Return the 'icon_file' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        icon = self.icon_file
        return icon

    def getSmallIconFile(self, field, instance):
        """Return the 'small_icon_file' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        small_icon = self.small_icon_file
        return small_icon

    def getContentTypes(self, field, instance):
        """Return the 'content_types' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        supported = self.content_types
        return supported

    def getIndexPath(self, field, instance):
        """Return the 'index_path' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        path = self.index_path
        return path

    def getIndexArguments(self, field, instance):
        """Return the 'index_arguments' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        args = self.index_arguments
        return args

    def getIndexEncoding(self, field, instance):
        """Return the 'index_encoding' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        enc = self.index_encoding
        return enc

    def getPreviewPath(self, field, instance):
        """Return the 'preview_path' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        path = self.preview_path
        return path

    def getPreviewArguments(self, field, instance):
        """Return the 'preview_arguments' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        args = self.preview_arguments
        return args

    def getPreviewEncoding(self, field, instance):
        """Return the 'preview_encoding' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        enc = self.preview_encoding
        return enc

    def getPreviewFormat(self, field, instance):
        """Return the 'preview_format' attribute.

        'field' and 'instance' are accepted but not used by this default implementation.
        """
        fmt = self.preview_format
        return fmt

00220     def getIndexableValue(self, field, instance):
        getIndexableValue(self, field, instance) => (possibliy big) string
        Return the ZCatalog-indexable string for that type.
        Log(LOG_DEBUG, "converting field", field.getName(), self.index_arguments, self.__class__.__name__)
        index = self._convert(

        # Convert indexer output to plain "optimized" text
        index = striphtml.strip(index)
        words = []
        for w in string.split(index):
            stripped = string.lower(string.strip(w))
            if not stripped in words:
        return string.join(words, " ")

00244     def convertStringToIndex(self, content, content_type, instance):
        # Returns whatever self._convertString() produces.
        # NOTE(review): this block is damaged. The leading "00244" is a line
        # number left over from the source browser, the two description lines
        # below have lost their docstring quotes, and the argument list of the
        # _convertString() call is missing. _convertString() expects
        # (content, filename, program, arguments, encoding); presumably the
        # index_* settings are what gets passed -- restore from the upstream file.
        convertStringToIndex(self, content, content_type, instance) => Utility to convert a string to HTML
        using the converter stuff.
        return self._convertString(

00257     def convertStringToPreview(self, content, content_type, instance):
        # Runs self._convertString() and post-processes the result with
        # self._convertOutput() according to self.preview_format.
        # NOTE(review): this block is damaged. The leading "00257" is a line
        # number left over from the source browser, the two description lines
        # below have lost their docstring quotes, and the argument list of the
        # _convertString() call is missing. _convertString() expects
        # (content, filename, program, arguments, encoding); presumably the
        # preview_* settings are what gets passed -- restore from the upstream file.
        convertStringToPreview(self, content, content_type, instance) => Utility to convert a string to HTML
        using the converter stuff.
        preview = self._convertString(
        return self._convertOutput(preview, self.preview_format)

00271     def _convert(self, field, instance, program, arguments, encoding):
        # Runs self._convertString() and returns its result passed through
        # Field.encode(uustring, instance).
        # NOTE(review): this block is damaged. The leading "00271" is a line
        # number left over from the source browser, the description lines below
        # have lost their docstring quotes, and the argument list of the
        # _convertString() call is missing. How the attachment content and file
        # name are obtained from 'field' cannot be told from here -- restore
        # from the upstream file.
        _convert(self, field, instance, program, arguments, encoding) => call a converter with arguments. Return an unicode string.

        Won't convert if 'None' is provided as the converter or argument parameter -> This is the way for
        a plugin to inhibit conversion.
        # Perform conversion
        uustring = self._convertString(

        # Return an encoded string
        return Field.encode(uustring, instance)

00291     def unicode2string(self, str, instance):
        """Careful unicode converter"""
        return Field.encode(str, instance)

00295     def string2unicode(self, str, instance):
        """Careful unicode converter"""
        return Field.decode(str, instance)

00299     def _convertString(self, content, filename, program, arguments, encoding, ):
        Utility method to convert stuff.
        This method will ALWAYS return an unicode string
        Encoding can be a string, a list, or None.
        # Basic checks.
        index = ""
        if arguments is None:
            return ""
        if not program:
            return ""

        f, fn = tempfile.mkstemp()
            # Write attachment in a temporary file
            f = open(fn, "w+b")

            # Call converter in the right directory
            Log(LOG_DEBUG, "Calling converter for", filename, "in directory", tempfile.tempdir)
            index = self.callConverter(
                arguments % (fn, ),

            # Delete attachment file
            if os.path.isfile(fn):
                Log(LOG_DEBUG, "Removed attachment file '%s'" % (fn,))

        # Deal with various encodings
        if encoding:
            # Convert from encoded string to unicode
            if type(encoding) in (type(''), type(u''), ):
                Log(LOG_DEBUG, "Encoding", encoding)
                index = index.decode(encoding, "replace")

            elif type(encoding) in (type(None),):

            elif type(encoding) in (type([]), type(()), ):
                for enc in encoding:
                        Log(LOG_DEBUG, "Trying encoding", enc)
                        index = index.decode(enc, "replace")

                    except UnicodeError:
                        Log(LOG_DEBUG, "Encoding", enc, "failed.")

        # Return the string
        return index

    #                                                                   #
    #                         HTML PREVIEW SUPPORT                      #
    #                                                                   #

    _strip_style = re.compile(r"""style\s*=\s*["'][^"']*["']""", re.I | re.S)
    _has_body_start = re.compile(r"""<\s*body""", re.I)
    _strip_body_start = re.compile(r""".*?<body[^>]*>""", re.I | re.S)
    _has_body_end = re.compile(r"""</\s*body""", re.I)
    _strip_body_end = re.compile(r"""</\s*body\s*>.*""", re.I | re.S)
    _strip_tags = re.compile(r"""<[^>]+>""", re.I | re.S)

    def _html_to_text(self, html):
        """Crude html-to-text conversion: delete everything matched by _strip_tags."""
        Log(LOG_DEBUG, "Stripping html tags")
        stripped = re.sub(self._strip_tags, '', html)
        Log(LOG_DEBUG, "done.")
        return stripped

    def _cleanHTML(self, text):
        """
        _cleanHTML(self, text) => text
        Uses regexps to clean HTML code from various buggy attr / tags
        Return the BODY content, without 'body' tags

        Inline style="..." attributes are removed first; then everything up to
        the opening body tag and everything from the closing body tag onwards
        is dropped, each only when such a tag is present. The result is
        stripped of surrounding whitespace.
        """
        Log(LOG_DEBUG, "Stripping style...")
        text = self._strip_style.sub('', text)

        # _strip_body_start regexp may go in infinite loop
        # if there is no body attribute (in certain conditions),
        # so it is only applied once a cheap search has found a body tag.
        if self._has_body_start.search(text) is not None:
            Log(LOG_DEBUG, "Stripping start tag...")
            text = self._strip_body_start.sub('', text, 1)
        if self._has_body_end.search(text) is not None:
            Log(LOG_DEBUG, "Stripping end tag...")
            text = self._strip_body_end.sub('', text, 1)
        Log(LOG_DEBUG, "done")
        return text.strip()

    def _convertOutput(self, preview, preview_format):
        _convertOutput(self, preview, preview_format) => Convert preview to the right format,

        depending on the previewer output
        if preview_format == 'text':
            # If the converter outputs plain text, we convert it into HTML
            preview = self.textToHTML(preview)

        elif preview_format == "html":
            # We just have to try to strip buggy/unuseful HTML
            preview = self._cleanHTML(preview)

        elif preview_format == "pre":
            preview = "<pre>%s</pre>" % (preview,)

        return preview

    def getPreview(self, field, instance):
        """
        getPreview(self, field, instance) => string or None

        Return the HTML preview for this attachment: the converter output
        post-processed by self._convertOutput() according to self.preview_format.
        Return None when self.preview_arguments is None, i.e. when the
        attachment is not previewable.
        """
        # Check if we can preview
        if self.preview_arguments is None:
            return None

        # Call the converter with the proper arguments.
        # NOTE(review): the argument list of this call was missing from the
        # listing this file was recovered from. It is rebuilt here from the
        # _convert(field, instance, program, arguments, encoding) signature and
        # the preview_* attributes -- confirm against the upstream file.
        preview = self._convert(
            field,
            instance,
            self.preview_program,
            self.preview_arguments,
            self.preview_encoding,
            )
        Log(LOG_DEBUG, "Getting preview for file", field.getFilename(instance))

        # Return the previewable string
        return self._convertOutput(preview, self.preview_format)

    def getSmallPreview(self,):
        """
        getSmallPreview(self,) => string or None

        Default behaviour : if the preview string is shorter than MAX_PREVIEW_SIZE, return it, else return None.
        An empty or missing preview also gives None.
        You can override this, of course.
        """
        # NOTE(review): no 'preview' method is defined on AbstractHandler in this
        # file (only getPreview(field, instance)); presumably it is supplied by a
        # subclass or through acquisition -- confirm.
        ret = self.preview()
        if ret and len(ret) < MAX_PREVIEW_SIZE:
            return ret
        return None

    #                                                                   #
    #                           UTILITY METHODS                         #
    #                                                                   #
    #   Those methods can be called from your products to make your     #
    #   work easier when creating plugins.                              #
    #                                                                   #

    def callConverter(self, program_path, arguments = '', report_errors = 1):
        """
        callConverter(self, program_path, arguments = '', report_errors = 1) => convert file using program_path with given arguments.
        Return the output stream of the converter program.

        The command "<program_path> <arguments>" is run through os.popen() from
        inside a fresh temporary directory, which is removed afterwards.
        if report_errors is true, "2> <temporary file>" is appended at the end of
        the command line and the content of that file is used for error reporting.

        Raises RuntimeError when the program writes nothing on its standard
        output. Error output next to a non-empty result is only logged.
        """
        # NOTE(review): the try/except/else/finally lines, the chdir() calls and
        # the first Log() argument were missing from the listing this file was
        # recovered from and have been rebuilt from the surrounding code and
        # comments -- confirm against the upstream file.
        cmd = "%s %s" % (program_path, arguments,)
        Log(LOG_DEBUG, "Converting file using '%s' program and '%s' arguments" % (
            program_path,
            arguments, ))
        idx = ""
        errors = ""

        # Manage file for error reporting
        errfile = None
        if report_errors:
            err_fd, errfile = tempfile.mkstemp()
            # Only the name is needed (the shell writes to it), so release
            # the descriptor mkstemp() opened.
            os.close(err_fd)
            cmd = "%s 2> %s" % (cmd, errfile, )

        # Actually execute command
        curdir = os.getcwd()
        tmpdir = tempfile.mkdtemp()
        try:
            os.chdir(tmpdir)
            Log(LOG_DEBUG, "We work in", os.getcwd())
            pipe = os.popen(cmd, "r")
            try:
                idx = pipe.read()
            finally:
                pipe.close()
        finally:
            # Go back to the current dir
            os.chdir(curdir)

            # Parse error file
            if errfile:
                try:
                    f = open(errfile, "r")
                except (IOError, OSError):
                    Log(LOG_NOTICE, "Unable to open error file '%s'" % (errfile, ))
                else:
                    try:
                        errors = f.read()
                    finally:
                        f.close()
                try:
                    os.remove(errfile)
                except OSError:
                    Log(LOG_NOTICE, "Could not remove temporary stuff in", errfile)

            # Remove the temporary directory
            try:
                for root, dirs, files in os.walk(tmpdir, topdown=False):
                    for name in files:
                        os.remove(os.path.join(root, name))
                    for name in dirs:
                        os.rmdir(os.path.join(root, name))
                os.rmdir(tmpdir)
            except OSError:
                Log(LOG_NOTICE, "Could not remove temporary stuff in", tmpdir)

        # Report errors
        if not idx and not errors:
            raise RuntimeError("'%s' returned nothing. No error reported by plugin. Indexing cancelled." % (cmd, ))
        elif not idx:
            raise RuntimeError("'%s' returned nothing. Error reported by plugin: '%s'. Indexing cancelled." % (cmd, errors, ))
        elif errors:
            Log(LOG_WARNING, "'%s' returned error while indexing: '%s'. Indexing done anyway." % (cmd, errors, ))

        Log(LOG_DEBUG, "Conversion done. Implicitly closing streams.")

        return idx

    def textToHTML(self, text):
        """
        textToHTML(self, text) => string

        Convert a plain-text string into pretty HTML, keeping you away from the need
        to use dirty '<pre>' tags and quoting the string.

        '&', '<' and '>' are replaced by their entities, every pair of spaces
        becomes one '&nbsp;', every linefeed gets a '<br />' in front of it,
        and the result is stripped of surrounding whitespace.
        """
        # HTML-Quote the string (same three replacements cgi.escape() performs,
        # done inline because cgi.escape() no longer exists in recent Python
        # versions). '&' goes first so the entities produced for '<' and '>'
        # are not escaped a second time.
        text = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

        # Convert double linefeeds into paragraphs
        # XXX TODO

        # Convert each pair of spaces into one non-breaking space
        text = text.replace("  ", "&nbsp;")

        # Convert simple linefeeds into line breaks
        text = text.replace("\n", "<br />\n")

        # Return this pretty converted string
        return text.strip()

    def getGUIIndexProgramCommand(self, field, instance):
        """
        getGUIIndexProgramCommand(self, field, instance) => HTML string

        Describe the index command for display: '<em>unavailable</em>' when the
        handler is not working, '<em>internal</em>' when it uses no external
        program, else "<index_path> <index_arguments>", followed by a
        'not found' marker when program_found is False.
        'field' and 'instance' are not used.
        """
        if not self.is_working:
            return "<em>unavailable</em>"
        if not self.is_external_conv:
            return "<em>internal</em>"

        command = self.index_path + " " + self.index_arguments
        if self.program_found == False:
            return command + "<br/><strong>not found</strong>"
        return command

    def getGUIPreviewProgramCommand(self, field, instance):
        """
        getGUIPreviewProgramCommand(self, field, instance) => HTML string

        Describe the preview command for display: '<em>unavailable</em>' when
        the handler is not working, '<em>internal</em>' when it uses no external
        program, else "<preview_path> <preview_arguments>", followed by a
        'not found' marker when program_found is False.
        'field' and 'instance' are not used.
        """
        if not self.is_working:
            return "<em>unavailable</em>"
        if not self.is_external_conv:
            return "<em>internal</em>"

        command = self.preview_path + " " + self.preview_arguments
        if self.program_found == False:
            return command + "<br/><strong>not found</strong>"
        return command

    def getError(self, field, instance):
        """Return "error" when the 'error' flag is set, else an empty string.

        'field' and 'instance' are not used.
        """
        if not self.error:
            return ""
        return "error"

#                                                                       #
#                       External programs interface                     #
#                                                                       #

PACKAGE_HOME = App.Common.package_home(globals())

# NOTE(review): the 'else:' lines and the opening of the two "not found" Log()
# calls were missing from the listing this file was recovered from; without
# them the Unix definition replaced the Windows one on every platform. The
# structure is rebuilt from the indentation and comments; the LOG_WARNING
# level of the "not found" messages is an assumption -- confirm against the
# upstream file.
if sys.platform == 'win32':
    # Windows platform
    def getConverterProgram(converter):
        """
        getConverterProgram(converter) => quoted program path, "type" or None

        Look for converter.index_path under
        PACKAGE_HOME/converters/<converter_type>/win32/. When the file exists,
        set converter.program_found and return its path wrapped in double
        quotes; otherwise set converter.error and return None.
        An index_path of None gives None, and "type" (any case) gives "type".
        """
        conv_type = converter.converter_type
        conv_path = converter.index_path

        if conv_path is None:
            return None
        if conv_path.lower() == "type":
            return "type"

        program_file = os.path.join(PACKAGE_HOME, "converters", conv_type, "win32", conv_path)
        program = '"' + program_file + '"'
        if os.path.isfile(program_file):
            Log(LOG_DEBUG, "Using '%s' program to convert %s attachments." % (program, conv_type))
            converter.program_found = True
        else:
            Log(LOG_WARNING,
                "Converter program '%s' not found for '%s' attachments! Indexing and preview won't work." % (
                    program, conv_type,
                    ))
            program = None
            converter.error = True

        return program

else:
    # Unix platform
    def getConverterProgram(converter):
        """
        getConverterProgram(converter) => full program path, "" or None

        Return None when the converter is not external. Otherwise resolve
        converter.index_path with the 'which' shell command: when it prints
        something, set converter.program_found and return the stripped output;
        when it prints nothing, set converter.error and return the empty string.

        Raises RuntimeError when the converter is external but its index_path
        is None or "(internal)".
        """
        conv_type = converter.converter_type
        conv_path = converter.index_path
        import commands
        if not converter.is_external_conv:
            return None         # In case we don't need a program

        if "(internal)" == conv_path or conv_path is None:
            raise RuntimeError("converter path is invalid, but external conv has been set.")

        # Try to find the real full path of the program
        program = commands.getoutput("which %s" % (conv_path))
        if not program:
            Log(LOG_WARNING,
                "Converter program '%s' not found for '%s' attachments! Indexing and preview won't work." % (
                    conv_path, conv_type,
                    ))
            converter.error = True
        else:
            program = program.strip()
            Log(LOG_DEBUG, "Using '%s' program to convert %s attachments." % (program, conv_type))
            converter.program_found = True

        return program

Generated by  Doxygen 1.6.0   Back to index