differentiate extension for each export filter, otherwise the result can be unpredict...
[cloudooo.git] / cloudooo / handler / ooo / mimemapper.py
1 ##############################################################################
2 #
3 # Copyright (c) 2009-2010 Nexedi SA and Contributors. All Rights Reserved.
4 # Gabriel M. Monnerat <gabriel@tiolive.com>
5 #
6 # WARNING: This program as such is intended to be used by professional
7 # programmers who take the whole responsibility of assessing all potential
8 # consequences resulting from its eventual inadequacies and bugs
9 # End users who are looking for a ready-to-use solution with commercial
10 # guarantees and support are strongly adviced to contract a Free Software
11 # Service Company
12 #
13 # This program is Free Software; you can redistribute it and/or
14 # modify it under the terms of the GNU General Public License
15 # as published by the Free Software Foundation; either version 2
16 # of the License, or (at your option) any later version.
17 #
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 # GNU General Public License for more details.
22 #
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 #
27 ##############################################################################
28
29 import pkg_resources
30 from re import findall
31 from subprocess import Popen, PIPE
32 from subprocess import STDOUT
33 from zope.interface import implements
34 from filter import Filter
35 from os import environ, path
36 from cloudooo.interfaces.mimemapper import IMimemapper
37 from types import InstanceType
38 import json
39
40
class MimeMapper(object):
    """Catalog of the OpenOffice.org filters usable for conversions.

    The catalog is empty until loadFilterList() is called. Afterwards it can
    return the filters registered for an extension, the filter name to use
    for an (extension, document service) pair, and the extensions a document
    can be exported to.
    """
    implements(IMimemapper)

    def __init__(self):
        """Create the empty structures that loadFilterList() fills in."""
        self._loaded = False
        # extension -> list of export Filter objects
        self._filter_by_extension_dict = {}
        # document service -> list of (extension, ui_name) export targets
        self._extension_list_by_type = {}
        # extension -> list of document services able to import it
        self._doc_type_list_by_extension = {}
        # filter type name -> mimetype (export filters only)
        self._mimetype_by_filter_type = {}
        # short document type (e.g. 'text') -> document service name
        self._document_type_dict = {}

    def _addFilter(self, filter):
        """Add filter in mimemapper catalog, indexed by its extension."""
        extension = filter.getExtension()
        self._filter_by_extension_dict.setdefault(extension, []).append(filter)

    def _typeToDocumentService(self, document_type):
        """Return the document service matching the document type.

        The first registered short type that starts with document_type wins.
        Returns None when no registered type matches.
        """
        for short_type, document_service in self._document_type_dict.items():
            if short_type.startswith(document_type):
                return document_service
        return None

    def _getElementNameByService(self, uno_service, ignore_name_list=()):
        """Return a dict {element name: {property name: property value}}.

        Keyword arguments:
        uno_service -- object providing getElementNames() and getByName()
        ignore_name_list -- property names which must be left out
        """
        service_dict = {}
        for name in uno_service.getElementNames():
            element_dict = {}
            for obj in uno_service.getByName(name):
                if obj.Name in ignore_name_list:
                    continue
                # Old-style class instances are skipped; presumably these are
                # UNO objects that cannot be stored as plain values.
                if isinstance(obj.Value, InstanceType):
                    continue
                element_dict[obj.Name] = obj.Value
            service_dict[name] = element_dict
        return service_dict

    def isLoaded(self):
        """Verify if filters were loaded."""
        return self._loaded

    def _queryOfficeFilters(self, hostname, port, **kw):
        """Run the unomimemapper helper and return (filter_dict, type_dict).

        Raises ValueError carrying the helper output when the helper exits
        with a non-zero return code.
        """
        uno_path = kw.get("uno_path", environ.get('uno_path'))
        office_binary_path = kw.get("office_binary_path",
                                    environ.get('office_binary_path'))
        # Prefer the python shipped with the office suite when it exists.
        office_python = path.join(office_binary_path, "python")
        python = office_python if path.exists(office_python) else "python"
        helper_path = pkg_resources.resource_filename(
            __name__, path.join("helper", "unomimemapper.py"))
        command = [python,
                   helper_path,
                   "--uno_path=%s" % uno_path,
                   "--office_binary_path=%s" % office_binary_path,
                   "--hostname=%s" % hostname,
                   "--port=%s" % port]
        # stderr is merged into stdout so that the error message raised
        # below contains everything the helper printed.
        process = Popen(command, stdout=PIPE, stderr=STDOUT, close_fds=True)
        stdout, _ = process.communicate()
        if process.returncode:
            raise ValueError(stdout)
        filter_dict, type_dict = json.loads(stdout)
        return filter_dict, type_dict

    def _documentServiceToType(self, document_service_str):
        """Return the short document type of a document service name.

        e.g. 'com.sun.star.text.TextDocument' -> 'text'
        """
        word_list = findall(r'[A-Z][a-z]+', document_service_str.split('.')[-1])
        if len(word_list) > 2:
            return ''.join(word_list[:2]).lower()
        return word_list[0].lower()

    def loadFilterList(self, hostname, port, **kw):
        """Load all filters of openoffice.

        Keyword arguments:
        hostname -- host of OpenOffice
        port -- port to connects by socket
        **kw:
          uno_path -- full path to uno library
          office_binary_path -- full path to openoffice binary
          ooo_disable_filter_name_list -- a list of filter names which are
            disabled

        Raises ValueError when the helper process fails.
        """
        # Several export filters share the same extension; give each of them
        # a distinct one, keyed by UI name, so the chosen filter is
        # predictable.
        alternative_extension_dict = {
            'Microsoft Excel 2007/2010 XML': 'ms.xlsx',
            'Microsoft Excel 5.0 Template': '5.xlt',
            'Microsoft Excel 5.0': '5.xls',
            'Microsoft Excel 95 Template': '95.xlt',
            'Microsoft Excel 95': '95.xls',
            'Microsoft PowerPoint 2007/2010 XML AutoPlay': 'ms.ppsx',
            'Microsoft PowerPoint 2007/2010 XML Template': 'ms.potm',
            'Microsoft PowerPoint 2007/2010 XML': 'ms.pptx',
            'Microsoft Word 2007/2010 XML': 'ms.docx',
            'Microsoft Word 6.0': '6.doc',
            'Microsoft Word 95': '95.doc',
        }
        # Hardcode blacklisted filters
        # XXX It should be done in configuration file instead
        blacklisted_filter_name_list = (
            'Text',  # Use 'Text Encoded' instead
            'Text (Writer/Web)',  # Use 'Text Encoded (Writer/Web)' instead
            'XHTML Calc File',
            'XHTML Impress File',
            'XHTML Writer File',
            'XHTML Draw File',
        )
        # these document services are not supported for now.
        unsupported_document_service_list = (
            'com.sun.star.text.GlobalDocument',
            'com.sun.star.formula.FormulaProperties',
            'com.sun.star.sdb.OfficeDatabaseDocument',
        )
        # http://api.openoffice.org/docs/DevelopersGuide/OfficeDev/OfficeDev.xhtml#1_2_4_2_10_Properties_of_a_Filter
        # Import:0x01, Export:0x02, Template:0x04, Internal:0x08,
        # OwnTemplate:0x10, Own:0x20, Alien:0x40,
        # UsesOptions (deprecated):0x80, Default:0x100,
        # NotInFileDialog:0x1000, NotInChooser:0x2000,
        # ThirdParty:0x80000, Preferred:0x10000000
        import_flag = 0x01
        export_flag = 0x02
        hidden_flag_mask = 0x08 | 0x1000 | 0x2000

        filter_dict, type_dict = self._queryOfficeFilters(hostname, port, **kw)
        ooo_disable_filter_name_list = \
            kw.get("ooo_disable_filter_name_list") or ()

        for filter_name, value in filter_dict.items():
            if filter_name in ooo_disable_filter_name_list:
                continue
            flag = value.get("Flags")
            if flag & hidden_flag_mask:
                continue
            if value.get('Name') in blacklisted_filter_name_list:
                continue
            filter_type = value.get('Type')
            # A filter may reference a type which was not reported; treat it
            # as a type without extension so that it is skipped just below
            # instead of failing on None.
            filter_type_dict = type_dict.get(filter_type) or {}
            ui_name = value.get('UIName') or filter_type_dict.get("UIName")
            filter_extension_list = filter_type_dict.get("Extensions")
            mimetype = filter_type_dict.get("MediaType")
            if not (filter_extension_list and mimetype):
                continue
            preferred = filter_type_dict.get("Preferred")
            document_service_str = value.get('DocumentService')
            if document_service_str in unsupported_document_service_list:
                continue

            doc_type = self._documentServiceToType(document_service_str)
            self._document_type_dict.setdefault(doc_type, document_service_str)

            # for Export filters
            if flag & export_flag:
                self._mimetype_by_filter_type.setdefault(filter_type, mimetype)
                # for export filters, one extension is enough.
                # In LibreOffice 3.6, ExportExtension is available.
                ext = value.get('ExportExtension', filter_extension_list[0])
                ext = alternative_extension_dict.get(ui_name, ext)
                # Add (extension, ui_name) tuple by document_type.
                # e.g {'com.sun.star.text.TextDocument': [('txt', 'Text'),]}
                local_extension_list = self._extension_list_by_type.setdefault(
                    document_service_str, [])
                if (ext, ui_name) not in local_extension_list:
                    local_extension_list.append((ext, ui_name))
                # register an export filter; the raw flags are used as sort
                # index.
                self._addFilter(Filter(ext, filter_name, mimetype,
                                       document_service_str,
                                       preferred=preferred, sort_index=flag,
                                       label=ui_name))

            # for Import filters
            if flag & import_flag:
                # for import filters, we care all possible extensions.
                for ext in filter_extension_list:
                    # Add a document type by extension.
                    # e.g {'doc': ['com.sun.star.text.TextDocument']}
                    service_list = self._doc_type_list_by_extension.setdefault(
                        ext, [])
                    if document_service_str not in service_list:
                        service_list.append(document_service_str)

        # hardcode 'extension -> document type' mappings according to
        # soffice behaviour for extensions having several candidates.
        self._doc_type_list_by_extension.update({
            'rtf': ['com.sun.star.text.TextDocument'],
            'sxd': ['com.sun.star.drawing.DrawingDocument'],
            'txt': ['com.sun.star.text.TextDocument'],
            'odg': ['com.sun.star.drawing.DrawingDocument'],
            'html': ['com.sun.star.text.WebDocument'],
            'sda': ['com.sun.star.drawing.DrawingDocument'],
            'sdd': ['com.sun.star.drawing.DrawingDocument'],
            'pdf': ['com.sun.star.drawing.DrawingDocument'],
            'xls': ['com.sun.star.sheet.SpreadsheetDocument'],
        })
        self.document_service_list = list(self._extension_list_by_type)
        self._loaded = True

    def getFilterName(self, extension, document_service):
        """Get filter name according to the parameters passed.

        Keyword arguments:
        extension -- expected a string of the file format.
        document_service -- expected a string of the document service.

        When several filters match, the first preferred one wins, then the
        first one whose name ends with "Export", then the one having the
        highest sort index.
        Raises IndexError when no filter matches.
        e.g
        >>> mimemapper.getFilterName("sdw", "com.sun.star.text.TextDocument")
        'StarWriter 3.0'
        """
        candidate_list = [candidate
                          for candidate in self.getFilterList(extension)
                          if candidate.getDocumentService() == document_service]
        if len(candidate_list) <= 1:
            return candidate_list[0].getName()
        for candidate in candidate_list:
            if candidate.isPreferred():
                return candidate.getName()
        for candidate in candidate_list:
            if candidate.getName().endswith("Export"):
                return candidate.getName()
        # sorted() is stable, so ties are resolved as a plain in-place sort
        # of the candidates would.
        candidate_list = sorted(candidate_list,
                                key=lambda candidate: candidate.getSortIndex())
        return candidate_list[-1].getName()

    def getFilterList(self, extension):
        """Return the list of filters registered for the extension.

        Keyword arguments:
        extension -- expected a string of the file format.

        An empty list is returned when the extension is unknown.
        e.g
        >>> mimemapper.getFilterList("doc")
        [<filter.Filter object at 0x9a2602c>,
         <filter.Filter object at 0x9a21d6c>,
         <filter.Filter object at 0x9a261ec>]
        """
        return self._filter_by_extension_dict.get(extension, [])

    def getAllowedExtensionList(self, extension=None, document_type=None):
        """Returns a tuple with the (extension, ui_name) pairs which can be
        used to export, according to the source extension or the document
        type passed.
        e.g
        >>> mimemapper.getAllowedExtensionList(extension="doc")
        or
        >>> mimemapper.getAllowedExtensionList(document_type="text")
        (('rtf', 'Rich Text Format'),
         ('htm', 'HTML Document'),)
        If both params are passed, document_type is discarded.
        """
        allowed_extension_list = []
        if extension:
            service_list = self._doc_type_list_by_extension.get(extension, [])
            for document_service in service_list:
                # A document service can be known for import only (e.g. from
                # the hardcoded mappings); it then has no export extension.
                export_list = self._extension_list_by_type.get(
                    document_service, [])
                for ext in export_list:
                    if ext not in allowed_extension_list:
                        allowed_extension_list.append(ext)
        elif document_type:
            document_service = self._typeToDocumentService(document_type)
            allowed_extension_list.extend(
                self._extension_list_by_type.get(document_service, []))
        return tuple(allowed_extension_list)
293
# Module-level MimeMapper instance, created empty at import time; its filter
# catalog is only filled once loadFilterList() has been called on it.
mimemapper = MimeMapper()