#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2009-2011 Sebastian Pipping <sebastian@pipping.org>
# Licensed under GPLv3 or later

import sys
import xml.dom.minidom
import logging
import os

# Program name and version, as printed by the --version option
PACKAGE = 'svgstrip'
VERSION = '0.4.0'

# Path used as the input file when no filename is given on the command line
STDIN_FILENAME = '/dev/stdin'


class SvgStripper:
	"""Command-line tool that removes machine-specific data from an SVG file.

	The attributes 'sodipodi:docbase', 'sodipodi:docname' and
	'inkscape:export-filename' are deleted, and absolute references in
	'sodipodi:absref' / 'xlink:href' are rewritten relative to the
	directory of the input file.

	The code is written to run unchanged on Python 2.6+ and Python 3.
	"""

	def __init__(self):
		# When set, report what would be done without modifying anything
		self.pretend = False
		# When set, write the result back to the input file
		self.inplace = False
		# Path of the file to read; STDIN_FILENAME when none was given
		self.input_filename = None
		# Ensures the "no document base" warning is logged only once
		self.path_warning_presented = False

	def main(self, params):
		"""Parse *params* (argv without the program name) and act on them.

		Returns the process exit status: 0 on success, 1 on error.
		"""
		run, error = self.parse_command_line(params)
		if error:
			return 1
		if run:
			return self.run()
		return 0

	@staticmethod
	def print_version():
		"""Write the program name and version to stdout."""
		sys.stdout.write("%s v%s\n" % (PACKAGE, VERSION))

	@staticmethod
	def print_usage():
		"""Write the usage summary to stderr."""
		# The synopsis must match parse_command_line(): every non-option
		# argument is taken as the input filename, there is no sub-command.
		sys.stderr.write("""\
USAGE
  svgstrip [-v|--verbose] [-p|--pretend] [[-i|--in-place] <filename>]
  svgstrip -h|--help
  svgstrip -V|--version

EXAMPLES
  svgstrip drawing.svg > stripped-drawing.svg
  svgstrip --in-place drawing.svg

""")

	def relative_path(self, abs_path):
		"""Return *abs_path* expressed relative to the input file's directory.

		When reading from stdin there is no directory to relate to, so only
		the last path component is kept and a warning is logged once.
		"""
		if self.input_filename == STDIN_FILENAME:
			# No docname available, keep basename only
			if not self.path_warning_presented:
				self.path_warning_presented = True
				logging.warning('No document base available when processing input from stdin. Relative paths may be broken.')
			return abs_path.split('/')[-1]
		return os.path.relpath(abs_path, os.path.dirname(self.input_filename))

	def parse_command_line(self, params):
		"""Interpret command-line arguments and configure logging.

		Sets self.pretend, self.inplace and self.input_filename.

		Returns a tuple (run, error): *run* is False when only help or
		version output was requested, *error* is True when the arguments
		are invalid (the usage summary has then been printed).
		"""
		loglevel = logging.WARNING
		run, error = True, False
		for arg in params:
			if arg in ('--verbose', '-v'):
				loglevel = logging.INFO
			elif arg in ('--version', '-V'):
				SvgStripper.print_version()
				run = False
				break
			elif arg in ('--help', '-h'):
				SvgStripper.print_usage()
				run = False
				break
			elif arg in ('--pretend', '-p'):
				self.pretend = True
			elif arg in ('--in-place', '--inplace', '-i'):
				self.inplace = True
			elif self.input_filename is None:
				self.input_filename = arg
			else:
				# More than one filename given
				error = True
				break

		# Only validate the input source when we are actually going to run;
		# otherwise e.g. "-i --help" would be reported as an error.
		if run and not error and self.input_filename is None:
			if self.inplace:
				# Stdin and in-place don't mix well
				error = True
			else:
				self.input_filename = STDIN_FILENAME
		if error:
			SvgStripper.print_usage()
		logging.basicConfig(format='%(levelname)s: %(message)s', level=loglevel)
		return (run, error)

	def _delete_attribute(self, node, attr_name, full_name):
		"""Remove *attr_name* from *node* (unless pretending) and log it."""
		if not self.pretend:
			node.removeAttribute(attr_name)
		logging.info("Attribute '%s' on node '%s' deleted", attr_name, full_name)

	def process_tree(self, node, name_parts):
		"""Strip *node* and, recursively, all of its element children.

		*name_parts* is a tuple of path components, e.g. ('svg', 'g[1]'),
		identifying *node*; it is only used for log messages and to
		recognise the root node (exactly one component).
		"""
		full_name = '/' + '/'.join(name_parts)

		for attr_name in ('sodipodi:docbase', 'sodipodi:docname',
				'inkscape:export-filename'):
			if node.hasAttribute(attr_name):
				self._delete_attribute(node, attr_name, full_name)

		for attr_name in ('sodipodi:absref', 'xlink:href'):
			if not node.hasAttribute(attr_name):
				continue

			if len(name_parts) == 1:
				# Root node, informative only, remove
				self._delete_attribute(node, attr_name, full_name)
				continue

			# Non-root node, integrating other content, make relative
			before = node.getAttribute(attr_name)
			if before.startswith('file:///'):
				# Drop the scheme but keep the leading slash of the path
				after = self.relative_path(before[len('file://'):])
			elif before.startswith('/'):
				after = self.relative_path(before)
			else:
				# Already relative or a non-file URL: leave untouched
				continue

			if not self.pretend:
				node.setAttribute(attr_name, after)
			logging.info("Attribute '%s' on node '%s' rewritten from '%s' to '%s'",
					attr_name, full_name, before, after)

		# Number same-named siblings starting at 1 for the log path
		name_to_count = dict()
		for child in node.childNodes:
			if child.nodeType != node.ELEMENT_NODE:
				continue

			count = name_to_count.get(child.tagName, 1)
			self.process_tree(child, name_parts + ('%s[%d]' % (child.tagName, count), ))
			name_to_count[child.tagName] = count + 1

	def run(self):
		"""Read the input file, strip it and write the result.

		Output goes to the input file with --in-place, to '/dev/stdout'
		otherwise. Nothing is written when both in-place and pretend mode
		are active.

		Returns 0 on success and 1 when the input cannot be read, is not
		well-formed XML, contains no 'svg' element, or the output cannot
		be written.
		"""
		# Imported explicitly instead of relying on minidom having loaded it
		from xml.parsers.expat import ExpatError

		try:
			dom = xml.dom.minidom.parse(self.input_filename)
		except IOError as e:
			logging.error("Could not read from file '%s': %s",
					self.input_filename, e.strerror)
			return 1
		except ExpatError:
			logging.error("File '%s' does not contain valid XML", self.input_filename)
			return 1

		svg_nodes = dom.getElementsByTagName("svg")
		if not svg_nodes:
			logging.error("File '%s' does not contain valid SVG", self.input_filename)
			return 1

		self.process_tree(svg_nodes[0], ('svg', ))

		if self.inplace and self.pretend:
			# Pretending: the input file must stay untouched
			return 0

		output_filename = self.input_filename if self.inplace else '/dev/stdout'

		# Determine target encoding, falling back to UTF-8
		target_encoding = getattr(dom, 'actualEncoding', None) \
				or getattr(dom, 'encoding', None) \
				or 'UTF-8'

		# Serialize BEFORE opening the output file, so that a failure here
		# cannot leave an in-place target truncated.
		try:
			raw_bytes = dom.toxml(encoding=target_encoding)
			logging.info('Encoding used: %s', target_encoding)
		except UnicodeEncodeError:
			logging.info('Encoding used: ASCII + numeric character references')
			# Serialize without an encoding declaration (XML then defaults
			# to UTF-8, of which ASCII is a subset) so that the declaration
			# does not contradict the bytes actually written.
			raw_bytes = dom.toxml().encode('ascii', 'xmlcharrefreplace')

		try:
			# Binary mode: the data is already encoded
			with open(output_filename, 'wb') as output_file:
				output_file.write(raw_bytes)
				# Trailing newline. NOTE: this is a single LF byte, as
				# before, regardless of the target encoding.
				output_file.write(b'\n')
		except IOError as e:
			logging.error("Could not write to file '%s': %s",
					output_filename, e.strerror)
			return 1
		return 0

if __name__ == "__main__":
	# Run the stripper on the command-line arguments (program name dropped)
	# and hand its return value to the shell as the exit status.
	sys.exit(SvgStripper().main(sys.argv[1:]))
