<?php
/**
 * Script-defined constants.
**/
/** Directory containing this script; every other path below hangs off it. **/
define('PACKET_GEN_DIR', realpath(dirname(__FILE__)));
/** Templates (stylesheet, item template, document shell) used to assemble each document. **/
define('PACKET_TEMPLATES_DIR', PACKET_GEN_DIR . '/templates');
/** Scratch space: holds the PID file and the intermediate files of each export. **/
define('PACKET_TEMP_DIR', PACKET_GEN_DIR . '/temp');
/** Destination of the finished exports (combined PDF or ZIP archive). **/
define('PACKET_FILESTORE_DIR', PACKET_GEN_DIR . '/filestore');

/**
 * Single-instance guard. The presence of the PID file means another run is in 
 * progress, so this one stops immediately. Otherwise the PID file is created 
 * (with the placeholder content '0') and execution continues.
 *
 * NOTE(review): only the existence of the file is checked; a PID file left behind 
 * by a crashed run would presumably block every later run until it is removed 
 * by hand - confirm how stale files are dealt with.
**/
if (file_exists(PACKET_TEMP_DIR . '/packet_gen.pid')) {
	exit();
}
file_put_contents(PACKET_TEMP_DIR . '/packet_gen.pid', '0');

/**
 * Open the MySQL connection and select the working database.
 * A failure at either step is reported through trigger_error() (default error 
 * level), so the script is not halted here. The connection is then switched 
 * to UTF-8.
**/
$dbc = mysql_connect('', '', '');
if (!$dbc) {
	trigger_error('could not connect to mysql: ' . mysql_error());
}
if (!mysql_select_db('')) {
	trigger_error('could not select database: ' . mysql_error());
}
mysql_query("SET NAMES 'utf8';");

/**
 * Script-wide working variables:
 *  - $html      : markup of the document currently being assembled
 *  - $html_body : body markup of the document (temporary storage for document assembly)
 *  - $html_docs : HTML documents waiting to be converted
 *  - $packet_id : identifier of the packets table record
 *  - $page      : index of the packet item page being built
 *  - $pages     : HTML snippets, one per packet page
 *  - $filenames : file names of the exported document(s)
**/
$html = $html_body = "";
$packet_id = $page = 0;
$html_docs = $pages = $filenames = array();

/**
 * Load DOMPDF, which turns the assembled HTML documents into PDF.
**/
require_once PACKET_GEN_DIR . "/dompdf/dompdf_config.inc.php";

/**
 * Main loop: process one packet generation request per iteration until 
 * get_packet_id() has nothing more to hand out.
**/
while ($packet_gen_info = get_packet_id()) {
	/**
	 * Start every packet from a clean slate. Without this the pages, documents and 
	 * file names collected for one packet are still present (and $page is not 0) 
	 * when the next packet is assembled, so later packets would contain the pages 
	 * of earlier ones.
	**/
	$html = "";
	$html_docs = array();
	$page = 0;
	$pages = array();
	$filenames = array();
	
	/**
	 * Only requests that reference a form (form_ref > 0) are handled.
	**/
	if ($packet_gen_info['form_ref'] > 0) {
		$packet_id = $packet_gen_info['form_ref'];
		$packet_options = unserialize($packet_gen_info['options']);
		$packet_source = 'F';
	} else {
		continue;
	}
	
	/**
	 * Connect to the database and pull the packet metadata.
	**/
	$mysql_query = "";
	$mysql_result = @mysql_query($mysql_query);
	$mysql_row = mysql_fetch_assoc($mysql_result);
	$packet_gen_info['packet_code'] = 'fid' . $packet_id;
	
	/**
	 * If the export has already been generated let's get rid of it. The finished 
	 * export is either "<packet_code>.<format>" (combined PDF) or 
	 * "<packet_code>.<format>.zip" (archive), so both are checked. The "." before 
	 * the format matters: without it the path never matches the real file.
	**/
	$stale_exports = array(
		PACKET_FILESTORE_DIR . '/' . $packet_gen_info['packet_code'] . '.' . $packet_options['format'],
		PACKET_FILESTORE_DIR . '/' . $packet_gen_info['packet_code'] . '.' . $packet_options['format'] . '.zip'
	);
	foreach ($stale_exports as $stale_export) {
		if (file_exists($stale_export)) {
			unlink($stale_export);
		}
	}
	
	/**
	 * Connect to the database and pull the items contained within the packet.
	**/
	$mysql_query = "";
	$mysql_result = @mysql_query($mysql_query);
	
	/**
	 * First pull in the stylesheet. It always occupies index 0 of $pages.
	**/
	$pages[$page] = file_get_contents(PACKET_TEMPLATES_DIR . "/form_stylesheet.css");
	
	/**
	 * Now pull in each page and the associated questionnaire.
	 * Replace variables specific to each page (item code, item content, etc).
	**/
	while ($mysql_row = mysql_fetch_assoc($mysql_result)) {
		$page++;
		$pages[$page] = file_get_contents(PACKET_TEMPLATES_DIR . "/form_item_template.htm");
		$pages[$page] = str_replace("%ITEM_CODE%", $mysql_row['code'], $pages[$page]);
		$pages[$page] = str_replace("%ITEM_CONTENT%", $mysql_row['text'], $pages[$page]);
		
		/**
		 * Set the filename: the first two characters of the item code in upper case, 
		 * the part up to the dash zero-padded to three digits, a dash, and the part 
		 * after the dash zero-padded to two digits.
		**/
		$item_code = $mysql_row['code'];
		$dash_pos = strpos($item_code, '-');
		$filenames[$page] = strtoupper(substr($item_code, 0, 2)) . sprintf('%03u', substr($item_code, 2, $dash_pos - 2)) . '-' . sprintf('%02u', substr($item_code, $dash_pos + 1));
		
		/**
		 * Some HTML modifications need to be made in order to accommodate certain 
		 * requirements and/or limitations of DOMPDF.
		 *
		 * Since DOMPDF doesn't support ordered lists we change them into tables: 
		 * every list item becomes a table row labelled with an auto-incrementing 
		 * letter (restarting at "A" for each page), and the enclosing 
		 * <ol type="A"> becomes the table itself.
		 *
		 * Doing this here instead of in the global replacement section makes it more 
		 * difficult to write the matching string since the HTML hasn't yet been run 
		 * through tidy.
		**/
		$pages[$page] = packet_gen_ol_items_to_rows($pages[$page]);
		$pages[$page] = preg_replace('/\<ol.*?type=\"A\".*?\>(.*?)\<\/ol\>/is', '<table class="olist">$1</table>', $pages[$page]);
	}
	
	/**
	 * Index 0 always holds the stylesheet, so a packet without any items has 
	 * exactly one entry. There is nothing to export in that case.
	**/
	if (count($pages) <= 1) {
		continue;
	}
	
	/**
	 * Now that the pages have been collected and formatted, assemble the document.
	 * We're either creating a single big document or many individual documents.
	 * The only situation where we're creating a single document is when we're 
	 * placing multiple items on a page (pagesaver == TRUE)
	**/
	if ($packet_options['format'] == 'pdf' && $packet_options['pagesaver'] && !$packet_options['splitpdfs']) {
		$html = file_get_contents(PACKET_TEMPLATES_DIR . "/form_shell.htm");
		$html = str_replace("%DOCUMENT_STYLESHEET%", $pages[0], $html);
		$html = str_replace("%DOCUMENT_CONTENT%", implode("\r\n\r\n", array_slice($pages, 1)), $html);
		$html_docs[1] = $html;
		$filenames[1] = $packet_gen_info['packet_code'];
	} else {
		$page_count = count($pages);
		for ($page = 1; $page < $page_count; $page++) {
			$html = file_get_contents(PACKET_TEMPLATES_DIR . "/form_shell.htm");
			$html = str_replace("%DOCUMENT_STYLESHEET%", $pages[0], $html);
			$html = str_replace("%DOCUMENT_CONTENT%", $pages[$page], $html);
			$html_docs[$page] = $html;
		}
	}
	
	/**
	 * Now render the compiled HTML documents. If any document fails, give up on 
	 * this packet and move on to the next one.
	**/
	foreach ($html_docs as $html_doc_index => $html_doc) {
		if (create_export($html_doc, $filenames[$html_doc_index], $packet_options['format']) === FALSE) {
			continue 2;
		}
	}
	
	/**
	 * If this is a single PDF form export we'll combine the 
	 * resulting page PDFs into a single document. Otherwise we'll collect the export 
	 * files into a ZIP archive.
	**/
	$return_code = 0;
	$html_doc_count = count($html_docs);
	if ($packet_options['format'] == 'pdf' && $packet_options['splitpdfs'] == FALSE) {
		/**
		 * Every path is passed through escapeshellarg() because the file names are 
		 * derived from item codes stored in the database.
		**/
		$exec_cmd = 'pdftk';
		for ($html_doc = 1; $html_doc <= $html_doc_count; $html_doc++) {
			$exec_cmd .= ' ' . escapeshellarg(PACKET_TEMP_DIR . '/' . $filenames[$html_doc] . '.pdf');
		}
		$exec_cmd .= ' cat output ' . escapeshellarg(PACKET_FILESTORE_DIR . '/' . $packet_gen_info['packet_code'] . '.pdf');
		// exec() appends to an existing array, so reset the output buffer for each packet.
		$pdftk_output = array();
		exec($exec_cmd, $pdftk_output, $pdftk_return_code);
		$return_code = $pdftk_return_code;
	} else {
		$zip = new ZipArchive();
		$zip_return_code = $zip->open(PACKET_FILESTORE_DIR . '/' . $packet_gen_info['packet_code'] . '.' . $packet_options['format'] . '.zip', ZIPARCHIVE::CREATE);
		if ($zip_return_code === TRUE) {
			for ($html_doc = 1; $html_doc <= $html_doc_count; $html_doc++) {
				if (!$zip->addFile(PACKET_TEMP_DIR . '/' . $filenames[$html_doc] . '.' . $packet_options['format'])) {
					$zip_return_code = 600;
				}
			}
			/**
			 * The archive is only written when it is closed, so a failure here must 
			 * keep the temporary files from being deleted below.
			**/
			if (!$zip->close() && $zip_return_code === TRUE) {
				$zip_return_code = 601;
			}
		}
		$return_code = ($zip_return_code === TRUE ? 0 : $zip_return_code);
	}
	
	/**
	 * If we've successfully created the export format let's delete all the support 
	 * documents. On failure they are left in place.
	**/
	if ($return_code == 0) {
		for ($html_doc = 1; $html_doc <= $html_doc_count; $html_doc++) {
			unlink(PACKET_TEMP_DIR . '/' . $filenames[$html_doc] . '.pdf');
			unlink(PACKET_TEMP_DIR . '/' . $filenames[$html_doc] . '.htm');
			if ($packet_options['format'] == 'eps') { unlink(PACKET_TEMP_DIR . '/' . $filenames[$html_doc] . '.eps'); }
			if ($packet_options['format'] == 'tif') { unlink(PACKET_TEMP_DIR . '/' . $filenames[$html_doc] . '.tif'); }
		}
	}
}
cleanup();

/**
 * Turn every <li>...</li> element of an HTML fragment into a lettered table row.
 *
 * The rows are labelled "A.", "B.", "C." ... in document order, starting again at 
 * "A" on every call. The item content is copied verbatim; it is never evaluated 
 * as code, which is why this is done by splitting the markup rather than with the 
 * "e" modifier of preg_replace.
 *
 * @param string $html HTML fragment that may contain list items
 * @return string the fragment with each list item rewritten as a <tr>; the 
 *                unchanged fragment if the markup could not be split
**/
function packet_gen_ol_items_to_rows ($html) {
	/**
	 * With PREG_SPLIT_DELIM_CAPTURE and a single capture group the result alternates: 
	 * even indexes hold the markup between list items, odd indexes hold the content 
	 * of one list item.
	**/
	$pieces = preg_split('/\<li.*?\>(.*?)\<\/li\>/is', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
	if ($pieces === FALSE) {
		return $html;
	}
	
	$ascii_code = 65; // "A"
	$rewritten = '';
	foreach ($pieces as $index => $piece) {
		if ($index % 2 == 1) {
			$rewritten .= '<tr><td align="right" valign="top" class="olistnum">' . chr($ascii_code++) . '.</td><td>&nbsp;</td><td align="left" valign="top" class="olistitem">' . $piece . '</td></tr>';
		} else {
			$rewritten .= $piece;
		}
	}
	return $rewritten;
}

/**
 * Fetch the next packet generation record.
 *
 * Runs the lookup statement and hands back the first row of its result.
 *
 * @return array|bool the row as an associative array, or FALSE when the query 
 *                    fails or yields no rows
**/
function get_packet_id () {
	$statement = "";
	$result = mysql_query($statement);
	
	// Nothing to do when the query failed or came back empty.
	if (!$result || mysql_num_rows($result) <= 0) {
		return FALSE;
	}
	
	return mysql_fetch_assoc($result);
}

/**
 * Release the single-instance lock and end the script.
 *
 * Removes the PID file from the temp directory and then terminates, so this 
 * function never returns to its caller.
**/
function cleanup () {
	$pid_file = PACKET_TEMP_DIR . '/packet_gen.pid';
	unlink($pid_file);
	exit();
}

/**
 * Render one HTML document to the requested export format.
 *
 * The HTML is cleaned with tidy, adjusted for DOMPDF's limitations, written to 
 * "<filename>.htm" in the temp directory and rendered to "<filename>.pdf". For the 
 * 'eps' and 'tif' formats the PDF is then converted with GhostScript (plus epstool 
 * for the EPS preview).
 *
 * @param string $html     complete HTML document to render
 * @param string $filename base name (no directory, no extension) of the files to create
 * @param string $format   export format: 'pdf' (default), 'eps' or 'tif'
 * @return bool TRUE if "<filename>.<format>" exists in the temp directory afterwards, 
 *              FALSE otherwise
**/
function create_export ($html, $filename, $format = 'pdf') {
	/**
	 * For some reason you can't reuse the DOMPDF class once you have rendered
	 * a PDF. Rather than debug this we'll go ahead and set up the object each 
	 * time this function is called rather than set it up once at the beginning 
	 * of the script. This uses a lot of memory, however, so once TIDY and DOMPDF 
	 * have done their work we'll go ahead and destroy the objects to free up space.
	**/
	
	/**
	 * tidy configuration options (see http://tidy.sourceforge.net/docs/quickref.html)
	**/
	$tidy_config = array(
		'ascii-chars' => TRUE,
		'clean' => TRUE,
		'drop-proprietary-attributes' => TRUE,
		'enclose-block-text' => TRUE,
		'indent' => TRUE,
		'indent-attributes' => TRUE,
		'input-encoding' => 'utf8',
		'join-classes' => FALSE,
		'join-styles' => FALSE,
		'merge-divs' => FALSE,
		'numeric-entities' => TRUE,
		'output-encoding' => 'ascii',
		'output-html' => TRUE,
		'output-xhtml' => FALSE,
		'word-2000' => TRUE,
		'wrap' => 167,
		'wrap-attributes' => FALSE
	);
	
	/**
	 * Instantiate loadable modules
	 *  - tidy for cleaning and formatting the HTML document
	 *    (this will help minimize errors in the PDF creation process)
	 *  - DOMPDF
	**/
	$tidy = new tidy();
	$dompdf = new DOMPDF();
	
	/**
	 * Before we go any further we need to strip out some tags that tidy is having trouble 
	 * dealing with:
	 *  - <o:p></o:p> : Word tag. tidy is removing the contents, so turn it into a <span>.
	**/
	$html = preg_replace("/<(\/?)o:p>/", "<$1span>", $html);
	
	/**
	 * Run tidy on the document to clean up the HTML formatting. This is necessary to
	 * ensure that the HTML is friendly to DOMPDF.
	**/
	$tidy->parseString($html, $tidy_config, 'utf8');
	$tidy->cleanRepair();
	$html = tidy_get_output($tidy);
	
	/**
	 * Some global HTML modifications need to be made in order to accommodate certain 
	 * requirements and/or limitations of DOMPDF.
	 *
	 * First we need to make sure image references include the domain and absolute path.
	 * DOMPDF treats each image reference as a file system reference unless an external-style 
	 * reference is used. Root-relative references are therefore prefixed with the site URL, 
	 * except for those that already point into the templates directory.
	 * 
	 * Previously we used $_SERVER['HTTP_HOST'] to find the host, but that was when this script 
	 * was run interactively. We'll need to think about the best way to perform this 
	 * replacement, and to take into account template variables at the beginning of the URL.
	 *
	 * preg_quote() is used so that every character of the directory path is matched 
	 * literally, not only the slashes.
	**/
	$templates_path = preg_quote(ltrim(PACKET_TEMPLATES_DIR, '/'), '/');
	$patterns = array(
		'/src=([^>]*?)\"\/(?!' . $templates_path . ')/is',
		'/url\(\/(?!' . $templates_path . ')/is'
	);
	$replacements = array(
		'src="http://www.eclecticgeek.com/',
		'url(http://www.eclecticgeek.com/'
	);
	$html = preg_replace($patterns, $replacements, $html);
	
	/**
	 * Next, DOMPDF likes to work in inches, and it doesn't really like the width attribute of tags. 
	 * This is particularly important for images, but we've also run into some table troubles as well. 
	 * Since pretty much everything is specified in pixels (pt in some cases ... but we'll ignore those for now)
	 * let's try to rewrite all width attributes as inches specified in a style attribute.
	 *
	 * A callback is used instead of the "e" modifier so that the tag contents are copied 
	 * verbatim and never evaluated as PHP code. Only widths with at least one digit are 
	 * rewritten; a tag with an empty width attribute is left alone.
	**/
	$html = preg_replace_callback('/<([^>]*?width=\"(\d+)\"[^>]*?)>/is', 'packet_gen_width_attr_to_inches', $html);
	/**
	 * Unescape any escaped apostrophes. Kept as a safety net for escaped apostrophes that 
	 * are already present in the HTML handed to this function.
	**/
	$html = str_replace("\'", "'", $html);
	
	/**
	 * Run tidy again to make sure that any code replacement is nicely formatted.
	**/
	$tidy->parseString($html, $tidy_config, 'utf8');
	$tidy->cleanRepair();
	$html = tidy_get_output($tidy);
	
	/**
	 * The preserve-entities option for tidy doesn't appear to be supported. In order to avoid 
	 * problems with "smart" punctuation with DOMPDF (which doesn't appear to handle it well) we'll
	 * go ahead and manually convert any known characters to HTML entity. The standard string 
	 * replacement functions don't work since these characters are multi-byte.
	 * Currently these replacements aren't doing anything since the ascii-chars option for tidy is 
	 * set, because DOMPDF is having trouble with multi-byte characters (such as smart punctuation).
	**/
	$smart_punctuation = array(
		"/\x{2018}/isu" => "&#8216;",
		"/\x{2019}/isu" => "&#8217;",
		"/\x{201C}/isu" => "&#8220;",
		"/\x{201D}/isu" => "&#8221;",
		"/\x{2013}/isu" => "&#8211;",
		"/\x{2014}/isu" => "&#8212;"
	);
	$html = preg_replace(array_keys($smart_punctuation), array_values($smart_punctuation), $html);
	
	/**
	 * Output the HTML so we can check for errors.
	**/
	$base_path = PACKET_TEMP_DIR . '/' . $filename;
	file_put_contents($base_path . '.htm', $html);
	
	/**
	 * Render the final HTML with DOMPDF and write the PDF next to it. The PDF is always 
	 * produced since the other formats are converted from it.
	**/
	$dompdf->load_html($html);
	$dompdf->render();
	$pdf = $dompdf->output();
	file_put_contents($base_path . '.pdf', $pdf);
	
	/**
	 * Unless we deallocate the memory used by tidy and DOMPDF we will run out of memory.
	**/ 
	unset($tidy);
	unset($dompdf);
	
	/**
	 * Convert the PDF for the other formats. The paths are passed through escapeshellarg() 
	 * since the file name is supplied by the caller and ends up on a shell command line.
	**/
	$pdf_arg = escapeshellarg($base_path . '.pdf');
	if ($format == 'eps') {
		// GhostScript needs a high resolution (-r4800, or higher), otherwise it will convert small fonts to bitmap
		$eps_arg = escapeshellarg($base_path . '.eps');
		$exec_cmd = 'gs -dSAFER -dBATCH -dNOPAUSE -sDEVICE=epswrite -dLanguageLevel=2 -dEPSCrop -r4800 -sOutputFile=' . $eps_arg . ' ' . $pdf_arg;
		exec($exec_cmd, $pdfconv_output, $pdfconv_return_code);
		// epstool reads and writes the same file to add the preview in place.
		$exec_cmd = 'epstool --add-tiff6p-preview --device bmpgray ' . $eps_arg . ' ' . $eps_arg;
		exec($exec_cmd, $epstool_output, $epstool_return_code);
	}
	if ($format == 'tif') {
		$tif_arg = escapeshellarg($base_path . '.tif');
		$exec_cmd = 'gs -dSAFER -dBATCH -dNOPAUSE -sDEVICE=tiff24nc -r150 -sOutputFile=' . $tif_arg . ' ' . $pdf_arg;
		exec($exec_cmd, $pdfconv_output, $pdfconv_return_code);
	}
	
	// Success is judged solely by the presence of the file in the requested format.
	return file_exists($base_path . '.' . $format);
}

/**
 * preg_replace_callback() handler used by create_export(): re-emit a tag that carries a 
 * numeric width attribute with an additional style attribute giving that width in inches.
 *
 * The width is divided by 120 (the DPI assumed for the documents) and rounded to three 
 * decimals.
 *
 * @param array $matches [1] = everything between "<" and ">", [2] = the digits of the width
 * @return string the replacement tag
**/
function packet_gen_width_attr_to_inches ($matches) {
	return '<' . $matches[1] . ' style="width: ' . round(($matches[2] / 120), 3) . 'in;">';
}
?>