Source: algorithm/kekule.structures.canonicalizers.js

/**
 * @fileoverview
 * Canonicalizers for chemical structures, especially for ctab based ones.
 * The general process of canonicalization includes three phrases:
 *   1. An indexer object is called and assign index to each node in structure.
 *   2. An node sorter object is called and sort nodes according to canonicalization index
 *    in previous step.
 *   3. An connector sorter object is called to sort connectors according to sorted
 *    nodes.
 * Different canonicalization method may requires different indexer or node sorter.
 * Connector sorter usually do not need to vary.
 * @author Partridge Jiang
 */


/*
 * requires /core/kekule.common.js
 * requires /core/kekule.utils.js
 * requires /data/kekule.structures.js
 * requires /core/kekule.chemUtils.js
 * requires /algorithm/kekule.graph.js
 * requires /algorithm/kekule.structures.comparers.js
 */

(function()
{

var K = Kekule;
var AU = Kekule.ArrayUtils;
var BT = Kekule.BondType;

/**
 * An abstract class to assign index to nodes in connection table.
 * Different canonicalization method need different concrete indexes classes.
 * @class
 */
Kekule.CanonicalizationIndexer = Class.create(ObjectEx,
/** @lends Kekule.CanonicalizationIndexer# */
{
	/** @private */
	CLASS_NAME: 'Kekule.CanonicalizationIndexer',
	/** @constructs */
	initialize: function($super)
	{
		$super();
	},
	/**
	 * Execute and assign index to each node in connection tab.
	 * @param {Variant} ctabOrStructFragment
	 */
	execute: function(ctabOrStructFragment)
	{
		var ctab = (ctabOrStructFragment instanceof Kekule.StructureFragment)?
			ctabOrStructFragment.getCtab(): ctabOrStructFragment;
		if (ctab)
			return this.doExecute(ctab);
		else
			return null;
	},
	/**
	 * Do actual job of indexing.
	 * Descendants should override this method.
	 * @param {Kekule.StructureConnectionTable} ctab
	 */
	doExecute: function(ctab)
	{
		// do nothing here
	}
});

/**
 * An abstract class to sort nodes according to canonicalization index.
 * Different canonicalization method need different concrete node sorter classes.
 * @class
 */
Kekule.CanonicalizationNodeSorter = Class.create(ObjectEx,
/** @lends Kekule.CanonicalizationNodeSorter# */
{
	/** @private */
	CLASS_NAME: 'Kekule.CanonicalizationNodeSorter',
	/**
	 * Execute and sort nodes according to canonicalization index.
	 * @param {Variant} ctabOrStructFragment
	 */
	execute: function(ctabOrStructFragment)
	{
		var ctab = (ctabOrStructFragment instanceof Kekule.StructureFragment)?
			ctabOrStructFragment.getCtab(): ctabOrStructFragment;
		if (ctab)
			return this.doExecute(ctab, ctab.getNodes());
		else
			return null;
	},
	/**
	 * Do actual job of node sorting.
	 * Descendants should override this method.
	 * @param {Kekule.StructureConnectionTable} ctab
	 */
	doExecute: function(ctab)
	{
		// do nothing here
	}
});

/**
 * An abstract class to sort connectors according to sorted nodes.
 * @class
 */
Kekule.CanonicalizationConnectorSorter = Class.create(ObjectEx,
/** @lends Kekule.CanonicalizationConnectorSorter# */
{
	/** @private */
	CLASS_NAME: 'Kekule.CanonicalizationConnectorSorter',
	/**
	 * Execute and sort connectors according connected nodes.
	 * @param {Variant} ctabOrStructFragment
	 */
	execute: function(ctabOrStructFragment)
	{
		var ctab = (ctabOrStructFragment instanceof Kekule.StructureFragment)?
			ctabOrStructFragment.getCtab(): ctabOrStructFragment;
		if (ctab)
			return this.doExecute(ctab, ctab.getConnectors());
		else
			return null;
	},
	/**
	 * Do actual job of connector sorting.
	 * Descendants should override this method.
	 * @param {Kekule.StructureConnectionTable} ctab
	 */
	doExecute: function(ctab)
	{
		// do nothing here
	}
});

/**
 * An abstract class to sort connected objects of each connector according to sorted nodes.
 * @class
 */
Kekule.CanonicalizationConnectorConnectedObjsSorter = Class.create(ObjectEx,
/** @lends Kekule.CanonicalizationConnectorConnectedObjsSorter# */
{
	/** @private */
	CLASS_NAME: 'Kekule.CanonicalizationConnectorConnectedObjsSorter',
	/**
	 * Execute and sort connectors according connected nodes.
	 * @param {Variant} ctabOrStructFragment
	 */
	execute: function(ctabOrStructFragment)
	{
		var ctab = (ctabOrStructFragment instanceof Kekule.StructureFragment)?
				ctabOrStructFragment.getCtab(): ctabOrStructFragment;
		if (ctab)
			return this.doExecute(ctab, ctab.getConnectors());
		else
			return null;
	},
	/**
	 * Do actual job of connector sorting.
	 * Descendants should override this method.
	 * @param {Kekule.StructureConnectionTable} ctab
	 */
	doExecute: function(ctab)
	{
		// do nothing here
	}
});

/**
 * Base class for do a custom molecule canonicalization job (do not use indexer, node or connector sorter).
 * @class
 */
Kekule.CanonicalizationCustomExecutor = Class.create(ObjectEx,
/** @lends Kekule.CanonicalizationCustomExecutor# */
{
	/** @private */
	CLASS_NAME: 'Kekule.CanonicalizationCustomExecutor',
	/**
	 * Execute canonicalization process on connection table.
	 * @params {Kekule.StructureConnectionTable} ctab
	 * @returns {Kekule.StructureConnectionTable}
	 */
	execute: function(ctab)
	{
		var newCtab = this.doExecute(ctab);
		// handle connected objects of each connector
		if (newCtab)
			this.doCanonicalizeConnectedObjs(newCtab);
		return newCtab;
	},
	/**
	 * Do actual work of {@link Kekule.CanonicalizationExecutor.execute}.
	 * Descendants should override this method.
	 * @param {Kekule.StructureConnectionTable} ctab
	 * @returns {Kekule.StructureConnectionTable}
	 * @private
	 */
	doExecute: function(ctab)
	{
		return ctab;
	},
	/** @private */
	doCanonicalizeConnectedObjs: function(ctab)
	{
		if (!ctab)
			return;
		/*
		var sortFunc = function(a, b)
		{
			var indexA = ctab.indexOfChild(a);
			var indexB = ctab.indexOfChild(b);
			return indexA - indexB;
		};
		for (var i = 0, l = ctab.getConnectorCount(); i < l; ++i)
		{
			var conn = ctab.getConnectorAt(i);
			var connObjs = conn.getConnectedObjs();
			connObjs.sort(sortFunc);
		}
		*/
		var connObjsSorter = new Kekule.CanonicalizationGeneralConnectorConnectedObjsSorter();
		connObjsSorter.execute(ctab);
	}
});

/**
 * An general class to sort connectors according to sorted nodes.
 * @arguments Kekule.CanonicalizationConnectorSorter
 * @class
 */
Kekule.CanonicalizationGeneralConnectorSorter = Class.create(Kekule.CanonicalizationConnectorSorter,
/** @lends Kekule.CanonicalizationGeneralConnectorSorter# */
{
	/** @private */
	CLASS_NAME: 'Kekule.CanonicalizationGeneralConnectorSorter',
	/** @ignore */
	doExecute: function(ctab)
	{
		var connectedNodeSeqMap = new Kekule.MapEx();
		try
		{
			// assign comparation values
			for (var i = 0, l = ctab.getConnectorCount(); i < l; ++i)
			{
				var conn = ctab.getConnectorAt(i);
				var connectedObjs = conn.getConnectedObjs();
				var mvalues = [];
				for (var j = 0, k = connectedObjs.length; j < k; ++j)
				{
					var obj = connectedObjs[j];
					var mvalue;
					if (obj instanceof Kekule.ChemStructureNode)
					{
						mvalue = ctab.indexOfNode(obj);
						mvalues.push(mvalue);
					}
					else  // bypass connected connectors
					{

					}
				}
				mvalues.sort( function(a, b) { return a - b; });
				connectedNodeSeqMap.set(conn, mvalues);
			}
			// sort connectors
			ctab.sortConnectors(function(c1, c2){
				var mvalues1 = connectedNodeSeqMap.get(c1);
				var mvalues2 = connectedNodeSeqMap.get(c2);
				var result = AU.compare(mvalues1, mvalues2);
				if (result === 0)
					result = -(c1.getConnectedObjCount() - c2.getConnectedObjCount());
				return result;
			});
			/*
			// sort connected objs in connectors
			// Now this job is moved to CanonicalizationGeneralConnectorConnectedObjsSorter
			for (var i = 0, l = ctab.getConnectorCount(); i < l; ++i)
			{
				var conn = ctab.getConnectorAt(i);
				if (conn.getConnectedObjCount() === 2)  // usual connector, connect with two nodes
				{
					var o1 = conn.getConnectedObjAt(0);
					var o2 = conn.getConnectedObjAt(1);
					if (ctab.indexOfChild(o1) > ctab.indexOfChild(o2)) // swap two nodes, may reverse wedge direction also
						conn.reverseConnectedObjOrder();
				}
				else
					conn.sortConnectedObjs(function(o1, o2){
						return ctab.indexOfChild(o1) - ctab.indexOfChild(o2);
					});
			}
			*/
		}
		finally
		{
			connectedNodeSeqMap.finalize();
		}
	}
});

/**
 * An general sorter class to sort connected objects of each connector according to sorted nodes.
 * @arguments Kekule.CanonicalizationConnectorConnectedObjsSorter
 * @class
 */
Kekule.CanonicalizationGeneralConnectorConnectedObjsSorter = Class.create(Kekule.CanonicalizationConnectorConnectedObjsSorter,
/** @lends Kekule.CanonicalizationGeneralConnectorConnectedObjsSorter# */
{
	/** @private */
	CLASS_NAME: 'Kekule.CanonicalizationGeneralConnectorConnectedObjsSorter',
	/** @ignore */
	doExecute: function(ctab)
	{
		var connectedNodeSeqMap = new Kekule.MapEx();
		try
		{
			// sort connected objs in connectors
			for (var i = 0, l = ctab.getConnectorCount(); i < l; ++i)
			{
				var conn = ctab.getConnectorAt(i);
				if (conn.getConnectedObjCount() === 2)  // usual connector, connect with two nodes
				{
					var o1 = conn.getConnectedObjAt(0);
					var o2 = conn.getConnectedObjAt(1);
					var cIndex1 = ctab.indexOfChild(o1);
					var cIndex2 = ctab.indexOfChild(o2);
					// if o1 or o2 is outside ctab (may ocurr in crossConnector of subgroup,
					// this approach will compare their canonicalization index (the order in shadow flattern structure).
					if (cIndex1 < 0 || cIndex2 < 0)
					{
						cIndex1 = (o1.getCanonicalizationIndex? -o1.getCanonicalizationIndex(): 0) || 0;
						cIndex2 = (o2.getCanonicalizationIndex? -o2.getCanonicalizationIndex(): 0) || 0;
					}
					if (cIndex1 > cIndex2) // swap two nodes, may reverse wedge direction also
					{
						conn.reverseConnectedObjOrder();
					}
				}
				else
				{
					conn.sortConnectedObjs(function(o1, o2)
					{
						var cIndex1 = ctab.indexOfChild(o1);
						var cIndex2 = ctab.indexOfChild(o2);
						if (cIndex1 < 0 || cIndex2 < 0)  // same index, may be all -1, o1, o2 outside subgroup, compare their canonicalization index
						{
							cIndex1 = (o1.getCanonicalizationIndex ? -o1.getCanonicalizationIndex() : 0) || 0;
							cIndex2 = (o2.getCanonicalizationIndex ? -o2.getCanonicalizationIndex() : 0) || 0;
							result = (cIndex1 || 0) - (cIndex2 || 0);
						}
						var result = ctab.indexOfChild(o1) - ctab.indexOfChild(o2);
						return result;
					});
				}
			}
		}
		finally
		{
			connectedNodeSeqMap.finalize();
		}
	}
});

/**
 * An class to assign index to nodes in connection table by a modification of Morgan algorithm.
 * Morgan algorithm on molecule graph is first execute to calculate all ec values. Then from smaller to
 * larger, nodes are sorted by their ec values. If some node have the same ec value, other features will
 * be used to sort.
 * @arguments Kekule.CanonicalizationIndexer
 * @class
 */
Kekule.CanonicalizationMorganIndexer = Class.create(Kekule.CanonicalizationIndexer,
/** @lends Kekule.CanonicalizationMorganIndexer# */
{
	/** @private */
	CLASS_NAME: 'Kekule.CanonicalizationMorganIndexer',
	/** @ignore */
	doExecute: function(ctab)
	{
		// clear old indexes
		/*
		for (var i = 0, l = ctab.getNodeCount(); i < l; ++i)
		{
			ctab.getNodeAt(i).setCanonicalizationIndex(null);
		}
		*/
		// turn ctab into pure graph first (with sub structure degrouped)
		var graph = Kekule.GraphAdaptUtils.ctabToGraph(ctab, null, {'expandSubStructures': true, ignoreBondedHydrogen: true});
		if (!graph)
			return null;
		//console.log('graph size', graph.getVertexes().length);
		// calc EC values of graph
		var ecResult = this._calcGraphFinalECs(graph);
		var ecMapping = ecResult.ecMapping;
		var vertexGroup = ecResult.vertexGroup;
		/*
		if (ecResult.ecCount < graph.getVertexes().length)  // some vertexes has same ec value, need to index it further
		{

		}
		else
		{

		}
		*/

		/*
		var sortedNodes = [];
		var vertexGroups = this._groupVertexesByEcValue(graph, ecMapping);
		for (var i = 0, l = vertexGroups.length; i < l; ++i)
		{
			var vertexGroup = vertexGroups[i];
			var nodes = this._vertexesToNodes(vertexGroup.vertexes);
			if (nodes.length === 1)
				AU.pushUnique(sortedNodes, nodes[0]);
			else
			{
				var groups = this._groupNodesWithSameEcValue(nodes, sortedNodes);
				sortedNodes = sortedNodes.concat(groups);
			}
		}
		*/
		var sortedNodes = this._sortNodeByEcMapping(graph, ecMapping, vertexGroup);
		// at last assign indexes
		this._setCanonicalizationIndexToNodeGroups(sortedNodes);
	},

	/** @private */
	_sortNodeByEcMapping: function(graph, ecMapping, ecVertexGroup)
	{
		var sortedNodes1st = [];
		var vertexGroups = ecVertexGroup;  // || this._groupVertexesByEcValue(graph, ecMapping);
		// 1st pass, from top to bottom
		for (var i = 0, l = vertexGroups.length; i < l; ++i)
		{
			var vertexGroup = vertexGroups[i];
			var nodes = this._vertexesToNodes(vertexGroup.vertexes);
			if (nodes.length === 1)
				AU.pushUnique(sortedNodes1st, nodes[0]);
			else
			{
				var groups = this._groupNodesWithSameEcValue(nodes, sortedNodes1st);
				sortedNodes1st = sortedNodes1st.concat(groups);
			}
		}

		// 2nd pass, from top to bottom
		var sortedNodes = [];
		var handledNodes = [];
		for (var i = 0, l = sortedNodes1st.length; i < l; ++i)
		{
			var currNodes = sortedNodes1st[i];
			if (!AU.isArray(currNodes))   // only one node, input into sortedNodes
			{
				sortedNodes.push([currNodes]);
				handledNodes.push(currNodes);
			}
			else  // need compare
			{
				var _getSortLevel = function(node)
				{
					for (var i = 0, l = sortedNodes.length; i < l; ++i)
					{
						if (!AU.isArray(sortedNodes[i]))
						{
							if (sortedNodes[i] === node)
								return i;
						}
						else if (sortedNodes[i].indexOf(node) >= 0)
							return i;
					}
					return -1;
				};
				var _getSortLevels = function(centerNode, connectedNodes)
				{
					var result = [];
					for (var i = 0, l = connectedNodes.length; i < l; ++i)
					{
						result.push(_getSortLevel(connectedNodes[i]));
					}
					result.sort(function(a,b){ return b-a; });
					//console.log(result);
					return result;
				};
				var _compFunc = function(n1, n2)
				{
					var connNodes1 = AU.intersect(n1.getLinkedObjs(), handledNodes);
					var connNodes2 = AU.intersect(n2.getLinkedObjs(), handledNodes);
					// compare connected node count
					var result = connNodes1.length - connNodes2.length;
					// compare connected node sorted level
					if (result === 0 && connNodes1.length > 0)
					{
						var sortLevels1 = _getSortLevels(n1, connNodes1);
						var sortLevels2 = _getSortLevels(n2, connNodes2);
						result = AU.compare(sortLevels1, sortLevels2);
					}
					return result;
				};
				var groupedCurrNodes = AU.group(currNodes, _compFunc);
				sortedNodes = sortedNodes.concat(groupedCurrNodes);
			}
			handledNodes = handledNodes.concat(currNodes);
		}

		// at last, standardize array
		var result = [];
		for (var i = 0, l = sortedNodes.length; i < l; ++i)
		{
			var currNodes = sortedNodes[i];
			if (!AU.isArray(currNodes))   // only one node, input into sortedNodes
				result.push(currNodes);
			else if (currNodes.length === 1)
				result.push(currNodes[0]);
			else
				result.push(currNodes);
		}

		return result;
	},

	/** @private */
	_setCanonicalizationIndexToNodeGroups: function(sortedNodes)
	{
		for (var i = 0, l = sortedNodes.length; i < l; ++i)
		{
			var vIndex = i + 1;
			var item = sortedNodes[i];
			if (AU.isArray(item))
			{
				for (var j = 0, k = item.length; j < k; ++j)
				{
					//console.log('set cano index', item[j].getSymbol(), vIndex, j, k);
					item[j].setCanonicalizationIndex(vIndex);
				}
			}
			else
			{
				//console.log('set cano index', item.getSymbol(), vIndex);
				item.setCanonicalizationIndex(vIndex);
			}
		}
	},

	/** @private */
	_vertexesToNodes: function(vertexes)
	{
		var result = [];
		for (var i = 0, l = vertexes.length; i < l; ++i)
		{
			var node = vertexes[i].getData('object');
			result.push(node);
		}
		return result;
	},
	/**
	 * Calculate EC value of each vertex in graph and store the values into newECMapping.
	 * Set isInitialRun to true to set the initial EC values.
	 * @param graph
	 * @param newECMapping
	 * @param oldECMapping
	 * @param isInitialRun
	 * @returns {Int} Different EC values calculated in process.
	 * @private
	 */
	_processECs: function(graph, newECMapping, oldECMapping, isInitialRun)
	{
		if (isInitialRun)
			return this._initECs(graph, newECMapping);

		var ecValues = [];
		var ecTable = [];
		var vertexes = graph.getVertexes();
		for (var i = 0, l = vertexes.length; i < l; ++i)
		{
			var vertex = vertexes[i];
			var newEC = this._usePrimeECs? 1: 0;
			if (this._usePrimeECs)
				var primes = this._getPrimeArray();
			var neighbors = vertex.getNeighbors();
			for (var j = 0, k = neighbors.length; j < k; ++j)
			{
				if (this._usePrimeECs)
				{
					newEC *= primes[(oldECMapping.get(neighbors[j]) - 1) || 0];
				}
				else
					newEC += oldECMapping.get(neighbors[j]) || 0;
			}
			AU.pushUnique(ecValues, newEC);
			if (!ecTable[newEC])
				ecTable[newEC] = [vertex];
			else
				ecTable[newEC].push(vertex);
		}
		//console.log('ecValues-Old', ecValues);
		ecValues.sort(function(a, b){ return a - b; });
		//console.log('ecValues-New', ecValues);
		var vertexGroup = [];
		for (var i = 0, l = ecValues.length; i <l; ++i)
		{
			var ecValue = ecValues[i];
			if (ecTable[ecValue])
				vertexGroup.push(ecTable[ecValue]);
		}
		this._setECValueByVertexGroup(vertexGroup, newECMapping);
		//console.log('ECs: ', ecValues);
		return vertexGroup.length;
	},
	/*
	_processECs_OLD: function(graph, newECMapping, oldECMapping, isInitialRun)
	{
		if (isInitialRun)
			return this._initECs(graph, newECMapping);

		var ecValues = [];
		var vertexes = graph.getVertexes();
		for (var i = 0, l = vertexes.length; i < l; ++i)
		{
			var vertex = vertexes[i];
			var sum = 0;
			//if (!isInitialRun)
			{
				var neighbors = vertex.getNeighbors();
				for (var j = 0, k = neighbors.length; j < k; ++j)
				{
					sum += oldECMapping.get(neighbors[j]) || 0;
				}
			}
			newECMapping.set(vertex, sum);
			AU.pushUnique(ecValues, sum);
		}
		//console.log('ECs: ', ecValues);
		return ecValues.length;
	},
	*/
	/** @private */
	_initECs: function(graph, newECMapping)
	{
		// using Unique SMILES method to group EC
		var ecValueTable = [];
		var vertexes = graph.getVertexes();
		// calc connection count first
		for (var i = 0, l = vertexes.length; i < l; ++i)
		{
			var vertex = vertexes[i];
			var edgeCount = vertex.getEdgeCount();
			if (!ecValueTable[edgeCount])
			{
				ecValueTable[edgeCount] = [vertex];
			}
			else
				ecValueTable[edgeCount].push(vertex);
		}
		//console.log(ecValueTable.length, ecValueTable);
		// then consider the difference of atoms
		var vertexGroup = [];
		for (var i = 0, l = ecValueTable.length; i < l; ++i)
		{
			var vertexes = ecValueTable[i];
			//console.log(vertexes);
			if (vertexes)
			{
				if (vertexes.length === 1)  // single vertex, put in directly
					vertexGroup.push(vertexes);
				else  // a group of same edge count, need group them further by atoms
				{
					var subGroups = AU.group(vertexes, function(v1, v2){
						var node1 = v1.getData('object');
						var node2 = v2.getData('object');
						var result = node1.getLinkedConnectorCount() - node2.getLinkedConnectorCount();
						if (!result)
						{
							result = node1.compareStructure(node2);
							if (!result)
								result = (node1.getHydrogenCount? node1.getHydrogenCount(true): 0) - (node2.getHydrogenCount? node2.getHydrogenCount(true): 0);
						}
						return result;
					});
					//console.log(vertexes, subGroups);
					for (var j = 0, k = subGroups.length; j < k; ++j)
					{
						vertexGroup.push(AU.toArray(subGroups[j]));
					}
				}
			}
		}
		//console.log(vertexGroup);
		// at last assign EC values to each vertexes
		this._setECValueByVertexGroup(vertexGroup, newECMapping);
		return vertexGroup.length;
	},
	/** @private */
	_setECValueByVertexGroup: function(vertexGroup, ECMapping)
	{
		for (var i = 0, l = vertexGroup.length; i < l; ++i)
		{
			var vertexes = vertexGroup[i];
			for (var j = 0, k = vertexes.length; j < k; ++j)
			{
				ECMapping.set(vertexes[j], i + 1);
			}
		}
	},
	/** @private */
	_getPrimeArray: function()
	{
		//return [];
		if (!Kekule.CanonicalizationMorganIndexer.primes)
			Kekule.CanonicalizationMorganIndexer.primes = Kekule.NumUtils.getPrimes(10000);
		return Kekule.CanonicalizationMorganIndexer.primes;
	},

	/**
	 * Calculate the final EC value mapping of a graph.
	 * @param {Object} graph
	 * @returns {Hash} {ecCount: Int, ecMapping: Kekule.MapEx}
	 * @private
	 */
	_calcGraphFinalECs: function(graph)
	{
		var ecMappings = [
			new Kekule.MapEx(), new Kekule.MapEx()
		];
		try
		{
			if (graph.getVertexes().length < this._getPrimeArray().length)  // can use prime EC values
				this._usePrimeECs = true;
			else
				this._usePrimeECs = false;

			var index = 0;
			var currECMapping = ecMappings[0];
			var oldECMapping = ecMappings[0];
			var oldEcMemberCount = 0;
			var lastVertexGroup, currVertexGroup;
			var storedVertexGroups = [];  // stores vertex groups of all same EC length
			var ecMemberCount = this._processECs(graph, currECMapping, oldECMapping, true);
			//var currVertexGroup = this._groupVertexesByEcValue(graph, currECMapping);
			var doProcess = true;
			//while (ecMemberCount >= oldEcMemberCount)
			while (doProcess)
			{
				oldEcMemberCount = ecMemberCount;
				lastVertexGroup = currVertexGroup;
				oldECMapping = currECMapping;
				++index;
				//var j = index % 2;
				currECMapping = ecMappings[index % 2];
				//oldECMapping = ecMappings[(index + 1) % 2];
				ecMemberCount = this._processECs(graph, currECMapping, oldECMapping);
				currVertexGroup = null;

				doProcess = ecMemberCount > oldEcMemberCount;  // if ec count arises, continue process

				if (ecMemberCount === oldEcMemberCount)  // some times occurs
				{
					currVertexGroup = this._groupVertexesByEcValue(graph, currECMapping);
					if (!lastVertexGroup && oldECMapping)  // calculate only when needed
					{
						lastVertexGroup = this._groupVertexesByEcValue(graph, oldECMapping);
					}
					storedVertexGroups.push(lastVertexGroup);
					//doProcess = this._compareEcVertexGroup(currVertexGroup, lastVertexGroup) !== 0;
					doProcess = !this._hasVertexGroup(currVertexGroup, storedVertexGroups);
				}
				else  // new group count differs from last, clear stored vertex groups
				{
					storedVertexGroups = [];
				}
			}

			//console.log(oldECMapping);
			// debug, mark EC values
			/*
			 var vertexes = graph.getVertexes();
			 for (var i = 0, l = vertexes.length; i < l; ++i)
			 {
				 var node = vertexes[i].getData('object');
				 var value = oldECMapping.get(vertexes[i]);
				 //node.setRenderOption('customLabel', '' + value);
				 node.setCharge(value);
			 }
      */
		}
		finally
		{
			currECMapping.finalize();
		}

		// finally get actual ec values
		//var actualEcMapping = oldEcMapping;
		return {ecMapping: oldECMapping, ecCount: oldEcMemberCount,
			vertexGroup: lastVertexGroup || this._groupVertexesByEcValue(graph, oldECMapping)};
	},

	/** @private */
	_groupVertexesByEcValue: function(graph, ecMapping)
	{
		var groups = [];
		var ecValues = [];
		var vertexes = graph.getVertexes();
		for (var i = 0, l = vertexes.length; i < l; ++i)
		{
			var v = vertexes[i];
			var ecValue = ecMapping.get(v);
			AU.pushUnique(ecValues, ecValue);
			if (!groups[ecValue])
				groups[ecValue] = [];
			groups[ecValue].push(v);
		}
		ecValues.sort(function(a, b) { return a - b; } );
		var result = [];
		for (var i = 0, l = ecValues.length; i < l; ++i)
		{
			var vs = groups[ecValues[i]];
			result.push({
				ecValue: ecValues[i],
				vertexes: vs,
				vertexesCount: vs.length
			});
		}
		return result;
	},
	/** @private */
	_hasVertexGroup: function(group, groups)
	{
		for (var i = 0, l = groups.length; i < l; ++i)
		{
			if (this._compareEcVertexGroup(group, groups[i]) === 0)
				return true;
		}
		return false;
	},
	/** @private */
	_compareEcVertexGroup: function(group1, group2)
	{
		var count = group1.length;
		var result = count - group2.length;
		if (result === 0)  // further check
		{
			var vSame;
			for (var i = 0; i < count; ++i)
			{
				result = group1[i].vertexes.length - group2[i].vertexes.length;
				if (result === 0)
					result = AU.exclude(group1[i].vertexes, group2[i].vertexes).length;
				if (result !== 0)
					break;
			}
		}
		return result;
	},
	/** @private */
	_groupNodesWithSameEcValue: function(nodes, sortedNodes)
	{
		var formAssocCompareArray = function(node, sortedNodes)
		{
			var linkedObjs = node.getLinkedObjs();
			var linkedNodes = AU.intersect(linkedObjs, sortedNodes);
			var ncompareValues = [];  // calc from node
			var ccompareValues = [];  // calc from connector
			for (var i = 0, l = linkedNodes.length; i < l; ++i)
			{
				var n = linkedNodes[i];
				ncompareValues.push(sortedNodes.indexOf(n));
			}
			ncompareValues.sort( function(a, b) { return a - b; });
			for (var i = 0, l = ncompareValues.length; i < l; ++i)
			{
				var n = sortedNodes[ncompareValues[i]];
				var conn = node.getConnectorTo(n);
				var connOrder = conn.getBondOrder? conn.getBondOrder() || 0: 0;
				ccompareValues.push(connOrder);
			}
			return {
				nodeOrders: ncompareValues, connectorOrders: ccompareValues
			};
		};

		var nodeGroups = AU.group(nodes, function(n1, n2)
			{
				//var result = Kekule.UnivChemStructObjComparer.compare(n1, n2);
				var result = n1.compareStructure(n2);
				if (result === 0)  // same compare value, need further check
				{
					var compareValue1 = formAssocCompareArray(n1, sortedNodes);
					var compareValue2 = formAssocCompareArray(n2, sortedNodes);
					result = AU.compare(compareValue1.nodeOrders, compareValue2.nodeOrders);
					if (result === 0)  // connected node can not distinguish, need to check linked connectors
					{
						result = AU.compare(compareValue1.connectorOrders, compareValue2.connectorOrders);
					}
				}
				return result;
			});
		//console.log('group nodes', nodes, nodeGroups);

		return nodeGroups;
	}
});

/**
 * An class to sort nodes by morgan algorithm.
 * Different canonicalization method need different concrete node sorter classes.
 * @arguments Kekule.CanonicalizationNodeSorter
 * @class
 */
Kekule.CanonicalizationMorganNodeSorter = Class.create(Kekule.CanonicalizationNodeSorter,
/** @lends Kekule.CanonicalizationMorganNodeSorter# */
{
	/** @private */
	CLASS_NAME: 'Kekule.CanonicalizationMorganNodeSorter',
	/**
	 * Do actual job of node sorting.
	 * Descendants should override this method.
	 * @param {Kekule.StructureConnectionTable} ctab
	 */
	doExecute: function(ctab)
	{
		var sortedNodes = this._getNodeSortedArray(ctab);
		var sortedNodesLength = sortedNodes.length;
		var self = this;
		ctab.sortNodes(function(a, b){
			var indexA = sortedNodes.indexOf(a);
			var indexB = sortedNodes.indexOf(b);
			// sortedNodes may not contain hydrogen atoms, put it to tail
			if (indexA < 0)
				indexA = sortedNodesLength;
			if (indexB < 0)
				indexB = sortedNodesLength;
			var result = indexA - indexB;
			if (result === 0 && indexA === sortedNodesLength)  // H hydrogen, need compare
			{
				result = self._getNeighborNodesMinIndex(a, sortedNodes) - self._getNeighborNodesMinIndex(b, sortedNodes);
			}
			return result;
		});
	},
	/** @private */
	_getNeighborNodesMinIndex: function(node, sortedNodes)
	{
		var neighbors = node.getLinkedChemNodes();
		var result = sortedNodes.length;
		for (var i = 0, l = neighbors.length; i < l; ++i)
		{
			var index = sortedNodes.indexOf(neighbors[i]);
			if (index > 0 && index < result)
				result = index;
		}
		return result;
	},
	/** @private */
	_getNodeSortedArray: function(ctab)
	{
		var result = [];
		/*
		var nodeSeq = AU.clone(ctab.getNodes());
		nodeSeq.sort(function(a, b){
			return ((a.getCanonicalizationIndex() || -1) - (b.getCanonicalizationIndex() || -1));
		});
		*/
		/*
		var sortFunc = function(n1, n2)
		{
			return nodeSeq.indexOf(n1) - nodeSeq.indexOf(n2);
		};
		*/
		var nodeCompareFunc = function(startingNode, n1, n2)
		{
			var result;
			var cIndex1 = n1.getCanonicalizationIndex();
			var cIndex2 = n2.getCanonicalizationIndex();
			// console.log('INDEX', n1.getSymbol(), cIndex1, n2.getSymbol(), cIndex2);
			result = (cIndex1 || -1) - (cIndex2 || -1);

			if (result === 0)  // canonicalization index is same, compare connector\
			{
				if (startingNode)
				{
					var connector1 = startingNode.getConnectorTo(n1);
					var connector2 = startingNode.getConnectorTo(n2);
					//result = Kekule.UnivChemStructObjComparer.compare(connector1, connector2);
					result = connector1.compareStructure(connector2);
				}
				/*
				if (result === 0)  // same, check other connected objects of n1/n2
				{
					var neighborNodesOfN1 = n1.getLinkedObjs();
					var neighborNodesOfN2 = n2.getLinkedObjs();
					//if ()
				}
				*/
				if (result === 0) // still same, compare coord if possible
				{
					if (n1.hasCoord3D(true) && n2.hasCoord3D(true))  // allow borrow from 2D
					{
						var deltaCoord = Kekule.CoordUtils.substract(n1.getAbsCoord3D(true), n2.getAbsCoord3D(true));
						result = deltaCoord.z || deltaCoord.y || deltaCoord.x;
					}
				}
			}
			return result;
		};
		//var nodeIndexMap = new Kekule.MapEx();
		try
		{
			//var remainingNodes = AU.clone(nodeSeq);
			var remainingNodes = AU.clone(ctab.getNodes());
			/*
			remainingNodes.forEach(function(n) {
				console.log('node', n.getSymbol(), n.getCanonicalizationIndex());
			});
			*/
			// first seek out the starting node with highest canonicalization index
			var currNode, currNodeIndex;
			for (var i = 0, l = remainingNodes.length; i < l; ++i)
			{
				var node = remainingNodes[i];
				if (!currNode)
				{
					currNode = node;
					currNodeIndex = i;
				}
				else
				{
					if (nodeCompareFunc(null, node, currNode) > 0)
					{
						currNode = node;
						currNodeIndex = i;
					}
				}
			}

			//console.log('starting node', currNode.getSymbol(), i);

			//var currNode = nodeSeq[nodeSeq.length - 1];
			//remainingNodes.splice(nodeSeq.length - 1, 1);
			remainingNodes.splice(currNodeIndex, 1);
			var sortedNodes = [];
			var sortedUnindexedNodes = [];  // sorted hydrogen atom that has no cannonicalization index

			if (currNode)
				sortedNodes.push(currNode);
			//nodeIndexMap.set(currNode, 0);

			for (var i = 0; i < sortedNodes.length; ++i)
			{
				var currNode = sortedNodes[i];
				if (i !== 0)
				{
					var vIndex = remainingNodes.indexOf(currNode);
					if (vIndex >= 0)
					{
						sortedNodes.push(currNode);
						//nodeIndexMap.set(currNode, sortedNodes.length - 1);
						remainingNodes.splice(vIndex, 1);
					}
				}
				var neighbors = currNode.getLinkedObjs();
				neighbors = AU.intersect(neighbors, remainingNodes);
				if (neighbors.length)
				{
					//neighbors.sort(sortFunc);
					neighbors.sort(function(a, b){
						var startingNode = currNode;
						return nodeCompareFunc(startingNode, a, b);
					});
					for (var j = neighbors.length - 1; j >= 0; --j)
					{
						var neighbor = neighbors[j];
						vIndex = remainingNodes.indexOf(neighbor);
						if (vIndex >= 0)
						{
							var canoIndex = neighbor.getCanonicalizationIndex();
							if (Kekule.ObjUtils.isUnset(canoIndex))
								sortedUnindexedNodes.push(neighbor);
							else
								sortedNodes.push(neighbor);
							//nodeIndexMap.set(neighbors[j], sortedNodes.length - 1);
							remainingNodes.splice(vIndex, 1);
						}
					}
				}
			}

			sortedNodes = sortedNodes.concat(sortedUnindexedNodes);

			return sortedNodes;
		}
		finally
		{
			//nodeIndexMap.finalize();
		}
	}
});

/**
 * A entry class to execute molecule canonicalization job.
 * User should use this class rather than call concrete CanonicalizationExecutor directly.
 * @class
 */
Kekule.Canonicalizer = Class.create(
/** @lends Kekule.Canonicalizer# */
{
	/** @private */
	CLASS_NAME: 'Kekule.Canonicalizer',
	/** @constructs */
	initialize: function()
	{
		this._executorClasses = {};
		this._executorInstances = {};
		this._defExecutorId = null;
	},
	/**
	 * Register a canonicalization executor class.
	 * Executor class should be a descendant of {@link Kekule.CanonicalizationExecutor}.
	 * @param {String} id
	 * @param {Variant} executorClasses Array of three related classes: [Indexer, NodeSorter, ConnectorSorter],
	 *   or a custom canonicalization class deprived from {@link Kekule.CanonicalizationCustomExecutor}.
	 * @param {Bool} asDefault Whether this executor should be the default one to canonicalize molecule.
	 */
	registerExecutor: function(id, executorClasses, asDefault)
	{
		if (AU.isArray(executorClasses))
		{
			this._executorClasses[id] = {
				'indexer': executorClasses[0],
				'nodeSorter': executorClasses[1],
				'connectorSorter': executorClasses[2] || Kekule.CanonicalizationGeneralConnectorSorter,
				'connectorConnectedObjsSorter': executorClasses[3] || Kekule.CanonicalizationGeneralConnectorConnectedObjsSorter
			};
		}
		else
			this._executorClasses[id] = {'customExecutor': executorClasses};
		if (asDefault)
			this._defExecutorId = id;
	},
	/**
	 * Returns a instance of registered executor class.
	 * @param {String} id
	 * @returns {Kekule.CanonicalizationExecutor}
	 */
	getExecutor: function(id)
	{
		if (!id)
			return null;
		var result = this._executorInstances[id];
		if (!result)
		{
			var eClasses = this._executorClasses[id];
			if (eClasses)
			{
				if (eClasses.customExecutor)
				{
					result = {'customExecutor': new (eClasses.customExecutor)()};
				}
				else
				{
					result = {
						'indexer': new (eClasses.indexer)(),
						'nodeSorter': new (eClasses.nodeSorter)(),
						'connectorSorter': new (eClasses.connectorSorter)(),
						'connectorConnectedObjsSorter': new (eClasses.connectorConnectedObjsSorter)()
					};
				}
				this._executorInstances[id] = result;
			}
		}
		return result;
	},
	/**
	 * Use specified method or default one (if executorId is not set) to canonicalize a structure fragment or ctab.
	 * @param {Variant} structFragmentOrCtab
	 * @param {String} executorId
	 */
	canonicalize: function(structFragmentOrCtab, executorId)
	{
		structFragmentOrCtab.beginUpdate();
		try
		{
			var id = executorId || this._defExecutorId;
			var executor = this.getExecutor(id);
			if (!executor)
			{
				Kekule.error(Kekule.$L('ErrorMsg.REGISTERED_CANONICALIZATION_EXECUTOR_NOT_FOUND')/*Kekule.ErrorMsg.REGISTERED_CANONICALIZATION_EXECUTOR_NOT_FOUND*/);
			}
			else
			{
				var struct = (structFragmentOrCtab instanceof Kekule.StructureFragment)? structFragmentOrCtab: null;
				var canoInfo = struct.getCanonicalizationInfo();
				if (canoInfo && canoInfo.id === id)   // already do a cano job, no need to run again
				{
					return structFragmentOrCtab;
				}
				var ctab = structFragmentOrCtab.getCtab? structFragmentOrCtab.getCtab(): structFragmentOrCtab;
				if (!ctab || ctab.isEmpty())  // empty structure
					return structFragmentOrCtab;
				var structFragment = ctab.getParent();
				if (executor.customExecutor)
					executor.customExecutor.execute(ctab);
				else  // use default approach
				{
					this._doDefaultCanonicalize(executor, ctab, structFragment);
				}
				struct.setCanonicalizationInfo({'id': id});  // save cano info
			}
		}
		finally
		{
			structFragmentOrCtab.endUpdate();
		}
		return structFragmentOrCtab;
	},
	/**
	 * Default approach to do canonicalization.
	 * @private
	 */
	_doDefaultCanonicalize: function(executor, ctab, structFragment)
	{
		// ensure the canonicalization is executed on flattened structure (without subgroups)
		var flatternedStruct = structFragment.getFlattenedShadowFragment();

		/*
		var data = Kekule.IO.saveFormatData(flatternedStruct, 'mol');
		console.log('FLATTERN');
		console.log(data);
		*/
		var ctab = flatternedStruct.getCtab();
		ctab.beginUpdate();
		try
		{
			executor.indexer.execute(ctab);
			executor.nodeSorter.execute(ctab);
			executor.connectorSorter.execute(ctab);
			executor.connectorConnectedObjsSorter.execute(ctab);
		}
		finally
		{
			ctab.endUpdate();
		}

		// if structFragment has subgroup (flattenedShadow not self), handle it
		if (!structFragment.getFlattenedShadowOnSelf())
		{
			structFragment.beginUpdate();
			try
			{
				// now the flatterned structure is canonicalization, we index the original structure based on it
				this._sortSrcStructBaseOnShadow(structFragment, flatternedStruct/*, structFragment.getFlattenedShadow()*/);
				// at last sort connected objs of each connector
				executor.connectorConnectedObjsSorter.execute(structFragment.getCtab());
			}
			finally
			{
				structFragment.endUpdate();
			}
		}
	},
	/** @private */
	_sortSrcStructBaseOnShadow: function(srcStruct, shadowStruct/*, shadowInfo*/)
	{
		var FLATTERN_INDEX_KEY = '__$flattern_index$__';
		var getFlatternIndex = function(obj)
		{
			return obj[FLATTERN_INDEX_KEY];
		};
		var setFlatternIndex = function(obj, value)
		{
			obj[FLATTERN_INDEX_KEY] = value;
		};
		var _setSrcNodeFlatternIndex = function(rootStruct, node, index, allChildrenCount)
		{
			//node.setCanonicalizationIndex(index);
			setFlatternIndex(node, index);
			// if node is in a subgroup, set index of its parent
			var parent = node.getParent();
			if (parent.isChildOf(srcStruct))
			{
				//var pIndex = parent.getCanonicalizationIndex();
				var pIndex = getFlatternIndex(parent);
				var newPIndex = index + allChildrenCount;  // ensure subgroup sort after all single nodes
				if (Kekule.ObjUtils.isUnset(pIndex) || pIndex > newPIndex)
				{
					//parent.setCanonicalizationIndex(newPIndex);
					_setSrcNodeFlatternIndex(rootStruct, parent, newPIndex, allChildrenCount);
				}
			}
		};
		var _sortSrcStructure = function(struct)
		{
			struct.beginUpdate();
			try
			{
				// sort first level children
				struct.sortNodes(function(a, b)
				{
					//return a.getCanonicalizationIndex() - b.getCanonicalizationIndex();
					return getFlatternIndex(a) - getFlatternIndex(b);
				});
				struct.sortConnectors(function(a, b)
				{
					return getFlatternIndex(a) - getFlatternIndex(b);
				});
				// then handle nested subgroups
				for (var i = 0, l = struct.getNodeCount(); i < l; ++i)
				{
					var node = struct.getNodeAt(i);
					if (node instanceof Kekule.StructureFragment) // is subgroup, sort
						_sortSrcStructure(node);
				}
			}
			finally
			{
				struct.endUpdate();
			}
		};

		// first save index of each flattern shadow / source children (nodes and connectors)
		srcStruct.cascadeOnChildren(function(obj){
			/*
			if (obj.setCanonicalizationIndex)
				obj.setCanonicalizationIndex(null);
			*/
			if (obj)
				setFlatternIndex(obj, null);
		});
		// the subgroup canonicalization index is calculated based on its own child nodes
		var shadowChildCount = shadowStruct.getChildCount();
		srcStruct.beginUpdate();
		try
		{
			for (var i = 0; i < shadowChildCount; ++i)
			{
				var childObj = shadowStruct.getChildAt(i);
				if (childObj instanceof Kekule.ChemStructureNode)
				{
					var shadowNode = childObj;
					var srcNode = srcStruct.getFlatternedShadowSourceObj(shadowNode);
					if (srcNode)
					{
						_setSrcNodeFlatternIndex(srcStruct, srcNode, i, shadowChildCount);
						srcNode.setCanonicalizationIndex(shadowNode.getCanonicalizationIndex());
					}
				}
				else if (childObj instanceof Kekule.ChemStructureConnector)
				{
					var shadowConn = childObj;
					var srcConn = srcStruct.getFlatternedShadowSourceObj(shadowConn);
					//srcConn.setCanonicalizationIndex(i);
					setFlatternIndex(srcConn, i);
				}
			}

			// then sort source struct cascadlly
			_sortSrcStructure(srcStruct);
		}
		finally
		{
			srcStruct.endUpdate();
		}
	}
});
Kekule.ClassUtils.makeSingleton(Kekule.Canonicalizer);
/**
 * A singleton instance of {@link Kekule.Canonicalizer}.
 */
Kekule.canonicalizer = Kekule.Canonicalizer.getInstance();

// extend ctab and molecule class for a easy way to do canonicalization
// even add method to ChemObject, make it easy to canonicalize all children
/** @ignore */
ClassEx.extend(Kekule.ChemObject,
	/** @lends Kekule.ChemObject# */
	{
	/**
	 * Canonicalize object and all possible children by canonicalizer. If canonicalizerId is not set,
	 * the default one will be used.
	 * @param {String} canonicalizerId
	 */
	canonicalize: function(canonicalizerId)
	{
		// find out all molecule
		var mols = Kekule.ChemStructureUtils.getAllStructFragments(this, true);
		for (var i = 0, l = mols.length; i < l; ++i)
		{
			mols[i].canonicalize(canonicalizerId);
		}
		return this;
	}
});

/** @ignore */
ClassEx.extend(Kekule.StructureConnectionTable,
/** @lends Kekule.StructureConnectionTable# */
{
	/**
	 * Canonicalize a structure fragment by canonicalizer. If canonicalizerId is not set,
	 * the default one will be used.
	 * @param {String} canonicalizerId
	 */
	canonicalize: function(canonicalizerId)
	{
		Kekule.canonicalizer.canonicalize(this);
		return this;
	}
});
/** @ignore */
ClassEx.extend(Kekule.StructureFragment,
/** @lends Kekule.StructureFragment# */
{
	/**
	 * Canonicalize a structure fragment by canonicalizer. If canonicalizerId is not set,
	 * the default one will be used.
	 * @param {String} canonicalizerId
	 */
	canonicalize: function(canonicalizerId)
	{
		Kekule.canonicalizer.canonicalize(this);
		//console.log('do canonicalize to', this.getClassName(), this.getId());
		return this;
	}
});

// A special property to store cano-label of atoms or bonds
ClassEx.defineProp(Kekule.ChemStructureObject, 'canonicalizationIndex', {'dataType': DataType.INT, 'serializable': false, 'scope': Class.PropertyScope.PUBLISHED});


// register morgan as default
Kekule.canonicalizer.registerExecutor('morgan', [Kekule.CanonicalizationMorganIndexer, Kekule.CanonicalizationMorganNodeSorter], true);



})();