Source: io/smiles/kekule.io.smiles.js

/**
 * @fileoverview
 * File for supporting reading/writing molecule to SMILES format.
 * @author Partridge Jiang
 */

/*
 * requires /lan/classes.js
 * requires /core/kekule.common.js
 * requires /core/kekule.elements.js
 * requires /core/kekule.electrons.js
 * requires /core/kekule.structures.js
 * requires /algorithm/kekule.graph.js
 * requires /algorithm/kekule.aromatics.js
 * requires /io/kekule.io.js
 * requires /localization
 */

(function(){
"use strict";

var AU = Kekule.ArrayUtils;

/**
 * Some constants for SMILES supporting.
 * @class
 */
Kekule.IO.SMILES = {
	ATOM_BRACKET_LEFT: '[',
	ATOM_BRACKET_RIGHT: ']',
	ATOM_WILDCARD: '*',
	ATOM_H: 'H',
	ORGAN_SUBSET_ATOMS: ['B', 'C', 'N', 'O', 'S', 'P', 'F', 'Cl', 'Br', 'I'],
	AROMATIC_SUBSET_ATOMS: ['B', 'C', 'N', 'O', 'S', 'P'],
	BOND_SINGLE: '-',
	BOND_DOUBLE: '=',
	BOND_TRIPLE: '#',
	BOND_QUAD: '$',
	BOND_AROMATIC: ':',
	BOND_FAKE: '.',
	RING_BOND_TWO_DIGIT_NO_PREFIX: '%',
	BRANCH_BRACKET_LEFT: '(',
	BRANCH_BRACKET_RIGHT: ')',
	ROTATION_DIR_CLOCKWISE: '@@',
	ROTATION_DIR_ANTICLOCKWISE: '@',
	DIRECTION_BOND_SYMBOLS: ['/', '\\']
};

/** @private */
var SMI = Kekule.IO.SMILES;


/**
 * A helper class for SMILES I/O.
 * @class
 */
Kekule.IO.SmilesUtils = {
	/**
	 * Create a depth first spanning tree, and record the longest path from startingVertex
	 * as the main chain for SMILES.
	 * @param {Kekule.Graph} graph
	 * @param {Kekule.GraphVertex} startingVertex
	 * @returns {Array} Each item is a hash that contains the follow fields:
	 *   {
	 *     vertexes: Array of all found vertexes.
	 *     edges: Array of all found edges.
	 *     longestPath: {
	 *       vertexes,
	 *       //edges,
	 *       length: edge count of longest path
	 *     }
	 *   }
	 *   As the graph may be unconnected, so spanning tree result may be more than one.
	 */
	createGraphDepthSpanningTreesEx: function(graph, startingVertex)
	{
		var VISITED_KEY = '__$visitedEx__';

		var _doTravers = function(startingVertex)
		{
			var result = {
				vertexes: [],
				edges: [],
				longestPath: {vertexes: [], edges: [], length: 0}
			};
			var vertex = startingVertex;

			//var vertexUnvisitedBefore = false;
			if (!vertex.getData(VISITED_KEY))
			{
				result.vertexes.push(vertex);
				vertex.setData(VISITED_KEY, true);
				//vertexUnvisitedBefore = true;
			}

			var edges = vertex.getEdges();
			var unvisitedVertexes = [];
			var longestTargetNeighborEdge;
			for (var i = 0, l = edges.length; i < l; ++i)
			{
				var edge = edges[i];
				var neighbor = vertex.getNeighborOnEdge(edge);
				if (!neighbor.getData(VISITED_KEY))
				{
					result.vertexes.push(neighbor);
					result.edges.push(edge);
					neighbor.setData(VISITED_KEY, true);
					//result.longestPath.length

					var nextResult = _doTravers(neighbor);
					result.vertexes = result.vertexes.concat(nextResult.vertexes);
					result.edges = result.edges.concat(nextResult.edges);
					if (nextResult.longestPath.length > result.longestPath.length || !result.longestPath.length)
					{
						result.longestPath.vertexes = nextResult.longestPath.vertexes;
						result.longestPath.edges = nextResult.longestPath.edges;
						result.longestPath.length = nextResult.longestPath.length;
						longestTargetNeighborEdge = edge;
					}
				}
			}

			if (longestTargetNeighborEdge)
			{
				result.longestPath.edges.unshift(longestTargetNeighborEdge);
				result.longestPath.length = (result.longestPath.length || 0) + 1;
			}
			result.longestPath.vertexes.unshift(vertex);

			return result;
		};

		var remainingVertexes = AU.clone(graph.getVertexes());
		// init
		for (var i = 0, l = remainingVertexes.length; i < l; ++i)
		{
			remainingVertexes[i].setData(VISITED_KEY, false);
		}
		var result = [];
		while (remainingVertexes.length)
		{
			var currVertex;
			if (startingVertex && (remainingVertexes.indexOf(startingVertex) >= 0))
				currVertex = startingVertex;
			else
				currVertex = remainingVertexes[0];
			//while (remainingVertexes.length)
			{
				var seq = {
					vertexes: [],
					edges: [],
					longestPath: {vertexes: [], edges: [], length: 0}
				};
				var partialResult = _doTravers(currVertex);
				seq.vertexes = seq.vertexes.concat(partialResult.vertexes);
				seq.edges = seq.edges.concat(partialResult.edges);
				seq.longestPath.length = (seq.longestPath.length || 0) + partialResult.longestPath.length;
				seq.longestPath.vertexes = seq.longestPath.vertexes.concat(partialResult.longestPath.vertexes);
				seq.longestPath.edges = seq.longestPath.edges.concat(partialResult.longestPath.edges);
				remainingVertexes = AU.exclude(remainingVertexes, partialResult.vertexes);
			}
			result.push(seq);
		}
		return result;
	}
};


/**
 * Writer for saving molecule to SMILES format text data.
 * Use smilesMolWriter.writeData(mol) to save molecule to SMILES text.
 * @class
 * @augments Kekule.IO.ChemDataWriter
 */
Kekule.IO.SmilesMolWriter = Class.create(Kekule.IO.ChemDataWriter,
/** @lends Kekule.IO.SmilesMolWriter# */
{
	/** @private */
	CLASS_NAME: 'Kekule.IO.SmilesMolWriter',
	/** @private */
	doWriteData: function(obj, dataType, format, options)
	{
		/*
		 if (dataType != Kekule.IO.ChemDataType.TEXT) // can not understand data other than text
		 {
		 Kekule.error(Kekule.ErrorMsg.MDL_OUTPUT_DATATYPE_NOT_TEXT);
		 return null;
		 }
		 else
		 */
		{
			/*
			var writer = new Kekule.IO.MdlMoleculeWriter(this.getMdlVersion(), this.getCoordMode());
			return writer.writeBlock(obj);
			*/
			var mol = this.getMolecule(obj);
			return mol? this.writeStructFragment(mol, options): '';
		}
	},
	/**
	 * Get molecule data from an object.
	 * @param {Variant} obj
	 */
	getMolecule: function(obj)
	{
		return Kekule.ChemStructureUtils.getTotalStructFragment(obj);
	},

	/** @private */
	writeStructFragment: function(mol, options)
	{
		if (mol.hasCtab() && !mol.isEmpty())
		{
			// standardize molecule and mark the aromatic rings
			var dupMol = mol.clone();
			//var aromaticRings = dupMol.perceiveAromaticRings();
			if (dupMol.standardize)
			{
				dupMol.standardize();
			}
			var aromaticNodes = [];
			var aromaticConnectors = [];
			if (dupMol.getAromaticRings)
			{
				var aromaticRings = dupMol.getAromaticRings();
				for (var i = 0, l = aromaticRings.length; i < l; ++i)
				{
					var ring = aromaticRings[i];
					AU.pushUnique(aromaticNodes, ring.nodes);
					AU.pushUnique(aromaticConnectors, ring.connectors);
				}
			}

			var molGraph = Kekule.GraphAdaptUtils.ctabToGraph(dupMol.getCtab(), null, {expandSubStructures: true, ignoreBondedHydrogen: true});
			// get the longest path as the main chain, starting from the last vertex
			var graphVertexes = molGraph.getVertexes();
			var graphEdges = molGraph.getEdges();
			var startingVertex = graphVertexes[graphVertexes.length - 1];
			var depthSpanningTrees = Kekule.IO.SmilesUtils.createGraphDepthSpanningTreesEx(molGraph, startingVertex);

			// mark all the ring edges
			//var ringEdges = AU.exclude(graphEdges, depthSpanningTree.edges);


			var ringEdgeRepo = [];
			var result;
			for (var i = 0, l = depthSpanningTrees.length; i < l; ++i)
			{
				var depthSpanningTree = depthSpanningTrees[i];
				var mainChainPath = depthSpanningTree.longestPath;
				//console.log(mainChainPath);
				var partResult = '';
				// enumeration all vertexes and connectors
				//var remainingVertexes = AU.clone(depthSpanningTree.vertexes);
				//var remainingEdges = AU.clone(depthSpanningTree.edges);
				var currVertex = mainChainPath.vertexes[0]; //startingVertex;
				partResult = this._writeMolVertex(currVertex, null, mainChainPath.edges, depthSpanningTree.edges, ringEdgeRepo, aromaticNodes, aromaticConnectors, options);

				if (i === 0)
					result = partResult;
				else
					result += SMI.BOND_FAKE + partResult;
			}
			return result;
		}
		else
			return '';
	},
	/** @private */
	_writeMolVertex: function(vertex, prevEdge, mainChainEdges, spanningTreeEdges, ringEdgeRepo, aromaticNodes, aromaticRingConnectors, options)
	{
		var node = vertex.getData('object');
		var edges = vertex.getEdges();
		//var edges = AU.intersect(vertex.getEdges(), spanningTreeEdges);
		var mainChainStr;
		var branchStrs = [];
		var mainChainVertex;
		var branchVertexes = [];
		var ringStrs = [];
		var ringedVertexes = [];
		var connectorParity;
		var nextBondStereoStr = '';
		var ignoreBondStereo = options.ignoreStereoBond;
		if (Kekule.ObjUtils.isUnset(ignoreBondStereo))
			ignoreBondStereo = options.ignoreStereo || false;
		for (var i = edges.length - 1; i >= 0; --i)
		{
			var edge = edges[i];
			if (edge === prevEdge)
				continue;

			var connector = edge.getData('object');
			if (!ignoreBondStereo && connector.getParity && connector.getParity())  // next is stereo double bond
			{
				var keyNodes = Kekule.MolStereoUtils.getStereoBondKeyNodes(connector);
				if (keyNodes)
				{
					var nextVertex = vertex.getNeighborOnEdge(edge);
					var nextNode = nextVertex.getData('object');
					var initDirSymbols = this._getStereoDoubleBondInitialDirectionSymbols(connector);
					var dirSymbol = initDirSymbols[0];
					if (keyNodes.indexOf(nextNode) < 0)
						dirSymbol = this._getInvertBondDirectionSymbol(dirSymbol);
				}
				nextBondStereoStr = dirSymbol;
			}

			if (spanningTreeEdges.indexOf(edge) >= 0)  // edge on spanning tree, not ring edge
			{
				var nextVertex = vertex.getNeighborOnEdge(edge);
				var str = this._writeMolVertex(nextVertex, edge, mainChainEdges, spanningTreeEdges, ringEdgeRepo, aromaticNodes, aromaticRingConnectors, options);
				if (mainChainEdges.indexOf(edge) >= 0)  // edge on main chain
				{
					mainChainVertex = nextVertex;
					mainChainStr = str;
				}
				else
				{
					branchVertexes.push(nextVertex);
					branchStrs.push(str);
				}
			}
			else  // ring edge
			{
				// TODO: currently more than 99 ring edges and ring number reuse are not considered
				var ringEdgeIndex = ringEdgeRepo.indexOf(edge);
				if (ringEdgeIndex < 0)  // not registered
				{
					ringEdgeIndex = ringEdgeRepo.length;
					ringEdgeRepo.push(edge);
				}
				ringedVertexes.push(vertex.getNeighborOnEdge(edge));
				var ringStr = this._outputConnectorStr(connector,
					edge.getVertexes()[0].getData('object'), edge.getVertexes()[1].getData('object'), aromaticNodes, aromaticRingConnectors);
				ringEdgeIndex = ringEdgeIndex + 1;  // avoid index 0
				ringStr += (ringEdgeIndex < 10)? ringEdgeIndex: SMI.RING_BOND_TWO_DIGIT_NO_PREFIX + ringEdgeIndex;
				ringStrs.push(ringStr);
			}
		}

		// form result string
		var result = '';
		var prevNode;
		var prevBondStereoStr = '';
		if (prevEdge)
		{
			if (nextBondStereoStr)
				result += nextBondStereoStr;

			var prevVertex = vertex.getNeighborOnEdge(prevEdge);
			prevNode = prevVertex.getData('object');
			var prevConnector = prevEdge.getData('object');

			if (!ignoreBondStereo && prevConnector.getParity && prevConnector.getParity())  // curr vertex is the end vertex of stereo bond
			{
				var keyNodes = Kekule.MolStereoUtils.getStereoBondKeyNodes(prevConnector);
				if (keyNodes)
				{
					var initDirSymbols = this._getStereoDoubleBondInitialDirectionSymbols(prevConnector);
					var dirSymbol = initDirSymbols[1];
					if (keyNodes.indexOf(prevNode) < 0)
						dirSymbol = this._getInvertBondDirectionSymbol(dirSymbol);
				}
				prevBondStereoStr += dirSymbol;
			}

			result += this._outputConnectorStr(prevConnector,
				prevEdge.getVertexes()[0].getData('object'), prevEdge.getVertexes()[1].getData('object'), aromaticNodes, aromaticRingConnectors); // + result;
		}
		var nextNodes = [];
		for (var i = 0, l = ringedVertexes.length; i < l; ++i)
		{
			nextNodes.push(ringedVertexes[i].getData('object'));
		}
		for (var i = 0, l = branchVertexes.length; i < l; ++i)
		{
			nextNodes.push(branchVertexes[i].getData('object'));
		}
		if (mainChainVertex)
			nextNodes.push(mainChainVertex.getData('object'));
		result += this._outputNodeStr(node, aromaticNodes.indexOf(node) >= 0, prevNode, nextNodes, options);

		for (var i = 0, l = ringStrs.length; i < l; ++i)
		{
			result += ringStrs[i];
		}
		if (!mainChainStr && branchStrs.length)  // vertex not on main chain, regard the first branch as sub-main chain
			mainChainStr = branchStrs.shift();
		for (var i = 0, l = branchStrs.length; i < l; ++i)
		{
			result += SMI.BRANCH_BRACKET_LEFT + branchStrs[i] + SMI.BRANCH_BRACKET_RIGHT;
		}
		result += prevBondStereoStr;
		if (mainChainStr)
			result += mainChainStr;
		return result;
	},
	/** @private */
	_outputNodeStr: function(node, isAromatic, prevNode, nextNodes, options)
	{
		var result;
		var symbol;
		// symbol
		if (node instanceof Kekule.Atom)
		{
			symbol = node.getSymbol();
			result = symbol;
			if (isAromatic)
				result = result.toLowerCase();
		}
		else
		{
			result = SMI.ATOM_WILDCARD;
		}

		// chiral?
		var ignoreAtomStereo = options.ignoreStereoAtom;
		if (Kekule.ObjUtils.isUnset(ignoreAtomStereo))
			ignoreAtomStereo = options.ignoreStereo || false;
		var schiralRot;
		if (!ignoreAtomStereo && node.getParity && node.getParity())
		{
			// calc rotation direction
			if (nextNodes && nextNodes.length)
			{
				// check if there is a bonded H atom, as it may affects the stereo and are ignored in vertex graph
				var bondedHAtoms = node.getLinkedHydrogenAtoms();
				if (bondedHAtoms && bondedHAtoms.length === 1 && nextNodes.indexOf(bondedHAtoms[0]) < 0)
				{
					nextNodes.push(bondedHAtoms[0]);
				}
				//var hcount = node.getHydrogenCount? (node.getHydrogenCount(true) || 0): 0;  // calc bonded Hs, as they are excluded from graph
				var hcount = node.getHydrogenCount? (node.getHydrogenCount(false) || 0): 0;  // calc implicit Hs, as they are excluded from graph

				if (bondedHAtoms.length && hcount)  // has both implicit and explicit H, this should not be a chiral center
					schiralRot = '';
				else
				{

					// looking from prev node, calc rotation of nextNodes, if implicit H exists, it should be considered as first or last next node (result are same)
					var dir = Kekule.MolStereoUtils.calcTetrahedronChiralCenterRotationDirection(null, node, prevNode, nextNodes, !!hcount, false, {allowExplicitVerticalHydrogen: true});
					var schiralRot = (dir === Kekule.RotationDir.CLOCKWISE) ? SMI.ROTATION_DIR_CLOCKWISE :
							(dir === Kekule.RotationDir.ANTICLOCKWISE) ? SMI.ROTATION_DIR_ANTICLOCKWISE :
									'';
				}
				if (schiralRot)
				{
					result += schiralRot;
				}
			}
		}

		// hydrogen, show if explicit H count is set or non-C aromatic atom link with H
		var explicitHCount;
		if (schiralRot)  // if chiral center, H is always be listed
			explicitHCount = node.getHydrogenCount? (node.getHydrogenCount(true) || 0): 0;  // calc bonded Hs, as they are excluded from graph
		else
		{
			explicitHCount = node.getExplicitHydrogenCount ? node.getExplicitHydrogenCount() : 0;
			if (!explicitHCount && isAromatic && (symbol !== 'C') && node.getImplicitHydrogenCount)
			{
				explicitHCount = node.getImplicitHydrogenCount();
			}
		}
		// write explicit H count after chiral
		if (explicitHCount)
		{
			result += SMI.ATOM_H;
			var hcount = Math.round(explicitHCount);
			if (hcount > 1)
				result += hcount;
		}

		// charge
		var charge = Math.round(node.getCharge());
		if (charge)
		{
			var chargeStr = (charge > 0)? '+': '-';
			if (charge > 1 || charge < -1)
				chargeStr = Math.abs(charge) + chargeStr;
			result += chargeStr;
		}

		// isotope
		var massNum = node.getMassNumber? node.getMassNumber(): null;
		if (massNum)
			result = Math.abs(massNum) + result;

		var simpleOrgAtom = false;
		if (!explicitHCount && !charge && !massNum && !schiralRot)  // no special property is set
		{
			if ((!isAromatic &&SMI.ORGAN_SUBSET_ATOMS.indexOf(symbol) >= 0)
				|| (isAromatic && SMI.AROMATIC_SUBSET_ATOMS.indexOf(symbol) >= 0))
			{
				simpleOrgAtom = true;
			}
		}
		if (!simpleOrgAtom)
			result = SMI.ATOM_BRACKET_LEFT + result + SMI.ATOM_BRACKET_RIGHT;
		return result;
	},
	/** @private */
	_outputConnectorStr: function(connector, connectedNode1, connectedNode2, aromaticNodes, aromaticConnectors)
	{
		if (connector instanceof Kekule.Bond)
		{
			if (connector.getBondType() === Kekule.BondType.COVALENT)
			{
				if (aromaticConnectors.indexOf(connector) >= 0)   // connector on aromatic ring
					return '';
				else
				{
				  var connectBothAromaticNodes = (aromaticNodes.indexOf(connectedNode1) >= 0) && (aromaticNodes.indexOf(connectedNode2) >= 0);
					var bondOrder = connector.getBondOrder();
					var BO = Kekule.BondOrder;
					return (bondOrder === BO.DOUBLE)? SMI.BOND_DOUBLE:
						(bondOrder === BO.TRIPLE)? SMI.BOND_TRIPLE:
						(bondOrder === BO.QUAD)? SMI.BOND_QUAD:
						connectBothAromaticNodes? SMI.BOND_SINGLE: '';  // default, single, no need to add bond string
					// TODO: bond stereo
				}
			}
		}
		// default
		return SMI.BOND_FAKE;
	},
	/** @private */
	_getStereoDoubleBondInitialDirectionSymbols: function(bond)
	{
		var parity = bond.getParity();
		if (parity === Kekule.StereoParity.EVEN)
			return [SMI.DIRECTION_BOND_SYMBOLS[0], SMI.DIRECTION_BOND_SYMBOLS[0]];
		else
			return [SMI.DIRECTION_BOND_SYMBOLS[0], SMI.DIRECTION_BOND_SYMBOLS[1]];
	},
	/** @private */
	_getInvertBondDirectionSymbol: function(dirSymbol)
	{
		if (dirSymbol === SMI.DIRECTION_BOND_SYMBOLS[1])
			return SMI.DIRECTION_BOND_SYMBOLS[0];
		else
			return SMI.DIRECTION_BOND_SYMBOLS[1];
	}
});

// extents mime type consts
Kekule.IO.MimeType.SMILES = 'chemical/x-daylight-smiles';

// register chem data formats
Kekule.IO.DataFormat.SMILES = 'smi';
var smilesFmtId = 'smi';

Kekule.IO.DataFormatsManager.register(Kekule.IO.DataFormat.SMILES, Kekule.IO.MimeType.SMILES, ['smi', 'smiles'],
	Kekule.IO.ChemDataType.TEXT, 'SMILES format');

var suitableClasses = [Kekule.StructureFragment, Kekule.ChemObjList, Kekule.ChemStructureObjectGroup, Kekule.ChemSpaceElement, Kekule.ChemSpace];
Kekule.IO.ChemDataWriterManager.register('SMILES', Kekule.IO.SmilesMolWriter,
	suitableClasses,
	[Kekule.IO.DataFormat.SMILES]);
})();