// SFM has 2 types of tokens: single markers, and paired markers (open and close)
//
// Single markers supported:
//    Single marker
//      \id JHN -- Description      3 char identifier of book
//      \h  header
//      \toc1,2,3   table of contents
//      \mt     marks title (ignore for Stuart's as he also uses \h with redundant content)
//      \c      chapter
//      \s1     section heading (not in the original text)
//      \r      cross references for a section (appear as a subheading)
//      \ms     major section -- marks "Book" headings in the Psalms
//      \d      'superscription in the Psalms'
//      \p \pi \m \mi \li \li1 \q \q1 \q2 \b    para pInset, margin mInset, list lInset,
//              poetry + inset & double inset, blank
//
//    Double markers (open with \x, close with \x*)
//
//    Sections
//      \v  verse number e.g. 5:3
//      \va [5:3-7b] \va*   verse number with complex structure
//      \f  footnote  \f*   can contain open ended parts, closed by \f*
//          \fr footnote reference  \ft footnote text (can contain \add or \sls)
//      \add added text \add*
//      \bk book title \bk* format as a book title => italics (same as add)
//
//    Font Style
//      \bd \it \bdit  bold, italic, both
//
// code detects improper ordering of markers as well as unknown markers

// (1) fetch all lines and convert to single string data, changing tag indicator from \ to !~
// (2) process data into paragraphs
// (3) convert paragraphs into HTML and append to output div
// (4) track divs so as to enable jumping to a verse
// (5) FUTURE: allow starting not at top of book or chapter, and track what div holds
//     so that can prepend on reverse scroll, append on forward scroll
//     I.e. each Chapter and verse div gets an ID to allow element.ScrollIntoView
//     If not yet rendered, generate new html
// (6) FUTURE: cache converted html to allow quick hopping to
//     previously rendered paragraphs

// following maps all sfm tags to broad processing types p, b, v, c, f
const sfmTypes = {
	p: "p",
	b: "b",
	m: "p",
	mi: "p",
	li: "p",
	li1: "p",
	li2: "p",
	q: "p",
	q1: "p",
	q2: "p",
	ms: "p",
	ms1: "p",
	h: "p",
	c: "c",
	pi: "p",
	mt: "x",
	id: "x",
	toc1: "x",
	toc2: "x",
	toc3: "x",
	s1: "p",
	s: "p",
	r: "p",
	d: "p",
	v: "v",
	va: "v",
	bd: "sc",
	it: "sc",
	bdit: "sc",
	f: "f",
	add: "embed",
	sls: "embed",
	bk: "embed", // style changing embed
	fig: "fig"
};
var chapter = 0;
var newChap = false;

function sfmToHtml(data) {
	const regex = /\\/g;
	var buffer1 = data.replaceAll(regex, "!~"); // change markers to leading !~
	var buffer = buffer1.replaceAll("\n", " "); // remove line markers
	var index = buffer.indexOf("!~"); // process starting at first tag
	var length = buffer.length;
	var result = "";
	var nextTag = null;
	var nPara = 0;
	console.log("starting sfmToHtml... " + data.length);

	if (data.length < 2000 && buffer.indexOf("<html") >= 0) {
		console.log("invalid file or invalid server response");
		return "Error, invalid file or server response";
	}

	while (index < length && nPara < 10000) {
		var partial = null;
		[nextTag, index, partial] = sfmParaToHtml(buffer, index, nextTag);
		// if (nPara < 10) console.log("partial "+partial);
		result += partial;
		nPara += 1;
	}
	//console.log("nPara "+nPara);
	//console.log("total length "+result.length);
	return result;
}
function sfmParaToHtml(source, start, startTag) {
	// convert to HTML and return as <div>...</div>
	var index = start; // tracks next character to process
	var endpoint = source.length;
	if (startTag === null || startTag.length === 0) startTag = getTag(source, index);
	while (sfmTypes[startTag] === "x")
		[startTag, index] = getNextTag(source, index + 2);
	var tag = startTag;
	var [nextTag, nextIndex] = getNextTag(source, index + tag.length + 2);
	// if (index < 120) {
	//     console.log("processing "+index+" "+tag);
	//     console.log("initial nextTag "+nextTag);
	//     console.log("newChap "+newChap);
	// }
	var result = "";
	var results = "";
	//var counter = 0;    // prevents infinite loops processing one paragraph
	while (nextIndex < endpoint) {
		// process from 'tag' to 'nextTag' or end of data
		if (nextIndex > index) {
			var skipLength = tag ? tag.length + 2 : 0;
			results += source.slice(index + skipLength, nextIndex).trim(); // capture text up to next marker
			// if (index < 120) console.log(results);
		}
		const sfmType = sfmTypes[nextTag];
		// console.log("nextTag "+nextTag+" type "+sfmType);
		switch (sfmType) {
			case "x": // next tag type x is treated like p as far as processing goes
			case "p": // a new paragraph starts next, so return current results to caller
				var divType = getDivType(startTag);
				// if (divType !== "p") console.log("divType "+divType+" at "+chapter);
				// if (results.length <= 0) console.log("no content for startTag "+startTag+" at "+nextIndex);
				// suppress content for \c followed by an x or p type:
				let partial = "";
				if (startTag === "c") {
					newChap = true;
					chapter = results; // capture chapter number for later
				} else {
					partial =
						"<" +
						divType +
						" class='sfm-" +
						startTag +
						"'>" +
						results +
						"</" +
						divType +
						">";
				}
				return [nextTag, nextIndex, partial];
			case "b":
				result = "<br/>";
				nextTag = null;
				nextIndex += 3; // including following space
				break;
			case "v":
				//var temp = nextIndex;
				[nextTag, nextIndex, result] = parseVerse(source, nextIndex, nextTag);
				//console.log("verse absorbed from "+temp+" to "+nextIndex);
				break;
			case "sc":
				[nextTag, nextIndex, result] = parseStyledChars(
					source,
					nextIndex,
					nextTag
				);
				break;
			case "c":
				var snippet = source.slice(nextIndex + 4, nextIndex + 7); // up to chapter 99
				chapter = snippet.match(/[0-9]+/);
				//console.log("chapter "+chapter);
				result = ""; //"<span class='sfm-c' id='c-"+chapter+"'/>";
				newChap = true; // text generated at next verse
				nextIndex += 3 + chapter.length;
				break;
			case "f":
				[nextTag, nextIndex, result] = parseFootnote(
					source,
					nextIndex,
					nextTag
				);
				break;
			case "embed":
				[nextTag, nextIndex, result] = parseEmbed(source, nextIndex, nextTag);
				break;
			case "fig":
				[nextTag, nextIndex, result] = parseFigure(source, nextIndex, nextTag);
				break;
			default:
				result = "unknown tag type " + nextTag + " at " + nextIndex;
				console.log(result);
				nextIndex = endpoint;
				return [null, endpoint, null];
			// correct the index and keep going?  write a routine to absorb unknown?
		}
		results += result; // still inside current paragraph
		tag = nextTag;
		index = nextIndex;
		if (index < endpoint) [nextTag, nextIndex] = getNextTag(source, index); // next tag beyond index
		//counter += 1;
	}
	//console.log("at end of data "+index+" "+nextIndex);
	if (nextIndex > index) {
		skipLength = tag ? tag.length + 2 : 0;
		results += source.slice(index + skipLength, nextIndex).trim(); // capture text up to next marker
		//console.log(results);
	}
	divType = getDivType(startTag);
	return [null, nextIndex, "<" + divType + ">" + results + "</" + divType + ">"];
}

function getTag(source, index) {
	// get tag at current position
	if (index > source.length - 2) return null;
	const buff = source.slice(index, index + 6); // tags are no longer than 6 : \add* => !~add*
	const regex = RegExp(/!~[a-z]+[0-3]?[*]?/); // regex to detect conforming tag
	var tag = regex.exec(buff);
	//console.log("got tag "+tag[0]);
	var bareTag = tag[0].slice(2);
	//console.log("bare tag "+bareTag);
	return bareTag; // removed leading marker
}

function getNextTag(source, index) {
	if (index > source.length - 2) return [source.length, null];
	const next = source.indexOf("!~", index);
	//console.log("getNextTag next "+next);
	if (next < 0) return [null, source.length];
	return [getTag(source, next), next];
}

function getDivType(tag) {
	var type = sfmTypes[tag];
	if (type === "p") return "p";
	else if (type === "b") return "br";
	else return "span";
}

function parseVerse(buffer, index, tag) {
	//console.log("parseVerse at "+index+", tag "+tag);
	var prefix = newChap ? "<span class='sfm-c'>" + chapter + "</span>" : "";
	newChap = false;
	if ("v" === tag) {
		// text should be just a verse number following
		var snippet = buffer.slice(index + 4, index + 11); // up to verse 999
		//console.log("verse snippet "+snippet);
		var verse = snippet.match(/[0-9]+/);
		var verse2 = snippet.match(/[0-9]+-[0-9]+/); // range of verse numbers
		if (verse2) verse = verse2;
		var id = chapter + ":" + verse[0];
		var result =
			prefix + "<span class='sfm-verse' id='" + id + "'>" + verse[0] + "</span>";
		// console.log(id);
		return [null, index + verse[0].length + 4, result];
	} else if ("va" === tag) {
		// can now have a range of verse numbers & part letters
		var [nextTag, next] = getNextTag(buffer, index + 2);
		if ("va*" === nextTag) {
			snippet = buffer.slice(index + 5, next).trim();
			//console.log("va special "+snippet);
			var matched = snippet.match(/\[[0-9]+[a-f]?\]/);
			// if (matched) {
			//     console.log("matched "+matched.length+" "+matched.length>1? matched[0] : "");
			// }
			//var matched2 = snippet.match(/\[[0-9]+[a-f]?[-,][0-9]*[a-f]?\]/);
			// if (matched2) {
			//     matched = matched2;
			//     console.log(matched2);
			// }
			if (matched) verse = matched[0];
			else {
				//console.log("bad va? "+snippet);
				verse = snippet;
			}
			result = prefix + "<span class='sfm-verse'>" + verse + "</span>";
			return [null, next + 5, result];
		} else {
			console.log("!~va* not found at " + index);
			return [null, buffer.length, "va* not found"];
		}
	} else {
		console.log("Error: unknown tag in parseVerse " + tag);
	}
}

// following could be made recursive to handle nesting of bold and italic
// could also be folded into 'embed' type
function parseStyledChars(buffer, index, tag) {
	var [nextTag, next] = getNextTag(buffer, index + 2); // find closing tag
	if (tag + "*" !== nextTag)
		console.log("Error " + tag + " mis-paired with " + nextTag);
	var snippet = buffer.slice(index + tag.length + 2, next).trim();
	var styled = null;
	if ("bd" === tag) styled = "<b>" + snippet + "</b>";
	else if ("it" === tag) styled = "<i>" + snippet + "</i>";
	else if ("bdit" === tag) styled = "<b><i>" + snippet + "</i></b>";
	else console.log("bad tag to parseStyledChars");
	return [null, next + nextTag.length + 2, styled];
}

function parseFootnote(buffer, index, tag) {
	// allowed internal tags: \fr footnote reference, \ft footnote text
	// footnote is converted to popup with fr as header, ft as text
	// Stuart always as fn as follows:
	//   \f + \fr footnote-reference \ft footnote-text\f*
	// Note: \add ... \add* or \sls ... \sls* can occur within the footnote-text
	//console.log("inside parseFootnote "+buffer.slice(index,index+11));
	if (buffer.slice(index, index + 11) === "!~f + !~fr ") {
		var [nextTag, next] = getNextTag(buffer, index + 11); // get end of \fr
		//var reference = buffer.slice(index+11,next).trim();
		//console.log("footnote "+nextTag);
		if (nextTag === "ft") {
			var [nextTag2, next2] = getNextTag(buffer, next + 4);
			var text = buffer.slice(next + 4, next2).trim();
			if (nextTag2 === "add" || nextTag2 === "sls") {
				//console.log(nextTag2 + " embedded in ft, after text="+text);
				let embed = "";
				let next3 = 0;
				[, next3, embed] = parseEmbed(buffer, next2, nextTag2);
				text += " " + embed + " ";
				//console.log("after embed:"+text);
				[nextTag2, next2] = getNextTag(buffer, next3);
				text += buffer.slice(next3, next2).trim();
				//console.log("with everything:"+text);
				//console.log(" and nextTag2="+nextTag2);
			}
			if (nextTag2 === "f*") {
				// everything is good
				let html =
					"<span class='sfm-footnote'>*<span class='tooltiptext'>" +
					text +
					"</span></span>";
				// console.log("html:"+html);
				return [null, next2 + 4, html];
				//    "<footnote ref='"+reference+"'>"+text+"</footnote>"]
			} else {
				console.log("did NOT get \\f* !!!!!!!!!!!!!!!");
			}
		}
	}
	return [null, buffer.length, "<div>bad footnote at" + index + "</div>"];
}

function parseEmbed(buffer, index, tag) {
	var [nextTag, next] = getNextTag(buffer, index + 2); // find closing tag
	var snippet = buffer.slice(index + tag.length + 2, next);
	if (tag + "*" !== nextTag) {
		// try to deal with some recursive embedding...
		if (nextTag === "f") {
			var [, next2, footnote] = parseFootnote(buffer, next, nextTag);
			console.log("footnote in Embed " + footnote);
			snippet += footnote;
			console.log("snippet " + snippet);
			[nextTag, next] = getNextTag(buffer, next2);
			console.log("subsequent tag " + nextTag);
		}
		if (tag + "*" !== nextTag)
			console.log("Error " + tag + " mis-paired with " + nextTag);
	}
	return [
		null,
		next + nextTag.length + 2,
		"<span class='sfm-" + tag + "'>" + snippet + "</span> "
	];
}

function parseFigure(buffer, index, tag) {
	var [nextTag, next] = getNextTag(buffer, index + 2); // find closing tag
	if (tag + "*" !== nextTag)
		console.log("Error " + tag + " mis-paired with " + nextTag);
	// var snippet = buffer.slice(index+tag.length+2,next); /* 7 '|' separated fields
	var snippet = "";
	return [
		null,
		next + nextTag.length + 2,
		"<span class='sfm-fig'>" + snippet + "</span> "
	];
}

export default sfmToHtml;
