import arsd.cgi;

/**
	CGI entry point for the retired site.

	Writes a fixed notice to the response and does nothing else.

	Params:
		cgi = the request/response object handed in by GenericMain
*/
void newmain(Cgi cgi) {
	// The two blank lines inside the literal are part of the text that is sent.
	immutable string notice = "This project is now dead since Vladimir Panteleev's program did the goal better and in less time.


If you really want it, contact me and maybe we can work something out. destructionator@gmail.com will reach me.";

	cgi.write(notice);
}

mixin GenericMain!newmain;

// `none` is never a set version identifier, so the label form below
// disables everything that follows it in this module.
version(none):

/*
	A newsreader and poster for the web

	Use last checked time to mark new things in web interface

	use html5 history to change url as you scroll.

	TODO:
		Bold/italic/underline from ascii
		BBCode on input from web

	Walter Bright:
		1. Can use web interface or nntp interface
		2. web interface looks sort of like reddit, i.e. all posts on a thread
		3. users can post anonymously
		4. web interfaces supports logins - logged in users can vote up or
		   down on posts
		5. web interface can mark posts as read or unread - fixing my beef
		   with reddit that there's no reasonable way to scan a thread for
		   new posts
		6. an easy way for moderators to delete spam
		7. runs on 64 bit FreeBSD (what the Digital Mars server runs on),
		   yes, I know that means I have to get 64 bit dmd on FreeBSD working!

	Andrej Mitrovic:
		8. Search functionality
		   digitalmars uses google for searching the NG archive, but I've no
		   idea how to do custom searches. I.e. I'd like to search for a
		   keyword in the topic title only, how would I do that?
*/

import arsd.web;
import arsd.mysql;
import arsd.htmltotext;
import arsd.bbcode;

import std.exception;
import std.random;
import std.uri;
import std.date;
import std.base64;

static import std.regex;
static import std.algorithm;

import helpers; // FIXME great name!
/**
	Schema for the local cache. The text is also fed to
	DataObjectFromSqlCreateTable below, so it has to stay multi-line:
	the `--` comments run to the end of their line.
*/
enum string sqlSource = `
	CREATE TABLE posts (
		-- all of these are Message-ID values
		-- FIXME: this is liable to be sloooooow as the db grows
		messageId VARCHAR(60) PRIMARY KEY,
		inReplyTo VARCHAR(60),
		threadRoot VARCHAR(60),

		-- the numeric identifier, if we know it
		articleId INTEGER,

		datePosted BIGINT NOT NULL,
		newsgroup VARCHAR(40) NOT NULL,
		author VARCHAR(80) NOT NULL,
		subject VARCHAR(120) NOT NULL,
		message TEXT
	) ENGINE=InnoDB DEFAULT CHARSET=utf8;

	CREATE TABLE assorted_data (
		id INTEGER AUTO_INCREMENT,
		name VARCHAR(80),
		value VARCHAR(80),
		PRIMARY KEY(id)
	) DEFAULT CHARSET=utf8;
`;

alias DataObjectFromSqlCreateTable!(sqlSource, "posts") PostBase;

/**
	One row of the posts table plus the Newsreader it came from, which is
	needed to look up related messages while rendering.
*/
class Post : PostBase {
	Newsreader newsreader;

	this(Database db, Newsreader newsreader) {
		super(db);
		this.newsreader = newsreader;
	}

	override string toString() {
		return subject ~ " by " ~ author;
	}

	/**
		Renders this post as a <div>: navigation links, the subject and
		author, the body (raw or parsed, depending on the "view-mode"
		preference), links to direct replies and a pre-filled reply form.

		Params:
			document = document to create nodes in; a fresh one is made if null

		Returns: the container element (not attached to anything)
	*/
	Element makeHtmlElement(Document document = null) {
		if(document is null)
			document = new Document();

		auto container = document.createElement("div");

		// '+' is a legal character in a sanitized group name, so the name is
		// encoded here the same way linkToPost encodes it
		container.appendChild(
			new Link("thread-index?newsgroup=" ~ std.uri.encodeComponent(this.newsgroup), this.newsgroup));

		if(this.threadRoot.length) {
			container.addChild("br");
			container.appendText("Thread: ");
			container.appendChild(
				linkToPost(
					newsreader.getMessage(this.newsgroup, this.threadRoot)
				));
		}

		if(this.inReplyTo.length && this.threadRoot != this.inReplyTo) {
			container.addChild("br");
			container.appendText("In reply to: ");
			container.appendChild(
				linkToPost(
					newsreader.getMessage(this.newsgroup, this.inReplyTo)
				));
		}

		auto originalHolder = document.createElement("div");
		originalHolder.setAttribute("class", "originalHolder");
		container.appendChild(originalHolder);

		auto viewMode = getUserPreference(newsreader.cgi, "view-mode", "parsed");
		if(viewMode == "original") {
			originalHolder.appendChild(new Link("switch-view-preference?positional-arg-0=parsed", "View parsed"));
		} else {
			originalHolder.appendChild(new Link("switch-view-preference?positional-arg-0=original", "View original"));
		}

		container.addChild("h2", this.subject);
		container.addChild("p", beautifyName(this.author)).setAttribute("class", "author").setAttribute("title", this.author);

		// stays null in "original" view; the reply form below uses that
		// to decide between plain quoting and BBCode quoting
		Element htmlElement;

		if(viewMode == "original")
			container.addChild("pre", this.message);
		else {
			htmlElement = messageToHtmlElement(document, this.message);
			container.appendChild(htmlElement);
		}

		//foreach(paragraph; this.message().split("\n\n")) {
		//	container.addChild("p", paragraph);
		//}
		//container.addChild("pre", this.message);

		// also check for any children
		bool outputted = false;
		foreach(post; newsreader.getReplies(this.messageId)) {
			if(outputted)
				container.appendChild(document.createElement("br"));
			else
				outputted = true;
			container.appendChild(linkToPost(post, "Reply by " ~ post.author));
		}

		auto replyForm = cast(Form) container.appendChild(
			createAutomaticForm(document, newsreader.reflection.functions["post-message"], [
				"threadRoot" : "hidden",
				"inReplyTo" : "hidden",
				"message" : "textarea-12",
				"newsgroup" : "hidden"
			]));

		replyForm.setValue("threadRoot", this.threadRoot.length ? this.threadRoot : this.messageId);
		replyForm.setValue("inReplyTo", this.messageId);
		replyForm.setValue("newsgroup", this.newsgroup);
		replyForm.setValue("subject",
			this.subject().indexOf("Re:") == -1 ? "Re: " ~ this.subject() : this.subject());

		replyForm.setValue("from", getUserPreference(newsreader.cgi, "from"));
		replyForm.setValue("email", getUserPreference(newsreader.cgi, "email"));

		if(htmlElement is null || getUserPreference(newsreader.cgi, "reply-format") == "plain" ||
			(
				getUserPreference(newsreader.cgi, "reply-format") is null
				&&
				getUserPreference(newsreader.cgi, "view-mode") == "original"
			))
		{
			replyForm.setValue("message", beautifyName(this.author) ~ " wrote:\n> " ~ replace(this.message, "\n", "\n> "));
		} else {
			replyForm.setValue("message", `[quote="`~beautifyName(this.author)~`"]` ~ htmlToBbCode(htmlElement) ~ `[/quote]`);
			replyForm.setValue("allowBbCode", "1");
		}

		return container;
	}
}

// If a message is requested and it isn't in our database, we can fetch
// it from the actual server.

// If we post, we should add it to the database ourselves

/**
	The web API: thread and message views, posting, and polling the news
	server for new articles. Messages are cached in the posts table.
*/
class Newsreader : ApiProvider {
	static Database db;

	override void _initialize() {
		// NOTE(review): database credentials are hard-coded in source;
		// consider moving them to configuration.
		db = new MySql("localhost", "d_site", "s4$#dsf", "d_site");
	}

	override Element _getGenericContainer() {
		auto document = new Document(import("d-programming-language.org.htm"), true, true);
		return document.getElementById("content").addClass("newsgroup-viewer");
	}

	/**
		Looks a message up by Message-ID, fetching it from the news server
		and storing it locally when it is not in the database yet.

		Throws: Exception for an invalid newsgroup name; whatever
		getIndividualPostFromNewsServer throws when the fetch fails.
	*/
	Post getMessage(string newsgroup, string messageId) {
		newsgroup = sanitizeNewsgroupName(newsgroup);

		Post post;

		auto res = db.query("SELECT * FROM posts WHERE messageId = ?", messageId);
		if(res.empty) {
			// need to get it off the NNTP server
			post = getIndividualPostFromNewsServer(db, newsgroup, messageId, this);
			post.commitChanges(); // we want to store it locally for later
		} else {
			post = objectFromResult!(Post)(db, res.front, this);
		}

		return post;
	}

	/// Stores the "view-mode" preference cookie and sends the user back to the referring page.
	void switchViewPreference(string preference) {
		setUserPreference(cgi, "view-mode", preference);
		redirect(cgi.referrer);
	}

	enum ThreadOrdering {
		ByParent, /** A tree view, from first to last */
		ByParentDescending, /** Tree view, top level posts being newest first */
		ByDate, /** Thread replies by date, first to last, ignoring In Reply To */
		ByScore /** Top score posts first */
	}

	struct Thread {
		string rootId;
		Post[] posts;
		ThreadOrdering orderedBy;
	}

	/**
		Collects the posts of one thread in the requested order and renders
		them with getThread_Page.

		ByParent walks the reply tree depth first starting at messageId;
		ByDate selects the root and everything whose threadRoot is messageId.
		ByParentDescending and ByScore are not implemented (assert(0)).
	*/
	/*Thread*/ Document getThread(string newsgroup, string messageId, ThreadOrdering ordering) {
		newsgroup = sanitizeNewsgroupName(newsgroup);

		Post[] posts;

		final switch(ordering) {
			case ThreadOrdering.ByParent:
			case ThreadOrdering.ByParentDescending:
				auto root = getMessage(newsgroup, messageId);

				bool[string] alreadyGot;

				// pre-order walk: a post, then each reply's whole subtree
				Post[] getTree(Post r) {
					Post[] ps;
					ps ~= r;
					alreadyGot[r.messageId] = true;
					auto replies = getReplies(r, true);

					foreach(post; replies) {
						if(post.messageId() !in alreadyGot)
							ps ~= getTree(post);
						else
							assert(0, "Cycle detected in reply tree");
					}

					return ps;
				}

				if(ordering == ThreadOrdering.ByParent)
					posts = getTree(root);
				else {
					/* reverse order */
					assert(0, "Not implemented");
				}
			break;
			case ThreadOrdering.ByDate:
				posts = arrayFromResult!Post(db, db.query(" SELECT * FROM posts WHERE newsgroup = ? AND (messageId = ? OR threadRoot = ?) ORDER BY datePosted ", newsgroup, messageId, messageId), this);
			break;
			case ThreadOrdering.ByScore:
				assert(0, "Not implemented");
		}

		assert(posts.length);

		Thread ret;
		ret.posts = posts;
		ret.rootId = posts[0].threadRoot;
		ret.orderedBy = ordering;

		return getThread_Page(ret);
	}

	/**
		Renders a Thread as a page. Unless ordered by date, replies are
		nested in "thread-container" divs that mirror the reply tree.
		The "expanded" query parameter switches from one-line summaries to
		full message bodies ("original" additionally shows them unparsed).
	*/
	Document getThread_Page(Thread thread) {
		auto container = _getGenericContainer();
		auto document = container.parentDocument;

		assert(thread.posts.length);

		document.title = thread.posts[0].subject ~ " - " ~ document.title;

		container.appendChild(
			new Link("thread-index?newsgroup=" ~ std.uri.encodeComponent(thread.posts[0].newsgroup), thread.posts[0].newsgroup));

		if(thread.orderedBy == ThreadOrdering.ByDate)
			document.mainBody.setAttribute("class", "full-thread-by-date");
		else
			document.mainBody.setAttribute("class", "full-thread");

		container.addChild("h1", thread.posts[0].subject);

		if("expanded" in cgi.get) {
			container.addChild("a", "Collapse All", cgi.scriptName ~ cgi.pathInfo ~ "?" ~ cgi.queryString.replace("&expanded", ""));
		} else {
			container.addChild("a", "Expand All", cgi.scriptName ~ cgi.pathInfo ~ "?" ~ cgi.queryString ~ "&expanded");
		}

		auto threadContainer = document.createElement("div");
		container.appendChild(threadContainer);
		threadContainer.setAttribute("class", "root thread-container");

		// Stack of the parents we are currently nested under. Invariant:
		// threadContainer is lastThreads.length - 1 levels below the root div.
		string[] lastThreads = [null];//, thread.posts[0].messageId];
		foreach(i, post; thread.posts) {
			// We don't do sub-threads if ordered by date
			if(thread.orderedBy != ThreadOrdering.ByDate)
			if(i && post.inReplyTo != lastThreads[$-1]) {
				// need to go up the tree...
				bool wentUp = false;
				foreach(idx, tid; lastThreads) {
					if(tid == post.inReplyTo) {
						wentUp = true;
						// keep the matching parent on the stack and climb exactly
						// as many containers as entries are dropped, so this post
						// lands next to its siblings
						auto levelsUp = lastThreads.length - 1 - idx;
						lastThreads = lastThreads[0 .. idx + 1];
						foreach(a; 0 .. levelsUp) {
							if(threadContainer.parentNode is null)
								break;
							threadContainer = threadContainer.parentNode;
						}
						break;
					}
				}

				if(!wentUp) {
					// a reply to the previous post: go one level deeper
					lastThreads ~= post.inReplyTo;
					threadContainer = threadContainer.addChild("div", "");
					threadContainer.setAttribute("class", "thread-container");
				}
			}

			auto messageContainer = document.createElement("div");
			messageContainer.setAttribute("class", "message");
			threadContainer.appendChild(messageContainer);

			if("expanded" in cgi.get) {
				auto byline = document.createElement("h2");
				byline.appendChild(linkToPost(post, beautifyName(post.author)));
				messageContainer.appendChild(byline);
			} else {
				auto link = linkToPost(post, "");
				link.addChild("span", post.subject).addClass("subject");
				link.appendText(" ");
				link.addChild("span", beautifyName(post.author)).addClass("author");
				messageContainer.appendChild(link);
			}

			//messageContainer.addChild("p", post.messageId ~ " :: " ~ post.inReplyTo);

			if("expanded" in cgi.get) {
				if("original" in cgi.get)
					messageContainer.addChild("pre", post.message);
				else
					messageContainer.appendChild(messageToHtmlElement(document, post.message));
			}
		}

		return document;
	}

	/// Lists the thread starters (posts without a threadRoot) of a newsgroup, newest first.
	/*Post[]*/ Document threadIndex(string newsgroup) {
		return threadIndex_Page(arrayFromResult!(Post)(db, db.query(" SELECT messageId, subject, author, datePosted, newsgroup FROM posts WHERE threadRoot IS NULL AND newsgroup = ? ORDER BY datePosted DESC", sanitizeNewsgroupName(newsgroup)), this));
	}

	/// Renders the thread index as a table with tree and linear view links per thread.
	Document threadIndex_Page(Post[] posts) {
		auto container = _getGenericContainer();
		auto document = container.parentDocument;

		auto table = cast(Table) document.createElement("table");
		container.appendChild(table);

		foreach(post; posts) {
			auto threadUrl = "get-thread?newsgroup=" ~ std.uri.encodeComponent(post.newsgroup)
				~ "&messageId=" ~ std.uri.encodeComponent(post.messageId);

			table.appendRow(
				linkToPost(post, post.subject),
				post.author,
				formatDate(post.datePosted),
				new Link(threadUrl ~ "&ordering=ByParent", "[Tree]"),
				new Link(threadUrl ~ "&ordering=ByDate", "[Linear]")
			);
		}

		return document;
	}

	/**
		Creates a post from form input. Unless preview is set it is stored
		locally and the user is redirected to it; with commitToNewsServer
		it is also submitted to the news server first.

		Throws: on a non-POST request, an invalid newsgroup name, or an
		unexpected reply from the news server.
	*/
	Post postMessage(string newsgroup, string from, string email, string subject, string message, string threadRoot, string inReplyTo, bool allowBbCode, bool preview, bool commitToNewsServer) {
		enforce(cgi.requestMethod == Cgi.RequestMethod.POST);

		newsgroup = sanitizeNewsgroupName(newsgroup);

		auto post = new Post(db, this);

		post.mode = UpdateOrInsertMode.AlwaysInsert;

		post.newsgroup = newsgroup;
		post.author = setUserPreference(cgi, "from", from) ~ " <" ~ setUserPreference(cgi, "email", email) ~ ">";
		post.subject = subject;
		if(allowBbCode)
			post.message = bbCodeToText(message);
		else
			post.message = message;

		if(threadRoot.length)
			post.threadRoot = threadRoot;
		if(inReplyTo.length)
			post.inReplyTo = inReplyTo;

		post.datePosted = getUTCtime();
		post.messageId = to!string(getUTCtime()) ~ "@localhost";

		if(!preview && commitToNewsServer) {
			auto f = openNetwork("news.digitalmars.com", 119);

			f.readln(); // skip the hello line
			f.writeln("POST");

			// Replies are stripped before comparing, as in the other NNTP
			// exchanges in this file, so a line terminator cannot end up in
			// the comparison or in the message id.
			string res = f.readln().strip; // 340 Ok, recommended ID

			string check = "340 Ok, recommended ID ";
			enforce(res.length > check.length && res[0 .. check.length] == check, res);

			post.messageId = res[check.length .. $]; //use the recommended ID

			// dot-stuffed so a "." line typed by the user cannot end the article early
			f.writeln(nntpDotStuff(formatPostAsEmail(this, post)));
			f.writeln("."); // need to terminate the message

			res = f.readln().strip;
			enforce(res == "240 article posted ok", res);

			f.writeln("QUIT");
			f.readln(); // it says goodbye too
		}

		if(!preview) {
			post.commitChanges();

			redirect("get-message?newsgroup=" ~ std.uri.encodeComponent(newsgroup) ~ "&messageId=" ~ std.uri.encodeComponent(post.messageId));
		}

		return post;
	}

	/**
		Direct replies to a message, from the local database only.

		Params:
			includeExtendedData = select every column instead of just
			author, messageId, newsgroup and subject
	*/
	Post[] getReplies(string messageId, bool includeExtendedData = false) {
		return arrayFromResult!(Post)(db, db.query(" SELECT " ~
			(
				includeExtendedData ? "*"
				: "author, messageId, newsgroup, subject"
			)
			~ " FROM posts WHERE inReplyTo = ?", messageId), this);
	}

	/// ditto
	Post[] getReplies(Post message, bool includeExtendedData = false) {
		return getReplies(message.messageId, includeExtendedData);
	}

	/**
		Asks the news server for articles newer than the last check and
		stores the ones we do not have yet. Does nothing when the previous
		check was less than half an hour ago.
	*/
	void checkForNewPosts(string newsgroup) {
		newsgroup = sanitizeNewsgroupName(newsgroup);

		string key = "lastChecked-" ~ newsgroup;
		auto now = getUTCtime();

		long lastChecked;
		auto result = db.query("SELECT value FROM assorted_data WHERE name = ?", key);
		bool firstCheck = result.empty;
		if(firstCheck)
			lastChecked = now - 1000 * 60 * 60 * 24; // get 1 day
		else
			lastChecked = to!long(result.front[0]);

		// Test the throttle BEFORE touching the stored timestamp: updating it
		// first would let frequent callers push the next check back forever.
		if(now - lastChecked < 1000 * 60 * 30)
			return; // don't check more than once every half hour

		if(firstCheck)
			db.query("INSERT INTO assorted_data (name, value) VALUES (?, ?)", key, now);
		else
			db.query("UPDATE assorted_data SET value = ? WHERE name = ?", now, key);

		auto f = openNetwork("news.digitalmars.com", 119);

		f.readln(); // skip the hello line

		f.writeln("group " ~ newsgroup);
		f.readln(); // we don't really care about this either

		f.writefln("newnews %s %s", newsgroup, dateToNewNewsFormat(lastChecked));

		auto response = f.readln().strip;
		enforce(response == "230 New news follows", response);

		string[] newPosts;

		response = f.readln().strip;
		while(response != ".") {
			newPosts ~= response.idup;
			response = f.readln().strip;
		}

		foreach(post; newPosts) {
			auto postres = db.query("SELECT messageId FROM posts WHERE messageId = ?", post);
			if(!postres.empty)
				continue; // we already have it

			f.writeln("article " ~ post);
			response = f.readln().strip;

			enforce(response.length >= 5 && response[0 .. 5] == "220 0", response);

			auto p = postFromArticleText(db, readNntpTextBlock(f), newsgroup, this);
			p.commitChanges();
		}

		f.writeln("QUIT"); // say goodbye
		f.readln(); // it says bye too
	}
}

/**
	Reads a multi-line NNTP response up to (not including) the lone "."
	terminator and returns it with "\n" line ends.

	Only trailing whitespace is removed from each line: when reading
	posts, whitespace on the left might be indented code! A leading dot
	added by the sender's dot-stuffing is removed again.

	NOTE(review): assumes readln keeps returning lines until the
	terminator arrives; behaviour on a dropped connection depends on
	openNetwork, which is not visible here.
*/
private string readNntpTextBlock(Connection)(ref Connection f) {
	string text;

	auto line = f.readln().stripRight;
	while(line != ".") {
		if(line.length && line[0] == '.')
			line = line[1 .. $];
		text ~= line.idup ~ "\n";
		line = f.readln().stripRight;
	}

	return text;
}

/// Doubles the leading dot of every line that starts with one, so no line of the article can be mistaken for the "." terminator.
private string nntpDotStuff(string article) {
	string stuffed;
	foreach(n, line; article.split("\n")) {
		if(n)
			stuffed ~= "\n";
		if(line.length && line[0] == '.')
			stuffed ~= ".";
		stuffed ~= line;
	}
	return stuffed;
}

/// Makes a string safe to use as a header value: line breaks become spaces (so form input cannot add headers of its own) and the ends are trimmed.
private string headerValue(string raw) {
	return raw.replace("\r", " ").replace("\n", " ").strip;
}

/**
	Builds the article text (headers, blank line, body) for posting.

	Returns: the article with "\n" line ends and no terminating "." line
	Throws: Exception if the post's newsgroup name is invalid
*/
string formatPostAsEmail(Newsreader r, Post post) {
	sanitizeNewsgroupName(post.newsgroup);

	// Need to get the proper headers together:
	// From, Newsgroups, Subject, Message-ID, References
	// and I believe, Content-Type and Content-Transfer-Encoding

	string text;

	text ~= "From: " ~ headerValue(post.author);
	text ~= "\n";
	text ~= "Subject: " ~ headerValue(post.subject);
	text ~= "\n";
	text ~= "Newsgroups: " ~ headerValue(post.newsgroup);
	text ~= "\n";
	text ~= "References: " ~ headerValue(getReferences(r, r.db, post));
	text ~= "\n";

	// We always post utf8 strings like D uses
	text ~= "Content-Type: text/plain; charset=UTF-8";
	text ~= "\n";
	text ~= "Content-Transfer-Encoding: 8bit";
	text ~= "\n";

	text ~= "Message-ID: " ~ headerValue(post.messageId);
	text ~= "\n";

	text ~= "\n";
	text ~= post.message;

	return text;
}

/**
	Walks the inReplyTo chain upwards and returns the Message-IDs found,
	oldest first, each followed by a space.

	NOTE(review): does not terminate if the stored reply chain contains a cycle.
*/
string getReferences(Newsreader r, Database db, Post post) {
	assert(post !is null);

	string references;
	while(post.inReplyTo.length) {
		references = post.inReplyTo ~ " " ~ references;
		post = r.getMessage(post.newsgroup, post.inReplyTo);
		//objectFromResult!(Post)(db, db.query("
		//	SELECT messageId, inReplyTo FROM posts WHERE messageId = ?", post.inReplyTo));
	}

	return references;
}

/// Counts the '>' characters at the start of a line; spaces between them are skipped.
int quoteCount(string line) {
	line = line.strip;

	int count = 0;
	foreach(dchar c; line) {
		if(c == '>')
			count++;
		else if(c != ' ')
			break;
	}

	return count;
}

/**
	Removes the quote prefix from a line: every leading '>' that
	quoteCount counts (so "> > text" loses both levels), plus one space
	after the last of them.

	Returns: the stripped text after the prefix; "" if the line holds
	nothing but quote markers; the line unchanged if it is blank.
*/
string trimQuote(string line) {
	string l = line.strip;

	if(l.length == 0)
		return line; // nothing to trim

	// index just past the last '>' of the prefix
	size_t afterQuotes = 0;
	foreach(i, char c; l) {
		if(c == '>')
			afterQuotes = i + 1;
		else if(c != ' ')
			break;
	}

	if(afterQuotes < l.length && l[afterQuotes] == ' ')
		afterQuotes++;

	return l[afterQuotes .. $];
}

/**
	Turns the plain text of a message into HTML using heuristics:
	paragraphs, nested <blockquote>s for '>' quoting (with a <cite> when
	the line before the quote looks like an attribution) and <pre> blocks
	for code, command lines and ASCII art.

	Returns: a <div> holding the markup, or a single <pre> with the raw
	text when the heuristics fail or no paragraph was produced
*/
Element messageToHtmlElement(Document document, string message) {
	// Lines with a lot of non-traditional characters need to be <pre>
	// since it's probably ASCII art

	// \n\n needs to be preserved so paragraphs work out

	// Quote blocks need to be preserved by line (or put into a blockquote)

	// Blocks that look like D code need to be put in <pre>

	Element giveUp() { // if we can't figure it out with confidence, give up and
			// give the user some <pre> text. It's probably easier to read
			// than butchered, wrong markup
		auto element = document.createElement("pre");
		element.innerText = message;
		return element;
	}

	try {

		auto holder = document.createElement("div");
		assert(holder !is null);

		auto lines = message.split("\n");
		int position = 0;

		// the line n places after the current one, or null past the end
		string peek(int n = 1) {
			if(position + n >= lines.length)
				return null;
			return lines[position + n];
		}

		if(lines.length == 0)
			return giveUp();

		Element current = document.createElement("p");

		// attaches the element being filled (if it has any text) and, when
		// a tag name is given, starts a new one
		void newElement(string tagName) {
			auto length = current.innerText.length;
			if(length)
				holder.appendChild(current);
			if(length > 256) // some display modes might want to hide long content, this is about a paragraph
				current.setAttribute("class", current.getAttribute("class") ~ " long");

			if(tagName.length)
				current = document.createElement(tagName);
		}

		// opens a nested container (e.g. a blockquote) and starts a paragraph in it
		void newHolder(string tagName) {
			newElement(null);
			holder = holder.addChild(tagName);
			assert(holder !is null);
			current = document.createElement("p");
		}

		// closes the innermost container
		void popHolder() {
			if(holder.parentNode is null)
				return; // to avoid crash
			newElement(null); // ensure the thing actually gets attached
			current = document.createElement("p");
			holder = holder.parentNode;
			assert(holder  !is null);
		}

		int lastQuoteCount = 0;

		//string currentCitation;
		string[] pendingCitations;

		bool inCodeBlock = false;

		for(string line = lines[0]; position < lines.length; ++position, line = position < lines.length ? lines[position] : null) {
			int newQuoteCount = quoteCount(line);
			if(newQuoteCount > 0)
				line = trimQuote(line);

			bool lineIsConsumed = false;

			// if the next line is a new quote, this might be a citation...
			if((quoteCount(peek()) > newQuoteCount) ||
				// some people put a blank line after the citation...
				(trimQuote(peek()).length == 0 && quoteCount(peek(2)) > newQuoteCount))
			{
				// we might be looking at a citation

				//string weHave = currentCitation.strip;
				//if(weHave.length && weHave[$-1] == ':') {
					// we already have a good citation!
				//} else
				if(looksLikeCitation(trimQuote(line).strip)) {
					pendingCitations ~= trimQuote(line).strip;
					lineIsConsumed = true;
				}
			}

			if(newQuoteCount > lastQuoteCount) {
				newHolder("blockquote");
				if(pendingCitations.length) {
					string currentCitation = pendingCitations[0];
					pendingCitations = pendingCitations[1 .. $];

					holder.addChild("cite", currentCitation);
				}
				//currentCitation = null;
			} else if(newQuoteCount < lastQuoteCount) {
				popHolder();
			}

			lastQuoteCount = newQuoteCount;

			if(lineIsConsumed)
				continue; // we already used this line elsewhere

			// this marker line toggles an explicit code block
			if(line == "/* **************** */") {
				inCodeBlock = !inCodeBlock;
				if(inCodeBlock) {
					newElement("pre");
					current.setAttribute("class", "d_code");
				}
			}

			if(inCodeBlock) {
				current.appendText(line ~ "\n");
				continue;
			}

			/+
			// if we're inside a blockquote, we don't need the quote character...
			if(current.tagName == "blockquote" && line.length >= 1 && line[0] == '>') {
				// cut off the quote character
				line = line[1..$];

				if(line.length == 0) {
					// we're still inside the block, but want a new paragraph
					current = current.addChild("p", "");

				}
			}
			+/

			if(line.strip.length == 0) {
				if(current.tagName == "pre" && looksLikeCode(peek()))
					current.appendText("\n"); // keep the same code block...
				else
					newElement("p"); // we're going to a new paragraph
				continue;
			}

			// command line goes first because some command lines look like code
			// but really aren't. But not many code things match the command line
			// heuristic
			if(line.looksLikeCommandLine && current.tagName != "pre") {
				newElement("pre");
				current.setAttribute("class", "command_line");
			}

			// checks next line for brace too, to accommodate brace on own line style
			if((line.looksLikeCode || peek().strip.length == 1 && peek().strip()[$-1] == '{') && current.tagName != "pre") {
				newElement("pre");
				current.setAttribute("class", "d_code");
			}

			// lowest priority is generic ascii art - whitespace or
			// symbols that look significant. If the next line looks
			// like art and this line is short, assume it is part of the art.
			if((line.looksLikeAsciiArt || (peek().looksLikeAsciiArt && line.length < 25)) && current.tagName != "pre") {
				newElement("pre");
				current.setAttribute("class", "art");
			}

			// if there's two short lines in a row outside a <pre> block,
			// let's assume the user wanted that break for some reason
			// and honor it too. (I've never seen anyone deliberately wrap
			// normal text to less than about 25 characters, so I'll use that
			// number as the abnormally short value)

			// Note it excludes blockquotes because they are currently original
			// whitespace preserved anyway, and we don't want to double up
			assert(current !is null);
			assert(holder  !is null);
			if(current.tagName != "pre" && holder.tagName != "blockquote" && line.length < 25 && peek().length < 25) {
				// if we're at the end of input or beginning of a paragraph,
				// this is meaningless
				if(current.innerText.length > 0 && peek().length > 0) {
					current.appendChild(document.createElement("br"));
				}
			}

			/*
			// never want a <br> inside a pre!
			if(line[0] == '>' && current.tagName != "pre") {
				current.appendChild(document.createElement("br"));
			}

			// if we're in a blockquote, we can chop off the quote character at this point,
			// since the HTML tag is doing its job
			if(current.tagName == "blockquote" && line.length >= 1 && line[0] == '>')
				line = line[1..$];
			*/

			// looks like an ordered list -- make sure there's at least newlines for it so it is legible
			if(/*current.tagName != "ol" &&*/ line.length > 3 && (line[1..3] == ". " || line[1..3] == ") ")) {
				if(current.tagName != "pre" && current.childNodes.length) // there should never be a break at the beginning!
					current.appendChild(document.createElement("br"));
			}

			//auto urlFinder = std.regex.regex(std.regex.url); // FIXME: this regex sucks!
			//auto matches = std.regex.match(line, urlFinder);
			//if(matches.empty || matches.front.hit.length == 0)

			addSomeText(line, current);

			current.appendText("\n"); // we want to keep these lines around so view source is easy
						  // and they might take the place of a space anyway
		}

		newElement(null); // make sure the current is appended before we return

		// make sure we get all the way to the top....
		while(holder.parentNode !is null)
			holder = holder.parentNode;

		assert(holder !is null);

		// if we didn't detect any paragraphs, parsing probably failed
		if(holder.getElementsByTagName("p").length == 0)
			return giveUp();

		return holder;
	} catch(Exception e) {
		auto holder = giveUp();
		holder.addChild("pre", "Exception: \n" ~ e.toString());
		return holder;
	}
}

/// True if the line looks like a quote attribution: it ends with ':' or contains "wrote" or "news:".
bool looksLikeCitation(string line) {
	if(line is null)
		return false;
	line = line.strip;
	if(line.length != 0 && (line[$-1] == ':' || line.indexOf("wrote") != -1 || line.indexOf("news:") != -1)) {
		return true;
	}

	return false;
}

/**
	Appends a line of text to an element, turning the things the matchers
	recognise (currently only URLs) into links. The text before and after
	a match is processed recursively.
*/
void addSomeText(string line, Element current) {
	// returns true if you should try something else
	bool tryAMatch(ItemMatch function(string) thingToTry) {
		auto match = thingToTry(line); // use the matcher we were handed
		if(match.empty)
			return true;

		addSomeText(match.pre, current);
		current.appendChild(new Link(match.hit, match.hit));
		addSomeText(match.post, current);

		return false;
	}

	if(tryAMatch(&checkForUrls))
	if(tryAMatch(&checkForBold))
		// finally...
		current.appendText(line);
}

/// Placeholder matcher: never matches.
ItemMatch checkForBold(string line) {
	ItemMatch u;
	u.empty = true;
	return u;
}

/// Result of a matcher: the text before the match, the match itself and the text after it.
struct ItemMatch {
	bool empty;
	string pre, hit, post;
	ItemMatch front() { return this;}
}

/**
	Finds one URL-like run in a line. A match starts at a known scheme,
	"www." or one of a few D-related domains and ends at the first space
	or '>' (a period right before the space is left out).
*/
ItemMatch checkForUrls(string line) {
	ItemMatch u;

	int idx = line.indexOf("http://");
	if(idx == -1)
		idx = line.indexOf("https://");
	if(idx == -1)
		idx = line.indexOf("ftp://");
	if(idx == -1)
		idx = line.indexOf("www.");
	if(idx == -1)
		idx = line.indexOf("digitalmars.com"); // if all else fails, look for some common D domains
	if(idx == -1)
		idx = line.indexOf("d-programming-language.org");
	if(idx == -1)
		idx = line.indexOf("dprogramming.com");
	if(idx == -1)
		idx = line.indexOf("dsource.org");
	if(idx == -1)
		idx = line.indexOf("prowiki.org"); // the D wiki resides here
	if(idx == -1)
		idx = line.indexOf("d.puremagic.com");
	if(idx == -1)
		idx = line.indexOf("dpldocs.info");

	if(idx == -1) {
		u.empty = true;
		return u;
	}

	u.empty = false;
	u.pre = line[0..idx];
	line = line[idx .. $];

	// URLs in prose tend to end with a >, whitespace, or a period
	int space = line.indexOf(" ");
	if(space == -1 || space == 0) // the beginning of the string being a space is nonsense
		space = int.max;
	else {
		// if there's a period right before the space, don't
		// include it in the url, it's probably punctuation
		if(line[space - 1] == '.')
			space--; // exclude it
	}

	int bracket = line.indexOf(">");
	if(bracket == -1)
		bracket = int.max;

	int ending = std.algorithm.min(space, bracket); // whichever comes first

	// no apparent ending, use the rest of the string
	if(ending == int.max) {
		u.hit = line;
		return u;
	}

	u.hit = line[0 .. ending];
	u.post = line[ending .. $];

	return u;
}

/// True if the line starts like a shell prompt ("$ " or "# ").
bool looksLikeCommandLine(string line) {
	line = line.strip;
	if(line.length < 3) // too short
		return false;

	// the common unix prompts have spaces - most English uses don't
	if(line[0..2] == "$ " || line[0..2] == "# ")
		return true;

	return false;
}

/// True if the line contains a tab or a long run of spaces.
bool looksLikeAsciiArt(string line) {
	// if the author put in this much whitespace, it is probably
	// deliberate, so let's honor it

	// NOTE(review): the run length is an assumption. The copy of this file
	// that was reviewed had the literal collapsed to a single space, which
	// would match almost every line of prose; confirm the width against
	// the upstream source.
	if(line.indexOf("    ") != -1 || line.indexOf("\t") != -1)
		return true;
	return false;
}

/// True if the line looks like D code rather than English.
bool looksLikeCode(string line) {
	if(line.length == 0)
		return false;

	// D ends with semicolons or braces at the end of a line
	// much more often than English
	// but it does sometimes happen in English. So
	// we'll check for an English style period in the line
	// to try to differentiate
	if((line[$-1] == ';' && line.indexOf(". ") == -1) || line[$-1] == '{' || line[$-1] == '}')
		return true;

	// an comment marker is pretty rare in non-code too,
	// so probably a safe bet
	if(line.indexOf("/*") != -1 || line.indexOf("/+") != -1)
		return true;

	// one big exception: URLs have the single line comment marker,
	// but there's a colon right before it - not common in code
	auto idx = line.indexOf("//");
	if(idx != -1 && (idx == 0 || line[idx-1] != ':'))
		return true;

	return false;
}

mixin FancyMain!(Newsreader);

/// Link to the single-message view of a post; the default text is "subject by author".
Link linkToPost(Post post, string text = null) {
	if(text is null)
		text = format("%s by %s", post.subject, post.author);
	return new Link (
		"get-message?newsgroup=" ~ std.uri.encodeComponent(post.newsgroup) ~ "&messageId=" ~ std.uri.encodeComponent(post.messageId),
		text);
}

/// ditto
Link linkToPost(string newsgroup, string messageId, string text) {
	return new Link (
		"get-message?newsgroup=" ~ std.uri.encodeComponent(newsgroup) ~ "&messageId=" ~ std.uri.encodeComponent(messageId),
		text);
}

/**
	Fetches one article by Message-ID from the news server and parses it.
	The returned Post has not been saved.

	Throws: Exception("no such article") on a 430 reply; an enforce
	failure on any other reply that is not "220 0 ...".
*/
Post getIndividualPostFromNewsServer(Database db, string newsgroup, string messageID, Newsreader newsreader) {
	newsgroup = sanitizeNewsgroupName(newsgroup);

	auto f = openNetwork("news.digitalmars.com", 119);

	f.readln(); // skip the hello line

	f.writeln("group " ~ newsgroup);
	f.readln(); // we don't really care about this either

	f.writeln("article " ~ messageID);
	auto response = f.readln().strip;

	if(response == "430 No such article")
		throw new Exception("no such article");

	enforce(response.length >= 5 && response[0 .. 5] == "220 0", response);

	string postContents = readNntpTextBlock(f);

	f.writeln("QUIT"); // say goodbye
	f.readln(); // it says bye too

	return postFromArticleText(db, postContents, newsgroup, newsreader);
}

/**
	Parses raw article text (headers, blank line, body) into a Post.

	Folded header lines are joined first. For multipart articles only the
	first part is kept; HTML-only articles are converted to text.

	Throws: if the newsgroup name is invalid or the text has no blank
	line separating headers from body.
*/
Post postFromArticleText(Database db, string contents, string newsgroup, Newsreader newsreader) {
	newsgroup = sanitizeNewsgroupName(newsgroup);

	auto post = new Post(db, newsreader);

	post.mode = UpdateOrInsertMode.AlwaysInsert;
	post.newsgroup = newsgroup;

	string contentType;
	string encoding;

	contents = contents.strip;
	auto idx = contents.indexOf("\n\n");
	enforce(idx != -1);

	string headers = contents[0..idx];
	string message = contents[idx+2 .. $];

	// Join continuation lines onto the header they belong to. A line is a
	// continuation if it starts with whitespace or has no colon at all.
	string[] unfolded;
	foreach(line; headers.split("\n")) {
		bool continuation = unfolded.length != 0 &&
			(line.indexOf(":") == -1 || line[0] == ' ' || line[0] == '\t');
		if(continuation)
			unfolded[$-1] ~= line.replace("\t", " ");
		else
			unfolded ~= line;
	}

	// Every header is handled, including the last one.
	foreach(header; unfolded) {
		auto colon = header.indexOf(":");
		if(colon == -1)
			continue; // not a header
		if(colon + 2 >= header.length)
			continue; // not a name/value pair apparently

		string name = header[0 .. colon].strip;
		string value = header[colon + 2 .. $].strip;

		switch(name) {
			case "Content-Type":
				contentType = value;
			break;
			case "Content-Transfer-Encoding":
				encoding = value;
			break;
			case "Xref":
				auto fields = value.split(" "); // [0] is the server, which we don't care about
				if(fields.length > 1) {
					auto pair = fields[1].split(":"); // [0] is the newsgroup name which we already know
					if(pair.length > 1)
						post.articleId = to!int(pair[1]);
				}
			break;
			case "Date":
				post.datePosted = std.date.parse(value);
			break;
			case "Subject":
				post.subject = translateEncodedWord(value);
			break;
			case "References":
				string[] refs;
				foreach(reference; value.split(" ")) {
					auto id = reference.strip;
					// localhost ids aren't usable, so cut them out
					if(id.length && id.indexOf("localhost") == -1)
						refs ~= id;
				}
				if(refs.length) {
					post.threadRoot = refs[0];
					post.inReplyTo = refs[$ - 1];
				}
			break;
			case "From":
				post.author = translateEncodedWord(value);
			break;
			case "Message-ID":
				post.messageId = value;
			break;
			default:
				// we aren't interested in it
		}
	}

	auto boundaryAt = contentType.indexOf("boundary");
	if(contentType.indexOf("multipart") != -1 && boundaryAt != -1) {
		// it is a text and html posting, we only care for the text portion (the first part)
		auto valueAt = boundaryAt + "boundary".length + 1; // skip "boundary="
		string boundary = valueAt < contentType.length ? contentType[valueAt .. $] : "";

		if(boundary.length && boundary[0] == '"') {
			// cutting off the quotes (and anything after the closing one)
			auto close = boundary[1 .. $].indexOf("\"");
			boundary = close == -1 ? boundary[1 .. $] : boundary[1 .. close + 1];
		} else {
			auto semicolon = boundary.indexOf(";");
			if(semicolon != -1)
				boundary = boundary[0 .. semicolon].strip;
		}

		auto partAt = boundary.length ? message.indexOf(boundary) : -1;
		if(partAt != -1) {
			message = message[partAt .. $]; // cut off until the boundary

			auto headerEnd = message.indexOf("\n\n");
			if(headerEnd != -1) {
				// mimeHeaders stops before the blank line, so only the
				// encoding names themselves can be searched for
				auto mimeHeaders = message[0 .. headerEnd];
				if(mimeHeaders.indexOf("quoted-printable") != -1)
					encoding = "quoted-printable"; // kinda hackish
				if(mimeHeaders.indexOf("base64") != -1)
					encoding = "base64"; // kinda hackish

				message = message[headerEnd .. $]; // cut off the portion's MIME headers

				// cut off everything after the boundary. Note it is preceded
				// by \n--\n which we also slice off
				auto nextBoundary = message.indexOf(boundary);
				if(nextBoundary >= 4)
					message = message[0 .. nextBoundary - 4];

				message = translateTransferEncoding(message, encoding);
			}
		}
	} else {
		if(contentType.indexOf("html") != -1)
			message = htmlToText( translateTransferEncoding(message, encoding));
		else
			message = translateTransferEncoding(message, encoding);
	}

	// FIXME: strip attachments out too, we don't care about that

	// FIXME: it ignores character sets, just assuming everything is utf8
	post.message = message;

	return post;
}

/// Value of a hexadecimal digit, or -1 if the character is not one.
private int hexDigitValue(char c) {
	if(c >= '0' && c <= '9')
		return c - '0';
	if(c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if(c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/// Decodes the two hex digits at s[at .. at + 2]; returns false if they are missing or not hex.
private bool decodeHexPair(string s, size_t at, out ubyte value) {
	if(at + 2 > s.length)
		return false;
	int hi = hexDigitValue(s[at]);
	int lo = hexDigitValue(s[at + 1]);
	if(hi < 0 || lo < 0)
		return false;
	value = cast(ubyte) (hi * 16 + lo);
	return true;
}

/**
	Decodes "=?charset?Q?...?=" and "=?charset?B?...?=" sections of a
	header value; everything else, including a lone '=', is copied
	through unchanged.

	FIXME: ignores charset, assuming utf8

	Throws: whatever Base64.decode throws on a malformed base64 payload
*/
string translateEncodedWord(string message) {
	ubyte[] ret;

	size_t pos = 0;
	while(pos < message.length) {
		if(message[pos] == '=' && pos + 1 < message.length && message[pos + 1] == '?') {
			// candidate encoded word: charset ? encoding ? payload ?=
			auto afterIntro = message[pos + 2 .. $];
			auto charsetEnd = afterIntro.indexOf("?");
			if(charsetEnd != -1) {
				auto afterCharset = afterIntro[charsetEnd + 1 .. $];
				auto encodingEnd = afterCharset.indexOf("?");
				if(encodingEnd != -1) {
					auto afterEncoding = afterCharset[encodingEnd + 1 .. $];
					auto payloadEnd = afterEncoding.indexOf("?=");
					if(payloadEnd != -1) {
						string kind = afterCharset[0 .. encodingEnd];
						string payload = afterEncoding[0 .. payloadEnd];

						if(kind == "Q" || kind == "q") {
							// quoted-printable, with '_' standing in for a space
							size_t p = 0;
							while(p < payload.length) {
								ubyte value;
								if(payload[p] == '=' && decodeHexPair(payload, p + 1, value)) {
									ret ~= value;
									p += 3;
									continue;
								}
								ret ~= payload[p] == '_' ? ' ' : payload[p];
								p++;
							}
						} else {
							// base 64: input length has to be a multiple of four
							while(payload.length % 4)
								payload ~= "=";
							ret ~= Base64.decode(payload);
						}

						// continue right after the closing "?="
						pos = message.length - afterEncoding.length + payloadEnd + 2;
						continue;
					}
				}
			}
		}

		ret ~= message[pos];
		pos++;
	}

	return cast(string) ret;
}

/// Reduces a From value to the display name: drops the "<address>" part and surrounding double quotes.
string beautifyName(string name) {
	auto idx = name.indexOf("<");
	if(idx != -1)
		name = name[0 .. idx];
	name = name.strip();
	if(name.length > 2 && name[0] == '"' && name[$-1] == '"')
		name = name[1 .. $-1];

	return name;
}

/**
	Undoes the transfer encoding of a message body. "\r\n" is always
	normalised to "\n"; only "quoted-printable" is decoded, any other
	encoding is returned as it is.

	A malformed escape is copied through literally instead of aborting,
	since the text comes from the network.
*/
string translateTransferEncoding(string message, string encoding) {
	message = message.replace("\r\n", "\n");

	switch(encoding) {
		case "quoted-printable":
			ubyte[] ret;

			size_t i = 0;
			while(i < message.length) {
				char c = message[i];
				ubyte value;

				if(c != '=') {
					ret ~= c;
					i++;
				} else if(i + 1 == message.length) {
					i++; // soft line break whose newline was trimmed off
				} else if(message[i + 1] == '\n') {
					i += 2; // soft line break: drop both characters
				} else if(decodeHexPair(message, i + 1, value)) {
					ret ~= value;
					i += 3;
				} else {
					ret ~= c; // not a valid escape, keep the '='
					i++;
				}
			}

			message = cast(string) ret;
		break;
		default:
	}

	return message;
}

/**
	Validates a newsgroup name: it must contain "digitalmars." and consist
	only of ASCII letters, digits, '.', '+' and '_'.

	Returns: the name, unchanged
	Throws: Exception("invalid newsgroup") otherwise
*/
string sanitizeNewsgroupName(string ng) {
	if(ng.indexOf("digitalmars.") == -1)
		throw new Exception("invalid newsgroup");

	foreach(dchar d; ng) {
		if(!(
			(d >= 'A' && d <= 'Z') ||
			(d >= 'a' && d <= 'z') ||
			(d >= '0' && d <= '9') ||
			(d == '.' || d == '+' || d == '_')
		))
			throw new Exception("invalid newsgroup");
	}

	return ng;
}

/// Reads a preference from the "arsd_newsgroup_viewer-<key>" cookie; def if it is not set.
string getUserPreference(Cgi cgi, string key, string def = null) {
	if("arsd_newsgroup_viewer-" ~ key in cgi.cookies)
		return cgi.cookies["arsd_newsgroup_viewer-" ~ key];
	return def;
}

/// Stores a preference in the "arsd_newsgroup_viewer-<key>" cookie and returns the value.
string setUserPreference(Cgi cgi, string key, string value) {
	// NOTE(review): the unit of the expiry argument is defined by
	// Cgi.setCookie, which is not visible here - confirm the lifetime.
	cgi.setCookie("arsd_newsgroup_viewer-" ~ key, value, 1000 * 3600 * 7);
	return value;
}

///////////////////////////

/// Builds a T from one result row by copying every column into obj.fields.
T objectFromResult(T)(Database db, Row r, Newsreader newsreader) {
	auto obj = new T(db, newsreader);
	foreach(k, v; r) {
		obj.fields[k] = v;
	}

//	obj.mode = UpdateOrInsertMode.AlwaysUpdate;
	return obj;
}

/// Builds one T per row of a result set.
T[] arrayFromResult(T)(Database db, ResultSet r, Newsreader newsreader) {
	T[] ret;
	foreach(item; r) {
		ret ~= objectFromResult!(T)(db, item, newsreader);
	}
	return ret;
}