// since it's probably ASCII art
// \n\n needs to be preserved so paragraphs work out
// Quote blocks need to be preserved by line (or put into a blockquote)
// Blocks that look like D code need to be put in
Element giveUp() { // if we can't figure it out with confidence, give up and
// give the user some text. It's probably easier to read
// than butchered, wrong markup
auto element = document.createElement("pre");
element.innerText = message;
return element;
}
try {
auto holder = document.createElement("div");
assert(holder !is null);
auto lines = message.split("\n");
int position = 0;
string peek(int n = 1) {
if(position + n >= lines.length)
return null;
return lines[position + n];
}
if(lines.length == 0)
return giveUp;
Element current = document.createElement("p");
void newElement(string tagName) {
auto length = current.innerText.length;
if(length)
holder.appendChild(current);
if(length > 256) // some display modes might want to hide long content, this is about a paragraph
current.setAttribute("class", current.getAttribute("class") ~ " long");
if(tagName.length)
current = document.createElement(tagName);
}
void newHolder(string tagName) {
newElement(null);
holder = holder.addChild(tagName);
assert(holder !is null);
current = document.createElement("p");
}
void popHolder() {
if(holder.parentNode is null)
return; // to avoid crash
newElement(null); // ensure the thing actually gets attached
current = document.createElement("p");
holder = holder.parentNode;
assert(holder !is null);
}
int lastQuoteCount = 0;
//string currentCitation;
string[] pendingCitations;
bool inCodeBlock = false;
for(string line = lines[0]; position < lines.length; ++position, line = position < lines.length ? lines[position] : null) {
int newQuoteCount = quoteCount(line);
if(newQuoteCount > 0)
line = trimQuote(line);
bool lineIsConsumed = false;
// if the next line is a new quote, this might be a citation...
if((quoteCount(peek()) > newQuoteCount) ||
// some people put a blank line after the citation...
(trimQuote(peek()).length == 0 && quoteCount(peek(2)) > newQuoteCount))
{
// we might be looking at a citation
//string weHave = currentCitation.strip;
//if(weHave.length && weHave[$-1] == ':') {
// we already have a good citation!
//} else
if(looksLikeCitation(trimQuote(line).strip)) {
pendingCitations ~= trimQuote(line).strip;
lineIsConsumed = true;
}
}
if(newQuoteCount > lastQuoteCount) {
newHolder("blockquote");
if(pendingCitations.length) {
string currentCitation = pendingCitations[0];
pendingCitations = pendingCitations[1 .. $];
holder.addChild("cite", currentCitation);
}
//currentCitation = null;
} else if(newQuoteCount < lastQuoteCount) {
popHolder();
}
lastQuoteCount = newQuoteCount;
if(lineIsConsumed)
continue; // we already used this line elsewhere
if(line == "/* **************** */") {
inCodeBlock = !inCodeBlock;
if(inCodeBlock) {
newElement("pre");
current.setAttribute("class", "d_code");
}
}
if(inCodeBlock) {
current.appendText(line ~ "\n");
continue;
}
/+
// if we're inside a blockquote, we don't need the quote character...
if(current.tagName == "blockquote" && line.length >= 1 && line[0] == '>') {
// cut off the quote character
line = line[1..$];
if(line.length == 0) {
// we're still inside the block, but want a new paragraph
current = current.addChild("p", "");
}
}
+/
if(line.strip.length == 0) {
if(current.tagName == "pre" && looksLikeCode(peek()))
current.appendText("\n"); // keep the same code block...
else
newElement("p"); // we're going to a new paragraph
continue;
}
// command line goes first because some command lines look like code
// but really aren't. But not many code things match the command line
// heuristic
if(line.looksLikeCommandLine && current.tagName != "pre") {
newElement("pre");
current.setAttribute("class", "command_line");
}
// checks next line for brace too, to accommodate brace on own line style
if((line.looksLikeCode || peek().strip.length == 1 && peek().strip()[$-1] == '{') && current.tagName != "pre") {
newElement("pre");
current.setAttribute("class", "d_code");
}
// lowest priority is generic ascii art - whitespace or
// symbols that look significant. If the next line looks
// like art and this line is short, assume it is part of the art.
if((line.looksLikeAsciiArt || (peek.looksLikeAsciiArt && line.length < 25)) && current.tagName != "pre") {
newElement("pre");
current.setAttribute("class", "art");
}
// if there's two short lines in a row outside a block,
// let's assume the user wanted that break for some reason
// and honor it too. (I've never seen anyone deliberately wrap
// normal text to less than about 25 characters, so I'll use that
// number as the abnormally short value)
// Note it excludes blockquotes because they are currently original
// whitespace preserved anyway, and we don't want to double up
assert(current !is null);
assert(holder !is null);
if(current.tagName != "pre" && holder.tagName != "blockquote" && line.length < 25 && peek().length < 25) {
// if we're at the end of input or beginning of a paragraph,
// this is meaningless
if(current.innerText.length > 0 && peek().length > 0) {
current.appendChild(document.createElement("br"));
}
}
/*
// never want a
inside a pre!
if(line[0] == '>' && current.tagName != "pre") {
current.appendChild(document.createElement("br"));
}
// if we're in a blockquote, we can chop off the quote character at this point,
// since the HTML tag is doing its job
if(current.tagName == "blockquote" && line.length >= 1 && line[0] == '>')
line = line[1..$];
*/
// looks like an ordered list -- make sure there's at least newlines for it so it is legible
if(/*current.tagName != "ol" &&*/ line.length > 3 && (line[1..3] == ". " || line[1..3] == ") ")) {
if(current.tagName != "pre" && current.childNodes.length) // there should never be a break at the beginning!
current.appendChild(document.createElement("br"));
}
//auto urlFinder = std.regex.regex(std.regex.url); // FIXME: this regex sucks!
//auto matches = std.regex.match(line, urlFinder);
//if(matches.empty || matches.front.hit.length == 0)
addSomeText(line, current);
current.appendText("\n"); // we want to keep these lines around so view source is easy
// and they might take the place of a space anyway
}
newElement(null); // make sure the current is appended before we return
// make sure we get all the way to the top....
while(holder.parentNode !is null)
holder = holder.parentNode;
assert(holder !is null);
// if we didn't detect any paragraphs, parsing probably failed
if(holder.getElementsByTagName("p").length == 0)
return giveUp;
return holder;
} catch(Exception e) {
auto holder = giveUp;
holder.addChild("pre", "Exception: \n" ~ e.toString());
return holder;
}
}
/// Heuristic test for an attribution line such as "So-and-so wrote:".
///
/// Returns true when the stripped line is non-empty and either ends with a
/// colon, contains "wrote", or contains "news:". A null or blank line is
/// never a citation.
bool looksLikeCitation(string line) {
    if(line is null)
        return false;

    auto trimmed = line.strip;
    if(trimmed.length == 0)
        return false;

    // any single signal is enough to call it a citation
    bool endsWithColon = trimmed[$-1] == ':';
    bool mentionsWrote = trimmed.indexOf("wrote") != -1;
    bool mentionsNewsUri = trimmed.indexOf("news:") != -1;

    return endsWithColon || mentionsWrote || mentionsNewsUri;
}
/// Appends `line` to `current`, turning recognised items into child nodes.
///
/// Each matcher is tried in turn. When one reports a hit, the text before
/// the hit and the text after it are handled recursively and the hit itself
/// is appended as a Link whose href and label are both the hit text. If no
/// matcher hits, the line is appended as plain text.
///
/// Params:
///   line    = the text to add
///   current = the element receiving the text and links
void addSomeText(string line, Element current) {
    // returns true if you should try something else
    bool tryAMatch(ItemMatch function(string) thingToTry) {
        // Use the matcher we were handed. The previous code always called
        // checkForUrls here, so the second pass never ran its own matcher.
        auto match = thingToTry(line);
        if(match.empty)
            return true;

        addSomeText(match.pre, current);
        // NOTE(review): every hit is rendered as a Link. That is right for
        // URLs; checkForBold is currently a stub that never matches, so it
        // will need its own rendering once it is implemented.
        current.appendChild(new Link(match.hit, match.hit));
        addSomeText(match.post, current);
        return false;
    }

    if(tryAMatch(&checkForUrls))
    if(tryAMatch(&checkForBold))
        // finally... nothing special found, keep it as plain text
        current.appendText(line);
}
/// Placeholder matcher for bold text: it never finds anything and always
/// returns an ItemMatch whose `empty` flag is set.
ItemMatch checkForBold(string line) {
    auto nothing = ItemMatch.init;
    nothing.empty = true;
    return nothing;
}
/// Result of scanning a line for one interesting item.
struct ItemMatch {
    /// true when nothing was found; the other fields are then unused
    bool empty;
    /// text before the matched item
    string pre;
    /// the matched item itself
    string hit;
    /// text after the matched item
    string post;

    /// Returns a copy of this match.
    ItemMatch front() {
        return this;
    }
}
/// Finds the first URL-looking item in `line`.
///
/// The markers below are tried in order and the first marker that occurs
/// anywhere in the line wins (so an "http://" later in the line beats a
/// "www." earlier in it). The URL runs from the marker to the first space or
/// '>' after it, or to the end of the line.
///
/// Returns: an ItemMatch with `empty` set when no marker is present;
/// otherwise `pre`, `hit` and `post` hold the text before the URL, the URL,
/// and the text after it.
ItemMatch checkForUrls(string line) {
    static immutable string[] urlMarkers = [
        "http://",
        "https://",
        "ftp://",
        "www.",
        // if all else fails, look for some common D domains
        "digitalmars.com",
        "d-programming-language.org",
        "dprogramming.com",
        "dsource.org",
        "prowiki.org", // the D wiki resides here
        "d.puremagic.com",
        "dpldocs.info",
    ];

    ItemMatch u;

    ptrdiff_t idx = -1;
    foreach(marker; urlMarkers) {
        idx = line.indexOf(marker);
        if(idx != -1)
            break;
    }

    if(idx == -1) {
        u.empty = true;
        return u;
    }

    u.empty = false;
    u.pre = line[0 .. idx];
    line = line[idx .. $];

    // URLs in prose tend to end with a >, whitespace, or a period
    ptrdiff_t ending = -1; // -1 means "no terminator found"

    auto space = line.indexOf(" ");
    if(space > 0) { // the beginning of the string being a space is nonsense
        // if there's a period right before the space, don't
        // include it in the url, it's probably punctuation
        ending = (line[space - 1] == '.') ? space - 1 : space;
    }

    auto bracket = line.indexOf(">");
    if(bracket != -1 && (ending == -1 || bracket < ending))
        ending = bracket; // whichever comes first

    if(ending == -1) {
        // no apparent ending, use the rest of the string - but a period
        // that closes the line is sentence punctuation, just like the
        // period-before-space case above, so keep it out of the link
        if(line.length > 1 && line[$ - 1] == '.') {
            u.hit = line[0 .. $ - 1];
            u.post = line[$ - 1 .. $];
        } else {
            u.hit = line;
        }
        return u;
    }

    u.hit = line[0 .. ending];
    u.post = line[ending .. $];
    return u;
}
/// Heuristic: does the line start with a Unix shell prompt?
///
/// After stripping surrounding whitespace the line must be at least three
/// characters long and begin with "$ " or "# ". The common Unix prompts are
/// followed by a space, which ordinary English use of those symbols
/// usually is not.
bool looksLikeCommandLine(string line) {
    auto trimmed = line.strip;

    // too short to be a prompt plus a command
    if(trimmed.length < 3)
        return false;

    auto prompt = trimmed[0 .. 2];
    return prompt == "$ " || prompt == "# ";
}
/// Heuristic: does the line contain whitespace that looks deliberate?
///
/// Returns true when the line contains the space literal below or a tab.
/// NOTE(review): in this view the space literal is a single space, which
/// would match nearly every prose line; the original intent ("this much
/// whitespace") suggests a longer run of spaces - confirm against upstream.
bool looksLikeAsciiArt(string line) {
    bool hasSpaceRun = line.indexOf(" ") != -1;
    bool hasTab = line.indexOf("\t") != -1;

    // if the author put the whitespace in, honor it
    return hasSpaceRun || hasTab;
}
/// Heuristic: does the line look like D source code?
///
/// A line counts as code when any of these hold:
///   - it ends with '{' or '}';
///   - it ends with ';' and contains no English-style ". ";
///   - it contains a block comment opener, "/*" or "/+";
///   - it contains "//" that is not immediately preceded by ':'
///     (a colon right before it is what a URL looks like).
/// An empty line is never code.
bool looksLikeCode(string line) {
    if(line.length == 0)
        return false;

    // D ends lines with braces or semicolons far more often than English
    char last = line[$-1];
    if(last == '{' || last == '}')
        return true;
    // a semicolon does occur in English; a ". " in the line is the tell
    if(last == ';' && line.indexOf(". ") == -1)
        return true;

    // a comment marker is pretty rare in non-code, so a safe bet
    foreach(opener; ["/*", "/+"])
        if(line.indexOf(opener) != -1)
            return true;

    // the single-line comment marker also appears in URLs, where a colon
    // sits right before it - not common in code
    auto slashes = line.indexOf("//");
    if(slashes == -1)
        return false;
    return slashes == 0 || line[slashes - 1] != ':';
}
// Mixes the FancyMain template, instantiated with the Newsreader type, into
// this module.
// NOTE(review): FancyMain is declared outside this view; presumably it
// generates the program entry point around Newsreader - confirm.
mixin FancyMain!(Newsreader);
/// Builds a Link to the "get-message" page for `post`.
///
/// Params:
///   post = the post to link to; its newsgroup and messageId are
///          URI-component-encoded into the query string
///   text = link label; when null it defaults to "<subject> by <author>"
Link linkToPost(Post post, string text = null) {
    auto label = (text is null)
        ? format("%s by %s", post.subject, post.author)
        : text;

    auto href = "get-message?newsgroup=" ~ std.uri.encodeComponent(post.newsgroup) ~
        "&messageId=" ~ std.uri.encodeComponent(post.messageId);

    return new Link(href, label);
}
/// Builds a Link to the "get-message" page for a post identified by its
/// newsgroup and message id. Both values are URI-component-encoded into the
/// query string; `text` is used as the link label.
Link linkToPost(string newsgroup, string messageId, string text) {
    auto groupParam = std.uri.encodeComponent(newsgroup);
    auto idParam = std.uri.encodeComponent(messageId);

    auto href = "get-message?newsgroup=" ~ groupParam ~ "&messageId=" ~ idParam;
    return new Link(href, text);
}
/// Fetches one article from news.digitalmars.com (port 119) and parses it
/// into a Post via postFromArticleText.
///
/// Params:
///   db         = database handle passed through to the new Post
///   newsgroup  = group name; validated with sanitizeNewsgroupName
///   messageID  = the id sent with the "article" command
///   newsreader = passed through to the new Post
///
/// Throws: Exception when the newsgroup or message id is invalid, when the
/// server answers 430 (no such article), or when the article response is not
/// the expected "220 0" status.
Post getIndividualPostFromNewsServer(Database db, string newsgroup, string messageID, Newsreader newsreader) {
    newsgroup = sanitizeNewsgroupName(newsgroup);

    // The id is written verbatim into a protocol command line. A CR or LF
    // inside it would let the caller smuggle extra commands to the server.
    foreach(char c; messageID)
        if(c == '\r' || c == '\n')
            throw new Exception("invalid message id");

    auto f = openNetwork("news.digitalmars.com", 119);
    f.readln(); // skip the hello line
    f.writeln("group " ~ newsgroup);
    f.readln(); // we don't really care about this either
    f.writeln("article " ~ messageID);

    auto response = f.readln().strip;
    // 430 is "no such article", whatever wording the server puts after it
    if(response.length >= 3 && response[0 .. 3] == "430")
        throw new Exception("no such article");
    // check the length first so a short or empty reply fails the enforce
    // instead of tripping a range violation on the slice
    enforce(response.length >= 5 && response[0 .. 5] == "220 0");

    // NOTE(review): if the connection drops before the terminating "."
    // arrives this loop depends on what readln does at end of stream -
    // confirm it cannot spin forever.
    string postContents;
    response = f.readln().stripRight;
    while(response != ".") {
        // Lines of a multi-line response that begin with '.' are sent with
        // an extra leading '.' (dot-stuffing); drop it to recover the text.
        if(response.length > 1 && response[0] == '.')
            response = response[1 .. $];
        postContents ~= response.idup ~ "\n";
        response = f.readln().stripRight;
    }

    f.writeln("QUIT"); // say goodbye
    f.readln(); // it says bye too

    return postFromArticleText(db, postContents, newsgroup, newsreader);
}
/// Parses the raw text of a news article (headers, blank line, body) into a
/// new Post.
///
/// The headers Content-Type, Content-Transfer-Encoding, Xref, Date, Subject,
/// References, From and Message-ID are read; all others are ignored. For a
/// multipart article only the first part is kept. The body is run through
/// translateTransferEncoding, and through htmlToText when the content type
/// mentions "html".
///
/// Params:
///   db         = database handle given to the new Post
///   contents   = full article text with "\n" line endings
///   newsgroup  = group name; validated with sanitizeNewsgroupName
///   newsreader = passed through to the new Post
///
/// Throws: Exception when the newsgroup is invalid or when no blank line
/// separates the headers from the body.
Post postFromArticleText(Database db, string contents, string newsgroup, Newsreader newsreader) {
    newsgroup = sanitizeNewsgroupName(newsgroup);
    auto post = new Post(db, newsreader);
    post.mode = UpdateOrInsertMode.AlwaysInsert;
    post.newsgroup = newsgroup;

    string contentType;
    string encoding;

    contents = contents.strip;
    auto bodyStart = contents.indexOf("\n\n");
    enforce(bodyStart != -1);
    string headers = contents[0 .. bodyStart];
    string message = contents[bodyStart + 2 .. $];

    // Interprets one complete (already unfolded) header line.
    void handleHeader(string rawHeader) {
        auto colon = rawHeader.indexOf(":");
        if(colon == -1)
            return; // not a header
        if(colon + 2 >= rawHeader.length)
            return; // not a name/value pair apparently

        string name = rawHeader[0 .. colon].strip;
        string value = rawHeader[colon + 2 .. $].strip;

        switch(name) {
            case "Content-Type":
                contentType = value;
            break;
            case "Content-Transfer-Encoding":
                encoding = value;
            break;
            case "Xref":
                // [0] is the server, which we don't care about
                auto xrefParts = value.split(" ");
                if(xrefParts.length > 1) {
                    // [0] is the newsgroup name which we already know
                    auto groupAndNumber = xrefParts[1].split(":");
                    if(groupAndNumber.length > 1)
                        post.articleId = to!int(groupAndNumber[1]);
                }
            break;
            case "Date":
                post.datePosted = std.date.parse(value);
            break;
            case "Subject":
                post.subject = translateEncodedWord(value);
            break;
            case "References":
                string[] refs;
                foreach(reference; value.split(" "))
                    if(reference.indexOf("localhost") == -1) // localhost ids aren't usable, so cut them out
                        refs ~= reference.strip;
                if(refs.length) {
                    post.threadRoot = refs[0].strip;
                    post.inReplyTo = refs[$ - 1].strip;
                }
            break;
            case "From":
                post.author = translateEncodedWord(value);
            break;
            case "Message-ID":
                post.messageId = value;
            break;
            default:
                // we aren't interested in it
            break;
        }
    }

    // Accumulate each header together with its continuation lines and hand
    // it over once the next header starts - and once more after the loop,
    // so that the final header is not lost and no header is handled twice.
    string currentHeader;
    foreach(header; headers.split("\n")) {
        // a folded line starts with whitespace; lines without any colon are
        // also treated as continuations, as before
        bool folded = header.length && (header[0] == ' ' || header[0] == '\t');
        if(folded || header.indexOf(":") == -1) {
            currentHeader ~= header.replace("\t", " ");
            continue;
        }

        if(currentHeader.length)
            handleHeader(currentHeader);
        currentHeader = header;
    }
    if(currentHeader.length)
        handleHeader(currentHeader);

    if(contentType.indexOf("multipart") != -1) {
        // it is a text and html posting, we only care for the text portion (the first part)
        string boundary;
        auto boundaryAt = contentType.indexOf("boundary");
        if(boundaryAt != -1) {
            auto valueAt = boundaryAt + "boundary".length + 1; // skip past `boundary=`
            if(valueAt < contentType.length)
                boundary = contentType[valueAt .. $];
        }

        if(boundary.length && boundary[0] == '"') {
            // quoted value: take what sits between the quotes
            boundary = boundary[1 .. $];
            auto closingQuote = boundary.indexOf("\"");
            if(closingQuote != -1)
                boundary = boundary[0 .. closingQuote];
        } else {
            // bare value: it ends at the next parameter, if there is one
            auto semicolon = boundary.indexOf(";");
            if(semicolon != -1)
                boundary = boundary[0 .. semicolon];
            boundary = boundary.strip;
        }

        if(boundary.length && message.indexOf(boundary) != -1) {
            message = message[message.indexOf(boundary) .. $]; // cut off until the boundary

            auto partHeadersEnd = message.indexOf("\n\n");
            if(partHeadersEnd != -1) {
                auto mimeHeaders = message[0 .. partHeadersEnd];
                // kinda hackish: just look for the encoding name in the
                // part's headers. (The slice stops before the blank line,
                // so it can never contain "\n\n" itself.)
                if(mimeHeaders.indexOf("quoted-printable") != -1)
                    encoding = "quoted-printable";
                if(mimeHeaders.indexOf("base64") != -1)
                    encoding = "base64";

                message = message[partHeadersEnd .. $]; // cut off the portion's MIME headers

                // cut off everything after the next boundary, along with
                // the four characters in front of it (the line break and
                // dashes that introduce a boundary line). If the next
                // boundary is missing, keep the rest of the text as is.
                auto nextBoundary = message.indexOf(boundary);
                if(nextBoundary >= 4)
                    message = message[0 .. nextBoundary - 4];
            }

            message = translateTransferEncoding(message, encoding);
        }
    } else {
        if(contentType.indexOf("html") != -1)
            message = htmlToText(
                translateTransferEncoding(message, encoding));
        else
            message = translateTransferEncoding(message, encoding);
    }

    // FIXME: strip attachments out too, we don't care about that
    // FIXME: it ignores character sets, just assuming everything is utf8

    post.message = message;
    return post;
}
/// Decodes MIME "encoded words" of the form =?charset?encoding?text?= that
/// appear inside a header value.
///
/// Encoding "Q"/"q" is decoded as quoted-printable with '_' standing for a
/// space; "B"/"b" is decoded as base64 (missing padding is tolerated).
/// Anything that is not a well-formed encoded word - including a plain '='
/// in ordinary text - is copied through unchanged.
///
/// FIXME: ignores the charset, assuming utf8.
///
/// Params:
///   message = the raw header value
/// Returns: the header value with every decodable encoded word replaced by
/// its decoded bytes.
string translateEncodedWord(string message) {
    // value of one hex digit, or -1 when `c` is not a hex digit
    static int hexValue(char c) {
        if(c >= '0' && c <= '9') return c - '0';
        if(c >= 'a' && c <= 'f') return c - 'a' + 10;
        if(c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    }

    // decodes the text of a Q-encoded word
    static ubyte[] decodeQ(string text) {
        ubyte[] bytes;
        size_t k = 0;
        while(k < text.length) {
            char c = text[k];
            if(c == '=' && k + 2 < text.length) {
                int hi = hexValue(text[k + 1]);
                int lo = hexValue(text[k + 2]);
                if(hi >= 0 && lo >= 0) {
                    bytes ~= cast(ubyte) (hi * 16 + lo);
                    k += 3;
                    continue;
                }
            }
            // '_' means space; a broken escape is kept as it is
            bytes ~= (c == '_') ? cast(ubyte) ' ' : cast(ubyte) c;
            k++;
        }
        return bytes;
    }

    // Tries to decode an encoded word whose "=?" starts at `start`. On
    // success the bytes are stored in `decoded`, `next` is set just past the
    // closing "?=", and true is returned.
    bool decodeWordAt(size_t start, out ubyte[] decoded, out size_t next) {
        string rest = message[start + 2 .. $];

        auto charsetEnd = rest.indexOf("?");
        if(charsetEnd == -1)
            return false;
        string afterCharset = rest[charsetEnd + 1 .. $];

        auto encodingEnd = afterCharset.indexOf("?");
        if(encodingEnd == -1)
            return false;
        string encodingName = afterCharset[0 .. encodingEnd];
        string afterEncoding = afterCharset[encodingEnd + 1 .. $];

        // searching only after the second '?' means a payload that begins
        // with an escape ("?Q?=C3...") is not mistaken for the terminator
        auto payloadEnd = afterEncoding.indexOf("?=");
        if(payloadEnd == -1)
            return false;
        string payload = afterEncoding[0 .. payloadEnd];

        if(encodingName == "Q" || encodingName == "q") {
            decoded = decodeQ(payload);
        } else if(encodingName == "B" || encodingName == "b") {
            // base64 text comes in groups of four characters
            string padded = payload;
            while(padded.length % 4)
                padded ~= "=";
            try {
                decoded = Base64.decode(padded);
            } catch(Exception e) {
                return false; // not valid base64, leave the text alone
            }
        } else {
            return false; // unknown encoding
        }

        next = start + 2
            + cast(size_t) charsetEnd + 1
            + cast(size_t) encodingEnd + 1
            + cast(size_t) payloadEnd + 2;
        return true;
    }

    ubyte[] ret;
    size_t i = 0;
    while(i < message.length) {
        if(message[i] == '=' && i + 1 < message.length && message[i + 1] == '?') {
            ubyte[] decoded;
            size_t next;
            if(decodeWordAt(i, decoded, next)) {
                ret ~= decoded;
                i = next;
                continue;
            }
        }
        ret ~= cast(ubyte) message[i];
        i++;
    }

    return cast(string) ret;
}
/// Reduces an author field to a display name.
///
/// Everything from the first '<' onward is dropped, surrounding whitespace
/// is stripped, and one pair of enclosing double quotes is removed when the
/// remaining text is longer than two characters.
string beautifyName(string name) {
    auto angle = name.indexOf("<");
    string display = (angle == -1 ? name : name[0 .. angle]).strip();

    bool quoted = display.length > 2 && display[0] == '"' && display[$-1] == '"';
    return quoted ? display[1 .. $-1] : display;
}
/// Normalises line endings and undoes the content transfer encoding of a
/// message body.
///
/// "\r\n" is always replaced with "\n". When `encoding` is exactly
/// "quoted-printable", "=XX" hex escapes are decoded to bytes and "=" at the
/// end of a line (a soft line break) is removed together with the newline.
/// Any other encoding value leaves the text otherwise untouched.
///
/// A malformed escape - '=' not followed by two hex digits - is kept
/// literally rather than aborting, because the text comes from the network.
///
/// Params:
///   message  = the body text
///   encoding = value of the Content-Transfer-Encoding header (may be null)
/// Returns: the decoded body.
string translateTransferEncoding(string message, string encoding) {
    // value of one hex digit, or -1 when `c` is not a hex digit
    static int hexValue(char c) {
        if(c >= '0' && c <= '9') return c - '0';
        if(c >= 'a' && c <= 'f') return c - 'a' + 10;
        if(c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    }

    message = message.replace("\r\n", "\n");

    switch(encoding) {
        case "quoted-printable": {
            ubyte[] decoded;
            size_t i = 0;
            while(i < message.length) {
                char c = message[i];
                if(c != '=') {
                    decoded ~= cast(ubyte) c;
                    i++;
                    continue;
                }

                // a '=' closing the input: nothing left to decode
                if(i + 1 == message.length) {
                    i++;
                    continue;
                }

                // soft line break: drop both the '=' and the newline
                if(message[i + 1] == '\n') {
                    i += 2;
                    continue;
                }

                if(i + 2 < message.length) {
                    int hi = hexValue(message[i + 1]);
                    int lo = hexValue(message[i + 2]);
                    if(hi >= 0 && lo >= 0) {
                        decoded ~= cast(ubyte) (hi * 16 + lo);
                        i += 3;
                        continue;
                    }
                }

                // not a valid escape; keep the '=' and carry on
                decoded ~= cast(ubyte) '=';
                i++;
            }
            message = cast(string) decoded;
        }
        break;
        default:
            // nothing to decode
        break;
    }

    return message;
}
/// Validates a newsgroup name and returns it unchanged.
///
/// The name must contain "digitalmars." and consist only of ASCII letters,
/// digits, '.', '+' and '_'.
///
/// Throws: Exception("invalid newsgroup") when either rule is violated.
string sanitizeNewsgroupName(string ng) {
    static bool isAllowed(dchar d) {
        if(d >= 'A' && d <= 'Z') return true;
        if(d >= 'a' && d <= 'z') return true;
        if(d >= '0' && d <= '9') return true;
        return d == '.' || d == '+' || d == '_';
    }

    bool valid = ng.indexOf("digitalmars.") != -1;
    if(valid) {
        foreach(dchar d; ng) {
            if(!isAllowed(d)) {
                valid = false;
                break;
            }
        }
    }

    if(!valid)
        throw new Exception("invalid newsgroup");
    return ng;
}
/// Reads a user preference from the request's cookies.
///
/// The cookie looked up is "arsd_newsgroup_viewer-" followed by `key`.
/// Returns the cookie's value when present, otherwise `def`.
string getUserPreference(Cgi cgi, string key, string def = null) {
    auto cookieName = "arsd_newsgroup_viewer-" ~ key;

    if(cookieName in cgi.cookies)
        return cgi.cookies[cookieName];
    return def;
}
/// Stores a user preference in a cookie named "arsd_newsgroup_viewer-"
/// followed by `key`, and returns `value`.
///
/// NOTE(review): the third setCookie argument is 1000 * 3600 * 7;
/// presumably a lifetime in milliseconds (seven hours) - confirm against
/// Cgi.setCookie.
string setUserPreference(Cgi cgi, string key, string value) {
    auto cookieName = "arsd_newsgroup_viewer-" ~ key;
    cgi.setCookie(cookieName, value, 1000 * 3600 * 7);
    return value;
}
///////////////////////////
/// Creates a new T from `db` and `newsreader`, then copies every key/value
/// pair of the result row `r` into the object's `fields`.
T objectFromResult(T)(Database db, Row r, Newsreader newsreader) {
    auto result = new T(db, newsreader);

    foreach(column, cell; r)
        result.fields[column] = cell;

    // result.mode = UpdateOrInsertMode.AlwaysUpdate;
    return result;
}
/// Converts every row of the result set `r` into a T with objectFromResult
/// and returns them as an array, in iteration order.
T[] arrayFromResult(T)(Database db, ResultSet r, Newsreader newsreader) {
    T[] objects;

    foreach(row; r)
        objects ~= objectFromResult!(T)(db, row, newsreader);

    return objects;
}