From 6f5739b9d8fae513bcb782fc15a7be217501c9b5 Mon Sep 17 00:00:00 2001
From: zzz <zzz@mail.i2p>
Date: Mon, 14 Sep 2015 11:06:35 +0000
Subject: [PATCH] News: Store/load individual news entries as XML in separate
 files by UUID. WIP, not yet hooked in.

---
 .../net/i2p/router/news/NewsXMLParser.java    |  27 +-
 .../src/net/i2p/router/news/PersistNews.java  | 263 ++++++++++++++++++
 2 files changed, 286 insertions(+), 4 deletions(-)
 create mode 100644 apps/routerconsole/java/src/net/i2p/router/news/PersistNews.java

diff --git a/apps/routerconsole/java/src/net/i2p/router/news/NewsXMLParser.java b/apps/routerconsole/java/src/net/i2p/router/news/NewsXMLParser.java
index 01ffe4d945..8850cb1411 100644
--- a/apps/routerconsole/java/src/net/i2p/router/news/NewsXMLParser.java
+++ b/apps/routerconsole/java/src/net/i2p/router/news/NewsXMLParser.java
@@ -247,6 +247,10 @@ public class NewsXMLParser {
         return rv;
     }
 
+    /**
+     *  This does not check for any missing values.
+     *  Any fields in any NewsEntry may be null.
+     */
     private List<NewsEntry> extractNewsEntries(Node feed) throws I2PParserException {
         List<NewsEntry> rv = new ArrayList<NewsEntry>();
         List<Node> entries = getNodes(feed, "entry");
@@ -345,8 +349,10 @@ public class NewsXMLParser {
 
     /**
      *  Helper to get all Nodes matching the name
+     *
+     *  @return non-null
      */
-    private static List<Node> getNodes(Node node, String name) {
+    static List<Node> getNodes(Node node, String name) {
         List<Node> rv = new ArrayList<Node>();
         int count = node.getNNodes();
         for (int i = 0; i < count; i++) {
@@ -432,8 +438,8 @@ public class NewsXMLParser {
     }
 
     public static void main(String[] args) {
-        if (args.length != 1) {
-            System.err.println("Usage: NewsXMLParser file.xml");
+        if (args.length <= 0 || args.length > 2) {
+            System.err.println("Usage: NewsXMLParser file.xml [parserMode]");
             System.exit(1);
         }
         try {
@@ -454,9 +460,22 @@ public class NewsXMLParser {
             System.out.println("Release timestamp: " + latestRelease.date);
             System.out.println("Feed timestamp: " + ud.feedUpdated);
             System.out.println("Found " + entries.size() + " news entries");
+            Set<String> uuids = new HashSet<String>(entries.size());
             for (int i = 0; i < entries.size(); i++) {
                 NewsEntry e = entries.get(i);
-                System.out.println("News #" + (i+1) + ": " + e.title + '\n' + e.content);
+                System.out.println("\n****** News #" + (i+1) + ": " + e.title + '\n' + e.content);
+                if (e.id == null)
+                    throw new IOException("missing ID");
+                if (e.title == null)
+                    throw new IOException("missing title");
+                if (e.content == null)
+                    throw new IOException("missing content");
+                if (e.authorName == null)
+                    throw new IOException("missing author");
+                if (e.updated == 0)
+                    throw new IOException("missing updated");
+                if (!uuids.add(e.id))
+                    throw new IOException("duplicate ID");
             }
         } catch (IOException ioe) {
             ioe.printStackTrace();
diff --git a/apps/routerconsole/java/src/net/i2p/router/news/PersistNews.java b/apps/routerconsole/java/src/net/i2p/router/news/PersistNews.java
new file mode 100644
index 0000000000..96b8e062f1
--- /dev/null
+++ b/apps/routerconsole/java/src/net/i2p/router/news/PersistNews.java
@@ -0,0 +1,263 @@
+package net.i2p.router.news;
+
+import java.io.BufferedInputStream;
+import java.util.Collections;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import net.i2p.I2PAppContext;
+import net.i2p.data.Base64;
+import net.i2p.data.DataHelper;
+import net.i2p.data.Hash;
+import net.i2p.util.Log;
+import net.i2p.util.SecureDirectory;
+import net.i2p.util.SecureFileOutputStream;
+
+import org.cybergarage.util.Debug;
+import org.cybergarage.xml.Node;
+import org.cybergarage.xml.ParserException;
+
+/**
+ *  Store and retrieve news entries from disk.
+ *  Each entry is stored in a separate gzipped XML file, with the name
+ *  derived from a hash of the entry's id (UUID).
+ *
+ *  @since 0.9.23
+ */
+public class PersistNews {
+
+    private static final String DIR = "docs/news";
+    private static final String PFX = "news-";
+    private static final String SFX = ".xml.gz";
+    private static final String XML_START = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
+
+    /**
+     *  Store each entry as gzipped UTF-8 XML in its own file in the news directory.
+     *  Old entries are always overwritten, as they may change even without the updated date changing.
+     *  Entries with no "id" node or value are logged and skipped; they do not affect the result.
+     *  @param entries each one should be "entry" at the root
+     *  @return true unless writing or closing at least one file failed
+     */
+    public static boolean store(I2PAppContext ctx, List<Node> entries) {
+        Log log = ctx.logManager().getLog(PersistNews.class);
+        File dir = new SecureDirectory(ctx.getConfigDir(), DIR);
+        if (!dir.exists())
+            dir.mkdirs();
+        StringBuilder buf = new StringBuilder();
+        boolean rv = true;
+        for (Node entry : entries) {
+            Node nid = entry.getNode("id");
+            if (nid == null) {
+                if (log.shouldWarn())
+                    log.warn("entry without UUID");
+                continue;
+            }
+            String id = nid.getValue();
+            if (id == null) {
+                if (log.shouldWarn())
+                    log.warn("entry without UUID");
+                continue;
+            }
+            String name = idToName(ctx, id);
+            File file = new File(dir, name);
+            Writer out = null;
+            try {
+                out = new OutputStreamWriter(new GZIPOutputStream(new SecureFileOutputStream(file)), "UTF-8"); // must match XML_START
+                out.write(XML_START);
+                buf.setLength(0); // reset before use, so text left over from a failed write never reaches this file
+                XMLParser.toString(buf, entry);
+                out.write(buf.toString());
+            } catch (IOException ioe) {
+                if (log.shouldWarn())
+                    log.warn("failed store to " + file, ioe);
+                rv = false;
+            } finally {
+                if (out != null) try { out.close(); } catch (IOException ioe) { rv = false; } // close flushes the gzip data
+            }
+        }
+        return rv;
+    }
+
+    /**
+     *  Load every stored entry file found in the news directory.
+     *  This does not check for any missing values; any fields in any NewsEntry may be null.
+     *  Content is not sanitized by NewsXMLParser here, do that before storing.
+     *  Files that cannot be read or parsed are logged and deleted.
+     *  @return non-null, sorted by updated date, newest first
+     */
+    public static List<NewsEntry> load(I2PAppContext ctx) {
+        Log log = ctx.logManager().getLog(PersistNews.class);
+        File dir = new File(ctx.getConfigDir(), DIR);
+        List<NewsEntry> rv = new ArrayList<NewsEntry>();
+        File[] files = dir.listFiles();
+        if (files == null)
+            return rv;
+        for (File file : files) {
+            String name = file.getName();
+            if (!name.startsWith(PFX) || !name.endsWith(SFX))
+                continue;
+            XMLParser parser = new XMLParser(ctx);
+            InputStream in = null;
+            boolean error = false;
+            try {
+                in = new FileInputStream(file); // assigned first, so it is closed even if the gzip header is bad
+                in = new GZIPInputStream(in);
+                Node node = parser.parse(in);
+                NewsEntry entry = extract(node);
+                if (entry != null) {
+                    rv.add(entry);
+                } else {
+                    if (log.shouldWarn())
+                        log.warn("load error from " + file);
+                    error = true;
+                }
+            } catch (ParserException pe) {
+                if (log.shouldWarn())
+                    log.warn("load error from " + file, pe);
+                error = true;
+            } catch (IOException ioe) {
+                if (log.shouldWarn())
+                    log.warn("load error from " + file, ioe);
+                error = true;
+            } finally {
+                if (in != null) try { in.close(); } catch (IOException ioe) {}
+            }
+            if (error)
+                file.delete();
+        }
+        Collections.sort(rv);
+        return rv;
+    }
+
+    /**
+     *  Convert one parsed "entry" node into a NewsEntry.
+     *  No values are checked for presence, so any field of the result may be null.
+     *  The content is converted to a string as-is; no sanitizing step is run here,
+     *  so do that before storing.
+     *  @return non-null
+     */
+    private static NewsEntry extract(Node entry) {
+        NewsEntry rv = new NewsEntry();
+        // title
+        Node node = entry.getNode("title");
+        if (node != null) {
+            String value = node.getValue();
+            rv.title = (value != null) ? value.trim() : null;
+        }
+        // link
+        node = entry.getNode("link");
+        if (node != null) {
+            String value = node.getValue();
+            rv.link = (value != null) ? value.trim() : null;
+        }
+        // id
+        node = entry.getNode("id");
+        if (node != null) {
+            String value = node.getValue();
+            rv.id = (value != null) ? value.trim() : null;
+        }
+        // updated
+        node = entry.getNode("updated");
+        String stamp = (node != null) ? node.getValue() : null;
+        if (stamp != null) {
+            // only a positive parse result is stored
+            long when = RFC3339Date.parse3339Date(stamp.trim());
+            if (when > 0)
+                rv.updated = when;
+        }
+        // summary
+        node = entry.getNode("summary");
+        if (node != null) {
+            String value = node.getValue();
+            rv.summary = (value != null) ? value.trim() : null;
+        }
+        // author name is read from the nested author/name node
+        node = entry.getNode("author");
+        if (node != null) {
+            node = node.getNode("name");
+        }
+        if (node != null) {
+            String value = node.getValue();
+            rv.authorName = (value != null) ? value.trim() : null;
+        }
+        // content
+        node = entry.getNode("content");
+        if (node != null) {
+            String type = node.getAttributeValue("type");
+            if (type.length() > 0)
+                rv.contentType = type;
+            // convert every child of the content node to a string;
+            // no sanitize call is made in this method
+            StringBuilder out = new StringBuilder(256);
+            for (int i = 0; i < node.getNNodes(); i++) {
+                XMLParser.toString(out, node.getNode(i));
+            }
+            rv.content = out.toString();
+        }
+        return rv;
+    }
+
+    /**
+     *  Delete the stored file for this entry, located via its id.
+     *  @return true if the file was deleted; false if the entry has no id or the delete failed
+     */
+    public static boolean delete(I2PAppContext ctx, NewsEntry entry) {
+        String uuid = entry.id;
+        if (uuid == null)
+            return false;
+        File newsDir = new File(ctx.getConfigDir(), DIR);
+        File target = new File(newsDir, idToName(ctx, uuid));
+        return target.delete();
+    }
+
+    /**
+     *  @param id non-null; its UTF-8 bytes are hashed and Base64-encoded to form the file name
+     */
+    private static String idToName(I2PAppContext ctx, String id) {
+        byte[] idBytes = DataHelper.getUTF8(id);
+        byte[] digest = new byte[Hash.HASH_LENGTH];
+        ctx.sha().calculateHash(idBytes, 0, idBytes.length, digest, 0);
+        return PFX + Base64.encode(digest) + SFX;
+    }
+
+    /** Test driver: store the entries of a feed file, then load everything back and print it. */
+    public static void main(String[] args) {
+        if (args.length != 1) {
+            System.err.println("Usage: PersistNews file.xml");
+            System.exit(1);
+        }
+        I2PAppContext ctx = new I2PAppContext();
+        Debug.initialize(ctx);
+        XMLParser parser = new XMLParser(ctx);
+        InputStream in = null;
+        try {
+            in = new FileInputStream(args[0]);
+            Node root = parser.parse(in);
+            List<Node> entries = NewsXMLParser.getNodes(root, "entry");
+            boolean ok = store(ctx, entries); // failures are reported only through the return value
+            System.out.println((ok ? "Stored " : "Failed to store some of ") + entries.size() + " entries");
+        } catch (ParserException pe) {
+            System.out.println("load error from " + args[0]);
+            pe.printStackTrace();
+        } catch (IOException ioe) {
+            System.out.println("load error from " + args[0]);
+            ioe.printStackTrace();
+        } finally {
+            if (in != null) try { in.close(); } catch (IOException ioe) {}
+        }
+        List<NewsEntry> loaded = load(ctx);
+        System.out.println("Loaded " + loaded.size() + " news entries");
+        int i = 0;
+        for (NewsEntry e : loaded)
+            System.out.println("\n****** News #" + (++i) + ": " + e.title + '\n' + e.content);
+    }
+}
-- 
GitLab