Excel data extractor
From Trephine
Live Demos
- Executing arbitrary script
- Shell commands
- File browser
- Excel data extractor new!
- Persistent client side database
- Groovy demo
- JavaFX demo
- Ruby demo
[subscribe] Recent blog entries
- Simple prototypal inheritance new!
- Adventures in Rhino - setters and getters
- Site improvements - fighting with Disqus
- JavaScript task chaining
- JavaScript string building benchmarks
- Efficient JavaScript string building
- Alternative JavaScript worker thread API
- Implementing JavaScript worker threads
- Thread safe DOM manipulation
- Site improvements - CSS sprites
- Trephine worker threads made easy
- Pitfalls of multithreaded browser development
- Site improvements - reducing dependencies
- The unsplittability of XML
Excel data extractor
This demo shows how to use trephine with Apache POI to extract data from a Microsoft Excel file.
The data is displayed in a series of HTML tables, one for each sheet, and also as raw JSON data.
You must have JavaScript enabled to run this demo.
// Status and element functions. var e = function(tag,index){ return demotab.getElementsByTagName(tag)[index||0]; }; var status = function(msg) { if (!msg) return e('p').style.display = 'none'; e('p').style.display = ''; e('p').innerHTML = msg; }; // Setup demo var setup = function() { // Test for POI (may already be loaded) try { var result = trephine.js( function(){ java.lang.Class.forName('org.apache.poi.ss.usermodel.WorkbookFactory'); } ); } catch(err) { var result = { success: false, error: err.message }; } if (!result.error) { var status = 'ready'; } else { // Test whether dependencies are ready for loading var result = trephine.js( function(){ // Get handle on lib directory or create it if necessary var homedir = new java.io.File( java.lang.System.getProperty('user.home') ); var libdir = new java.io.File( homedir, '.trephine/lib' ); if (!libdir.exists()) { if (!libdir.mkdirs()) throw "Error: Could not create " + libdir.toString(); } // Check for Groovy dependencies var jars = [ 'poi-3.5-beta5-20090219.jar', 'poi-ooxml-3.5-beta5-20090219.jar', 'xmlbeans-2.3.0.jar', 'ooxml-schemas-1.0.jar', 'log4j-1.2.13.jar', 'dom4j-1.6.1.jar' ]; for (var i=0, l=jars.length; i<l; i++) { var jarfile = new java.io.File( libdir, jars[i] ); if (!jarfile.exists()) return 'missing'; }; return 'loadable'; } ); if (result.error) return demotab.innerHTML = '<div class="error">' + result.error + '</div>'; var status = result.result.toString() + ''; } if (status=='missing') { e('div',1).style.display = ''; } else if (status=='ready') { e('form').style.display = ''; } else if (status=='loadable') { var result = trephine.js( function(){ var jars = [ 'poi-3.5-beta5-20090219.jar', 'poi-ooxml-3.5-beta5-20090219.jar', 'xmlbeans-2.3.0.jar', 'ooxml-schemas-1.0.jar', 'log4j-1.2.13.jar', 'dom4j-1.6.1.jar' ]; var homedir = new java.io.File( java.lang.System.getProperty('user.home') ); var libdir = new java.io.File( homedir, '.trephine/lib' ); var executor = java.lang.Thread.currentThread(); for (var i=0, l=jars.length; i<l; i++) { var jarfile = new java.io.File(libdir, jars[i]); executor.addSystemJar(jarfile); } return true; } ); if (result.error) return demotab.innerHTML = '<div class="error">' + result.error.toString() + '</div>'; e('form').style.display = ''; } else { return demotab.innerHTML = '<div class="error">Unknown status [' + status + '] returned.</div>'; } // Setup download link e('a').onclick = function(evt) { if (!evt) evt = window.event; evt.cancelBubble = true; if (evt.stopPropagation) evt.stopPropagation(); var result = trephine.js( function() { // Shorthand mechanism for creating and starting background threads. Function.prototype.run = function() { var thread = new java.lang.Thread( new java.lang.Runnable( { run: this } ) ); thread.start(); return thread; }; // Helper function for downloading a file asynchronously function download( fromURL, toFile, options ) { options = options || {}; var onprogress = typeof options.onprogress == "undefined" ? null : options.onprogress; var interval = typeof options.interval == "undefined" ? 500 : options.interval; var async = typeof options.async == "undefined" ? true : options.async; var f = function(){ var conn = fromURL.openConnection(); var len = conn.contentLength; var bis = new java.io.BufferedInputStream(conn.inputStream); var out = new java.io.BufferedOutputStream(new java.io.FileOutputStream(toFile)); var b, count = 0, n = 0; var buf = java.lang.reflect.Array.newInstance(java.lang.Byte.TYPE, 4096); if (!onprogress) { while ((n = bis.read(buf, 0, buf.length)) != -1) out.write(buf, 0, n); out.flush(); bis.close(); out.close(); return; } var t = (function(){ while (count < len) { onprogress(count, len); java.lang.Thread.currentThread().sleep(interval); } }).run(); while ((n = bis.read(buf, 0, buf.length)) != -1) { count += n; out.write(buf, 0, n); } t.stop(); out.flush(); bis.close(); out.close(); onprogress(count, len); }; return async ? f.run() : f(); }; // Download POI dependencies, add them to the system classpath and register the engine factory var executor = java.lang.Thread.currentThread(); (function(){ var jars = [ 'poi-3.5-beta5-20090219.jar', 'poi-ooxml-3.5-beta5-20090219.jar', 'xmlbeans-2.3.0.jar', 'ooxml-schemas-1.0.jar', 'log4j-1.2.13.jar', 'dom4j-1.6.1.jar' ]; var jarids = ['poi-beta', 'poi-ooxml', 'xmlbeans', 'ooxml-schemas', 'log4j', 'dom4j']; var homedir = new java.io.File( java.lang.System.getProperty('user.home') ); var libdir = new java.io.File( homedir, '.trephine/lib' ); var base = applet['c'+'lass'].protectionDomain.codeSource.location.toString().replaceFirst('^(.*'+'/).*$', "$1"); for (var i=0, l=jarids.length; i<l; i++) (function(id, jar){ var jarfile = new java.io.File( libdir, jar ); var msg = "<span class=\"spinner\"></span> " + jar + " <span>0</span>% complete." window.eval("window.top.document.getElementById('"+id+"').innerHTML = '"+msg+"';"); download(new java.net.URL(base + jar), jarfile, { async: false, onprogress: function(count, len) { if (count >= len) { var msg = "<span class=\"check\"></span> " + jar + " Done!"; return window.eval([ "window.top.document.getElementById('", id, "').innerHTML = '", msg, "';" ].join('')); } window.eval([ "window.top.document.getElementById('", id, "').getElementsByTagName('span')[1].innerHTML = ", "'" + Math.round(100 * count / len) + "';" ].join('')); } }); executor.addSystemJar(jarfile); })(jarids[i], jars[i]); // Switch back from download to main demo form var code = '(' + function(){ var e = function(tag,index){ return window.top.document.getElementById('demotab').getElementsByTagName(tag)[index||0]; }; e('div',1).style.display = 'none'; e('p').innerHTML = 'Dependencies successfully loaded!'; e('p').style.display = ''; e('form').style.display = 'block'; setTimeout(function(){ e('p').style.display='none'; }, 2000); } + ')()'; window.eval( code.replace(new RegExp('\\r?\\n','g'), '') ); }).run(); } ); e('p',2).innerHTML = 'Download in progress...'; return false; }; // Setup form e('form').onsubmit = function(event) { try { if (!trephine || !trephine.loaded) throw "Error: trephine not loaded."; if (!trephine.hasPermission()) throw "Error: elevated permissions have not been granted."; e('div').innerHTML = '<p class="notice" style="float:none"><span class="spinner"></span> Extracting data ...</p>'; setTimeout( function(){ // Here's where the real work gets done var result = trephine.js( function(filename) { // First, confirm that the file is actually a file that we can read filename = new java.lang.String(filename); var file = new java.io.File(filename); if (!file.exists()) throw "Error: File does not exist " + file.toString(); if (!file.isFile()) throw "Error: Selected path is not a file " + file.toString(); if (!file.canRead()) throw "Error: The selected file cannot be read " + file.toString(); // Now we try to create a WorkBook try { var inp = new java.io.FileInputStream(file); var wb = Packages.org.apache.poi.ss.usermodel.WorkbookFactory.create(inp); if (!wb) throw "WorkBook is null"; } catch(err) { throw "Error creating workbook from file - " + err.message; } // Cell value extractor var cellValue = function(cell) { if (!cell) return ''; var t = cell.cellType, c = Packages.org.apache.poi.ss.usermodel.Cell; var du = Packages.org.apache.poi.ss.usermodel.DateUtil; if (t == c.CELL_TYPE_NUMERIC) return du.isCellDateFormatted(cell) ? cell.dateCellValue + '' : cell.numericCellValue + 0.0; if (t == c.CELL_TYPE_STRING) return cell.stringCellValue + ''; if (t == c.CELL_TYPE_FORMULA) { try { return du.isCellDateFormatted(cell) ? cell.dateCellValue + '' : cell.numericCellValue + 0.0; } catch(err) { } try { return cell.stringCellValue + ''; } catch(err) { } try { return cell.numericCellValue + 0.0; } catch(err) { } try { return !!cell.booleanCellValue; } catch(err) { } try { return cell.errorCellValue + ''; } catch(err) { } return ''; } if (t == c.CELL_TYPE_BLANK) return ''; if (t == c.CELL_TYPE_BOOLEAN) return !!cell.booleanCellValue; if (t == c.CELL_TYPE_ERROR) return cell.errorCellValue + ''; return ''; }; // Build a JSON data model of the workbook's sheets and return it var sheets = []; var p = function(s) { java.lang.System.out.println(s); }; for (var i=0, l=wb.numberOfSheets; i<l; i++) { var sheet = { data: [], cols: 0, name: wb.getSheetName(i) + '' }; var rit = wb.getSheetAt(i).rowIterator(); while (rit.hasNext()) { var row = [], cols = 0, cit = rit.next().cellIterator(); while (cit.hasNext()) row[cols++] = cellValue(cit.next()); sheet.data[sheet.data.length] = row; if (cols > sheet.cols) sheet.cols = cols; } if (sheet.data.length) sheets[sheets.length] = sheet; } return JSON.stringify(sheets); }, e('input').value ); // Show an error message if anything went wrong if (result.success.toString() != 'true') { var msg = result.error; try { msg = msg.toString() + ''; } catch(err) { } return e('div').innerHTML = [ '<p class="error">', msg, '</p>', '<p><strong>Tip</strong>: Enable the Java Console to see full traces.<br />' ].join(''); } var json = result.result; try { json = json.toString() + ''; } catch(err) {} try { var sheets = eval('(' + json + ')'); } catch(err) { return e('div').innerHTML = '<p class="error">Error extracting data: ' + err.message + '</p>'; } // Extraction successful, build HTML table var output = ['<h4>HTML Data:</h4>']; for (var i=0, l=sheets.length; i<l; i++) { var sheet = sheets[i]; output[output.length] = "<h5>" + sheet.name + "</h5><div class=\"datatable\"><table class=\"wikitable\"><tbody>"; for (var j=0, m=sheet.data.length; j<m; j++) { var row = sheet.data[j]; output[output.length] = '<tr>'; for (var k=0, n=sheet.cols; k<n; k++) { output[output.length] = '<td>'; if (k<row.length) output[output.length] = row[k]; output[output.length] = '</td>'; } output[output.length] = '</tr>'; } output[output.length] = '</tbody></table></div>'; } output[output.length] = '<h4>JSON Data:</h4><textarea id="jsondata"></textarea>'; // Set the output e('div').innerHTML = output.join(''); // Shrink any datatable div's containing small tables var divs = e('div').getElementsByTagName('div'); for (var i=0, l=divs.length; i<l; i++) { var div = divs[i], table = div.getElementsByTagName('table')[0]; if (!table) continue; if (table.clientHeight < div.clientHeight) div.style.height = table.clientHeight + 'px'; } // Inject the JSON data as a new text node document.getElementById('jsondata').value = json; }, 10 ); } catch(err) { e('div').innerHTML = '<p class="errorbox" style="float:none">' + err + '</p>'; } return false; }; // Setup the browse button e('input', 1).onclick = function(evt) { window._browseCallback = function(path) { e('input').value = path.toString() + ''; delete window._browseCallback; }; var result = trephine.js( function() { (new java.lang.Thread(new java.lang.Runnable({ run: function() { var dialog = new java.awt.FileDialog( new java.awt.Frame(), "Choose a directory", java.awt.FileDialog.LOAD ); dialog.show(); var path = (dialog.directory + dialog.file + '').replace(new RegExp('\\\\','g'), '\\\\'); path = JSON.stringify(path); window.eval("setTimeout('window.top._browseCallback(" + path + ")', 10)"); } }))).start(); return true; } ); if (result.error) throw "Error: " + result.error; }; }; // Load trephine status('<span class="spinner"></span> Loading trephine...'); trephine.load({ debug: true, // Enables debug logging to the Java Console onload: function() { // Immediately prompt for permissions status('<span class="spinner"></span> Asking for elevated permissions...'); trephine.askPermission( function(response) { if (!response) { // Short-circuit if permission request was denied return demotab.innerHTML = '<div class="error">Sorry, you must grant trephine privileges to enjoy this demo.</div>'; } setup(); status('Permission request granted.'); setTimeout( function(){ status(); }, 2000 ); } ); } });
<p class="notice"></p> <form style="display:none"> <h4>Excel File:</h4> <p> <input class="filepath" type="text" value=""> <input class="browse" type="button" value="browse"/> <input class="extract" type="submit" value="extract"/> </p> <div></div> </form> <div style="display:none"> <p class="notice"> This demo requires the following POI support jars. <strong><a href="" title="begin download">Click here to begin download</a></strong> </p> <ul> <li id="poi-beta">poi-3.5-beta5-20090219.jar (1.5 MB)</li> <li id="poi-ooxml">poi-ooxml-3.5-beta5-20090219.jar (333 KB)</li> <li id="xmlbeans">xmlbeans-2.3.0.jar (2.6 MB)</li> <li id="ooxml-schemas">ooxml-schemas-1.0.jar (13.6 MB)</li> <li id="log4j">log4j-1.2.13.jar (350 KB)</li> <li id="dom4j">dom4j-1.6.1.jar (307 KB)</li> </ul> </div>
.demotab h4 { font-weight: bold; } .demotab .filepath { width: 400px; } .demotab td { border-bottom: 1px solid #ccc; border-left: 1px solid #ccc; white-space: nowrap; min-width: 6em; } .demotab table { border-right: 1px solid #ccc; } .demotab .spinner { padding: 10px 10px; background: url("skins/common/images/spinner.gif"); background-position: center center; background-repeat: no-repeat; } .demotab .check { padding: 8px 10px; background: url("skins/trephine/accept.gif"); background-position: center center; background-repeat: no-repeat; } .demotab form div p { overflow: auto; } .demotab .datatable { height: 358px; margin-bottom: 18px; overflow: auto; border: 1px solid #ccc; border-top: none; } .demotab h5 { background: #C3D9FF; margin-bottom: 0; border: 1px solid #ccc; height: 16px; padding: 0 1em; } .demotab textarea { height: 358px; overflow: auto; margin-bottom: 18px; border: 1px solid #ccc; background: #fffff0; width: 602px; }
Tip: All code on this page is available under the MIT license as explained on the about page.
Activation
- Activate the demo by clicking the specified link.
- You may see a Java Security dialog, if so, click "Run" or "Trust" to indicate your acceptance
- In the "Trephine permission request" dialog, answer the math challenge and click "OK" to grant elevated privileges.
- If this is your first time running the demo, you will be prompted to download the POI dependencies, click the link to do so.
- Once all the dependency jars have been downloaded, the demo will be ready to use.
Usage
- Click the browse button and select an Excel file to open, or type in a path directly.
- Click the execute button to initiate data extraction, data will appear below.
- Repeat as many times as you like.
Caveats and Gotchas
Support for Microsoft Office 2007 Excel files is still in beta. Once POI releases a stable version with xlsx support, this shouldn't be a problem.
The Excel input files I've used so far have been very simple, so it's very likely that using a complex spreadsheet may break the demo. If that happens, and you're able to share the Excel file that did it, please leave a comment below, or email me directly. Thanks in advance!
--Jim R. Wilson (jimbojw) 07:29, 28 March 2009 (UTC)