--[=====================================================================[
v0.8 Copyright © 2013-2018 Gavin Kistner; MIT Licensed
See http://github.com/Phrogz/SLAXML for details.
--]=====================================================================]

-- SAX-style streaming XML parser. `SLAXML._call` holds the default callbacks,
-- which simply print each event; supply your own table via `SLAXML:parser`.
local SLAXML = {
  VERSION = "0.8",
  _call = {
    -- Processing instruction: <?target content?>
    pi = function(target,content)
      print(string.format("<?%s %s?>",target,content))
    end,
    -- Comment: <!-- content -->
    comment = function(content)
      print(string.format("<!-- %s -->",content))
    end,
    -- Start tag; nsURI and nsPrefix are nil when the element has no namespace/prefix.
    startElement = function(name,nsURI,nsPrefix)
      io.write("<")
      if nsPrefix then io.write(nsPrefix,":") end
      io.write(name)
      if nsURI then io.write(" (ns='",nsURI,"')") end
      print(">")
    end,
    -- One attribute of the element most recently reported via startElement.
    attribute = function(name,value,nsURI,nsPrefix)
      io.write(' ')
      if nsPrefix then io.write(nsPrefix,":") end
      io.write(name,'=',string.format('%q',value))
      if nsURI then io.write(" (ns='",nsURI,"')") end
      io.write("\n")
    end,
    -- Character data; `cdata` is true when it came from a CDATA section.
    text = function(text,cdata)
      print(string.format(" %s: %q",cdata and 'cdata' or 'text',text))
    end,
    -- End tag (also fired for self-closing elements).
    closeElement = function(name,nsURI,nsPrefix)
      io.write("</")
      if nsPrefix then io.write(nsPrefix,":") end
      print(name..">")
    end,
  }
}

--- Create a parser object.
-- @param callbacks optional table with any of the keys pi, comment,
--   startElement, attribute, text, closeElement; defaults to `self._call`.
-- @return table with `_call` (the callbacks) and `parse` (SLAXML.parse),
--   intended to be used as `parser:parse(xml, options)`.
function SLAXML:parser(callbacks)
  return { _call=callbacks or self._call, parse=SLAXML.parse }
end

--- Parse an XML string, invoking the callbacks found in `self._call`.
-- @param xml string containing the document.
-- @param options optional table; `stripWhitespace=true` trims leading and
--   trailing whitespace from text nodes and drops nodes that become empty.
--   Defaults to `{ stripWhitespace=false }`.
-- Raises an error when: a namespace prefix cannot be resolved, an element's
-- start tag is malformed, no element was found at all, or elements remain
-- unclosed at the end of the input.
function SLAXML:parse(xml,options)
  if not options then options = { stripWhitespace=false } end

  -- Cache references for maximum speed
  local find, sub, gsub, char, push, pop, concat = string.find, string.sub, string.gsub, string.char, table.insert, table.remove, table.concat
  local first, last, match1, match2, pos2, nsURI
  local unpack = unpack or table.unpack -- Lua 5.1 vs 5.2+
  local pos = 1
  local state = "text"
  local textStart = 1
  local currentElement={}
  local currentAttributes={}
  local currentAttributeCt -- manually track length since the table is re-used
  local nsStack = {}
  local anyElement = false

  -- { largest code point, lead-byte marker } for 2-, 3- and 4-byte sequences
  local utf8markers = { {0x7FF,192}, {0xFFFF,224}, {0x1FFFFF,240} }

  -- Convert a unicode code point to a utf-8 encoded character string.
  -- Returns nil for code points above 0x1FFFFF.
  local function utf8(decimal)
    if decimal<128 then return char(decimal) end
    local charbytes = {}
    for bytes,vals in ipairs(utf8markers) do
      if decimal<=vals[1] then
        -- Fill continuation bytes from the last one backwards, 6 bits each
        for b=bytes+1,2,-1 do
          local mod = decimal%64
          decimal = (decimal-mod)/64
          charbytes[b] = char(128+mod)
        end
        charbytes[1] = char(vals[2]+decimal)
        return concat(charbytes)
      end
    end
  end

  local entityMap = { ["lt"]="<", ["gt"]=">", ["amp"]="&", ["quot"]='"', ["apos"]="'" }

  -- gsub replacement: `orig` is the whole "&...;" text, `n` is "#" for a
  -- numeric reference (else ""), `s` is the entity name or number.
  local function entitySwap(orig,n,s)
    local named = entityMap[s]
    if named then return named end
    if n=="#" then
      -- Prefixing '0' turns "x41" into "0x41" so hex references parse too.
      local code = tonumber('0'..s)
      -- Malformed numbers (code==nil) and out-of-range code points
      -- (utf8 returns nil) are left untouched instead of raising.
      return code and utf8(code) or orig
    end
    return orig
  end

  local function unescape(str) return gsub( str, '(&(#?)([%d%a]+);)', entitySwap ) end

  -- Emit any pending text between `textStart` and the construct that was
  -- just matched at `first`.
  local function finishText()
    if first>textStart and self._call.text then
      local text = sub(xml,textStart,first-1)
      if options.stripWhitespace then
        text = gsub(text,'^%s+','')
        text = gsub(text,'%s+$','')
        if #text==0 then text=nil end
      end
      -- unescape() is not the last argument, so only its first result is passed
      if text then self._call.text(unescape(text),false) end
    end
  end

  -- <?target content?>
  local function findPI()
    first, last, match1, match2 = find( xml, '^<%?([:%a_][:%w_.-]*) ?(.-)%?>', pos )
    if first then
      finishText()
      if self._call.pi then self._call.pi(match1,match2) end
      pos = last+1
      textStart = pos
      return true
    end
  end

  -- <!-- content -->
  local function findComment()
    first, last, match1 = find( xml, '^<!%-%-(.-)%-%->', pos )
    if first then
      finishText()
      if self._call.comment then self._call.comment(match1) end
      pos = last+1
      textStart = pos
      return true
    end
  end

  -- Resolve a namespace prefix against the stack of in-scope declarations,
  -- innermost first. Raises an error when the prefix is not declared.
  local function nsForPrefix(prefix)
    if prefix=='xml' then return 'http://www.w3.org/XML/1998/namespace' end -- http://www.w3.org/TR/xml-names/#ns-decl
    for i=#nsStack,1,-1 do if nsStack[i][prefix] then return nsStack[i][prefix] end end
    error(("Cannot find namespace for prefix %s"):format(prefix))
  end

  -- Match "<name" or "<prefix:name"; attributes and the closing ">" are
  -- handled afterwards in the "attributes" state.
  local function startElement()
    first, last, match1 = find( xml, '^<([%a_][%w_.-]*)', pos )
    if first then
      anyElement = true -- only count a real start tag, not every attempt
      currentElement[2] = nil -- reset the nsURI, since this table is re-used
      currentElement[3] = nil -- reset the nsPrefix, since this table is re-used
      finishText()
      pos = last+1
      first,last,match2 = find(xml, '^:([%a_][%w_.-]*)', pos )
      if first then
        currentElement[1] = match2
        currentElement[3] = match1 -- Save the prefix for later resolution
        match1 = match2
        pos = last+1
      else
        currentElement[1] = match1
        -- Unprefixed element: inherit the nearest default namespace ('!')
        for i=#nsStack,1,-1 do if nsStack[i]['!'] then currentElement[2] = nsStack[i]['!']; break end end
      end
      currentAttributeCt = 0
      push(nsStack,{})
      return true
    end
  end

  -- Match one name="value" / name='value' pair and queue it; xmlns
  -- declarations are additionally recorded on the top of nsStack.
  local function findAttribute()
    first, last, match1 = find( xml, '^%s+([:%a_][:%w_.-]*)%s*=%s*', pos )
    if first then
      pos2 = last+1
      first, last, match2 = find( xml, '^"([^<"]*)"', pos2 ) -- FIXME: disallow non-entity ampersands
      if first then
        pos = last+1
        match2 = unescape(match2)
      else
        first, last, match2 = find( xml, "^'([^<']*)'", pos2 ) -- FIXME: disallow non-entity ampersands
        if first then
          pos = last+1
          match2 = unescape(match2)
        end
      end
    end
    if match1 and match2 then
      local currentAttribute = {match1,match2}
      local prefix,name = string.match(match1,'^([^:]+):([^:]+)$')
      if prefix then
        if prefix=='xmlns' then
          nsStack[#nsStack][name] = match2
        else
          currentAttribute[1] = name
          currentAttribute[4] = prefix -- resolved to a URI in closeElement()
        end
      else
        if match1=='xmlns' then
          nsStack[#nsStack]['!'] = match2
          currentElement[2] = match2
        end
      end
      currentAttributeCt = currentAttributeCt + 1
      currentAttributes[currentAttributeCt] = currentAttribute
      return true
    end
  end

  -- <![CDATA[ content ]]>
  local function findCDATA()
    first, last, match1 = find( xml, '^<!%[CDATA%[(.-)%]%]>', pos )
    if first then
      finishText()
      if self._call.text then self._call.text(match1,true) end
      pos = last+1
      textStart = pos
      return true
    end
  end

  -- Match the ">" or "/>" that ends a start tag, then fire the
  -- startElement/attribute callbacks (and closeElement for "/>").
  local function closeElement()
    first, last, match1 = find( xml, '^%s*(/?)>', pos )
    if first then
      state = "text"
      pos = last+1
      textStart = pos

      -- Resolve namespace prefixes AFTER all new/redefined prefixes have been parsed
      if currentElement[3] then currentElement[2] = nsForPrefix(currentElement[3]) end
      if self._call.startElement then self._call.startElement(unpack(currentElement)) end
      if self._call.attribute then
        for i=1,currentAttributeCt do
          if currentAttributes[i][4] then currentAttributes[i][3] = nsForPrefix(currentAttributes[i][4]) end
          self._call.attribute(unpack(currentAttributes[i]))
        end
      end

      if match1=="/" then
        pop(nsStack)
        if self._call.closeElement then self._call.closeElement(unpack(currentElement)) end
      end
      return true
    end
  end

  -- </name> or </prefix:name>
  local function findElementClose()
    first, last, match1, match2 = find( xml, '^</([%a_][%w_.-]*)%s*>', pos )
    if first then
      nsURI = nil
      for i=#nsStack,1,-1 do if nsStack[i]['!'] then nsURI = nsStack[i]['!']; break end end
    else
      -- Prefixed form: first capture is the prefix, second is the local name
      first, last, match2, match1 = find( xml, '^</([%a_][%w_.-]*):([%a_][%w_.-]*)%s*>', pos )
      if first then nsURI = nsForPrefix(match2) end
    end
    if first then
      finishText()
      if self._call.closeElement then self._call.closeElement(match1,nsURI) end
      pos = last+1
      textStart = pos
      pop(nsStack)
      return true
    end
  end

  while pos<#xml do
    if state=="text" then
      if not (findPI() or findComment() or findCDATA() or findElementClose()) then
        if startElement() then
          state = "attributes"
        else
          -- Plain text: skip to the next '<' (or one char forward if already on one)
          first, last = find( xml, '^[^<]+', pos )
          pos = (first and last or pos) + 1
        end
      end
    elseif state=="attributes" then
      if not findAttribute() then
        if not closeElement() then
          error("Was in an element and couldn't find attributes or the close.")
        end
      end
    end
  end

  if not anyElement then error("Parsing did not discover any elements") end
  if #nsStack > 0 then error("Parsing ended with unclosed elements") end
end

return SLAXML