require 'strscan'

# A Simple Recursive Descending (LL) Parser for Ruby
class RDParser
  # change to true to get flooded with debug messages during parsing
  DEBUG = false
  
  # Class used to accept Ruby metafoo when specifying grammars with a block and virtual methods
  class GenericBlockToHashCreator; def method_missing(m, *args); @h ||= {}; @h[m] = args.first; @h; end; end
  
  # Creates a parser based on a certain grammar provided either as a hash or as
  # methods within a block (that then uses GrammarCreator)
  def initialize(grammar = '')
    @grammar = grammar == '' ? yield(GenericBlockToHashCreator.new) : grammar.dup
  end
  
  # Parse content using the parser object's grammar set
  def parse(rule, content, options = {})
    # @depth is used to make debugging output look nice and nested
    @depth = -1
    
    # Create a string scanner object based on the content
    @content = Scanner.new(content, :slurp_whitespace => !options[:manual_whitespace])
    
    # Kick off the show by parsing from the rule specified
    parse_section(rule.to_sym).flatten
  end
  
  # Parse the content based on the rulesets for a particular grammar context
  def parse_section(rule)
    # Increase the depth of parsing - used for debug messages currently
    @depth += 1
    
    # For each distinct set of rules within a ruleset, get parsing..
    sub_rulesets(rule).each do |ruleset|
      RDParser::DEBUG && debug_message("RULE SET : #{ruleset}")
      
      # By default, we assume failure!
      success = false
      output = []
      
      # Keep a local copy of the current position to roll back to if things get hairy..
      was_position = @content.position            

      # Go through each rule in this ruleset
      sub_rules(ruleset).each do |r|
        RDParser::DEBUG && debug_message("Running rule '#{r}' against '#{@content.lookahead}'")
        suboutput = []

        # Match a rule with 1 or more occurrences (e.g. "rule(s)") 
        if r =~ /(\w+)(\(s\))$/
          r = $1
          
          # Force the first occurrence to succeed or break this sub-ruleset
          unless result = match_for_rule(r.to_sym)
            success = false
            break
          end

          suboutput.append_or_blend result
          
          # Now pick up any of the "or more" occurrences for free
          while result = match_for_rule(r.to_sym)
            suboutput.append_or_blend result
          end
          
        # Match a rule with 0 or more occurrences (e.g. "rule(s?)")
        elsif r =~ /(\w+)(\(s\?\))$/
          r = $1
          while result = match_for_rule(r.to_sym)
            suboutput.append_or_blend result
          end
          
        # Match a rule with 0 or 1 occurrences (e.g. "rules(?)")
        elsif r =~ /(\w+)(\(\?\))$/
          r = $1
          if result = match_for_rule(r.to_sym)
            suboutput.append_or_blend result
          end
          
        # Match a rule that has one single occurrence
        else
          unless result = match_for_rule(r.to_sym)
            success = false
            break
          end
        
          suboutput.append_or_blend result
        end

        success = true
                      
        # Append the output from this rule to the output we'll use later..
        output += suboutput 
      end
      
      # We've either processed all the rules for this ruleset, or.. it failed      
      if success
        RDParser::DEBUG && debug_message("Success of all rules in #{ruleset}")
        
        # No need to check any more rulesets! We've just passed one,
        # so drop the depth a notch, we're headed back up the tree of recursion!
        @depth -= 1
        RDParser::DEBUG && debug_message("SUCCEED: #{ruleset}", :passback)
        return output
      else
        RDParser::DEBUG && debug_message("failed #{ruleset}.. moving on")

        # If the rule set failed, revert the position back to that we stored earlier
        @content.rollback_to(was_position)

        # And clean the output.. because any output we got from a broken rule is as useful
        # as an ashtray on a motorbike, a chocolate teapot, or ice cutlery.
        RDParser::DEBUG && debug_message("FAIL: #{ruleset}", :passback)
      end
    end
    
    # Well nothing passed, so this rule was totally bogus. Great. We've totally wasted our time.
    @depth -= 1
    return false
  end
  
  # Parse the content based on a single subrule
  def match_for_rule(rule)
    RDParser::DEBUG && debug_message("TRYING #{rule}")
    output = []
    rule_data = @grammar[rule]

    # If the rule is a string literal "likethis" then match it
    if rule.to_s =~ /^\"(.+)\"$/
      m = $1
      if @content.scan(m)
        RDParser::DEBUG && debug_message("GOT #{m}")
        output << {rule => m}
      else
        return false
      end
      
    # Is the rule a regular expression?
    elsif rule_data.class == Regexp
      # If we get a match.. do stuff!
      if c = @content.scan(rule_data)
        RDParser::DEBUG && debug_message("GOT IT --> #{c}")
        output << {rule => c}
      else
        # If we get no match, go and cry to mommy^H^H^H^H^Hhead up the recursion ladder
        return false
      end
    
    # Is the rule a string of other rules?
    elsif rule_data.class == String
      # A RULE THAT HAS RULES?? RECURSION IN THE HOUSE!
      response = parse_section(rule)
      
      # But did it really work out as planned?
      if response
        # Yes.. so celebrate and process the response.
        RDParser::DEBUG && debug_message("GOT #{rule}")
        return {rule => response}
      else
        # No.. so throw a hissyfit
        RDParser::DEBUG && debug_message("NOT GOT #{rule}")
        return false
      end
    end    
    output
  end
  
  # Splits a ruleset into its constituent rules or matches by whitespace
  def sub_rules(ruleset); ruleset.split(/\s+/); end
  
  # Extracts all rulesets from a single rule
  # e.g. 'rule1 rule2 | rule3 | rule4 rule 5' is three rule sets separated by bars
  def sub_rulesets(rule); @grammar[rule].split(/\s+\|\s+/); end
  
  # A 'pretty printer' for RDParser's syntax trees (pp just doesn't cut it for these beasties)
  # There's probably a nice iterative way of doing this but I'm tired.
  def RDParser.text_syntax_tree(v, depth = 0, output = '')
    if v.class == Array
      v.each { |a| output = text_syntax_tree(a, depth, output) }
    elsif v.class == Hash
      v.each do |a, b|
        output += ("  " * depth) + a.to_s
        b.class == String ? output += " => #{b}\n" : output = text_syntax_tree(b, depth + 1, output + "\n")
      end
    end
    output
  end  
  
  # Prints a debugging message if debug mode is enabled
  def debug_message(message, priority = true)
    # Let's different types of message through
    return if priority != DEBUG
    puts "#{("  " * @depth.to_i) if @depth && @depth > -1 } #{message}"
  end
end

# Provide Array with method that appends hashes, but concatenates arrays
class Array
  def append_or_blend(i)
    i.class == Hash ? self << i : self.concat(i)
  end
end

# Attached class below for simplicity of transfer at the moment.. but this should really be
# separated.

# Scanner is basically a proxy class for StringScanner that adds some convenience features.
# Could probably be a subclass actually, but this was a quick, scrappy job so I could get
# onto the fun stuff in RDParser!
class Scanner
  def initialize(string, options = {})
    @options = options
    @scanner = StringScanner.new(string.dup)
    @rollback_pointers = []
  end
  
  def position
    @scanner.pos
  end
  
  def rollback_position
    @rollback_pointers.last
  end
  
  def lookahead
    @scanner.peek(10)
  end
  
  def scan(regexp)
    space = @options[:slurp_whitespace] && @options[:slurp_whitespace] == true ? /\s*/ : //
    if regexp.class == String
      regexp = Regexp.new(regexp.gsub(/\(|\)|\+|\|/) { |a| '\\' + a })
    end
    if match = @scanner.scan(/(#{space}(#{regexp}))/)
      @rollback_pointers << (@scanner.pos - match.length)
      @options[:slurp_whitespace] && @options[:slurp_whitespace] == true ? match.sub(/^\s*/, '') : match
    else
      nil
    end
  end
  
  def rollback
    begin
      (@scanner.pos = @rollback_pointers.pop) && true
    rescue
      nil
    end
  end
  
  def rollback_to(posi)
    @scanner.pos = posi
    @rollback_pointers.delete_if { |p| p >= posi }
  end
end