#!/usr/bin/env ruby
# frozen_string_literal: true

# Usage: getheadings PDF HEADING[,HEADING...]
#
# Runs the external `pdftohtml` tool on PDF, splits its output into chunks,
# and for every requested heading found in a chunk prints one line of the
# form "<first capture>,<second capture>" taken from that chunk. When a
# heading occurs in several chunks, the last one wins. Output follows the
# order in which the headings were given; headings that never match print
# nothing.
#
# Every argument after PDF is concatenated (with no separator) and the result
# is split on the delimiter, so the heading list may be spread over several
# arguments as long as the headings themselves are comma-separated.
#
# Raises ArgumentError when PDF is missing, and RuntimeError when the path
# does not exist, is not a readable regular file, or does not start with a
# "%PDF-<version>" header line.

delimiter = ","
pdf = ARGV.shift
raise ArgumentError, "Usage: getheadings PDF HEADING[,HEADING...]" if pdf.nil?

headings = ARGV.join("").split(delimiter)

raise "#{pdf} does not exist" unless File.exist?(pdf)

readable_file = File.file?(pdf) && File.readable?(pdf)
raise "Failed to read #{pdf}." unless readable_file

# Read the header as raw bytes so a non-text first line cannot trigger an
# encoding error during the match; the block form closes the file, and `gets`
# returns nil (instead of raising EOFError) when the file is empty.
pdf_header = /^\%PDF-[\d\.]+\s*$/i
first_line = File.open(pdf, "rb", &:gets)
raise "#{pdf} is not a PDF file" unless first_line && first_line =~ pdf_header

# The argv-array form bypasses the shell, so paths containing spaces or shell
# metacharacters are passed to pdftohtml verbatim. The block form closes the
# pipe once the output has been read.
html = IO.popen(["pdftohtml", "-q", "-stdout", "-noframes", pdf], &:read)

# The last two chunks of the output are discarded.
chunks = html.split("\n")[0..-3]

# NOTE(review): the patterns below look as if HTML markup was lost from them.
# The chunk pattern has a single capture group although two captures are
# consumed (the second is therefore always empty), and chunks produced by
# splitting on "\n" cannot contain the newlines these patterns require.
# Confirm the separator and both patterns against real pdftohtml output.
chunk_label = /^<\/a>\w+\<\/b>\n\n(\w+)\n$/

# Pair every chunk with its output label once, up front, instead of searching
# for the chunk's index again for every heading.
labelled_chunks = chunks.map do |chunk|
  found = chunk_label.match(chunk)
  [chunk, "#{found && found[1]},#{found && found[2]}"]
end

results = {}
headings.each do |heading|
  # Built once per heading; it does not depend on the chunk being tested.
  heading_line = /^#{Regexp.escape(heading)}<\/b>\n$/
  labelled_chunks.each do |chunk, label|
    # Later chunks overwrite earlier ones: the last match is reported.
    results[heading] = label if heading_line =~ chunk
  end
end

results.each_value { |label| puts label }