diff --git a/ghidra-plugin/src/main/scala/grease/GreaseAnalyzer.scala b/ghidra-plugin/src/main/scala/grease/GreaseAnalyzer.scala index 306fc8c6..eb49729e 100644 --- a/ghidra-plugin/src/main/scala/grease/GreaseAnalyzer.scala +++ b/ghidra-plugin/src/main/scala/grease/GreaseAnalyzer.scala @@ -26,41 +26,16 @@ import ghidra.util.Msg import scala.concurrent.duration._ import ghidra.app.services.AnalysisPriority import ghidra.program.model.listing.CommentType - -object GreaseBackgroundCmd { - val GREASE_BOOKMARK_TYPE = "GREASE" - - def greaseBookmark( - prog: Program, - addr: Address, - category: String, - note: String - ): Unit = { - prog - .getBookmarkManager() - .setBookmark(addr, GREASE_BOOKMARK_TYPE, category, note) - } - - def addComment(comm: String, prog: Program, toAddr: Address): Unit = { - val prevCom = Option( - prog - .getListing() - .getComment(CommentType.PRE, toAddr) - ) - val nextCom = - prevCom - .getOrElse("") + comm - prog - .getListing() - .setComment(toAddr, CommentType.PRE, nextCom) - } -} +import java.io.File class GreaseBackgroundCmd( val entrypoints: AddressSetView, timeout: Option[FiniteDuration], loadBase: Option[Long], - rawMode: Boolean + rawMode: Boolean, + overridesFile: Option[File], + loopBound: Option[Int], + useDebug: Boolean ) extends BackgroundCommand[Program] { override def applyTo(prog: Program, monitor: TaskMonitor): Boolean = { @@ -79,31 +54,29 @@ class GreaseBackgroundCmd( val res = GreaseWrapper( prog ).runGrease( - GreaseConfiguration(targetBin, item, timeout, loadBase, rawMode) + GreaseConfiguration( + targetBin, + item, + timeout, + loadBase, + rawMode, + overridesFile, + loopBound, + useDebug + ) ) res match case Failure(exception) => { Msg.warn(this, s"GREASE could not analyze ${item}, ${exception}") - GreaseBackgroundCmd.addComment( + ParsedBatch.addComment( s"\n Failed to analyze function with GREASE", prog, item ) } - case scala.util.Success(bugs) => { - for bug <- bugs.possibleBugs do - GreaseBackgroundCmd.addComment( - s"\n Possible bug: ${bug.description.render()}", - prog, - bug.appliedTo - ) - GreaseBackgroundCmd.greaseBookmark( - prog, - bug.appliedTo, - "Possible bug", - "GREASE detected a potential bug" - ) + case scala.util.Success(items) => { + for bug <- items.possibleBugs do bug.printToProg(prog) } monitor.increment() @@ -127,6 +100,9 @@ object GreaseAnalyzer { val RAW_MODE_OPT: String = "Raw mode" val LOAD_BASE_OPT: String = "Load base" val USE_IMAGE_BASE_AS_LOAD_BASE_OPT: String = "Use image base as load base" + val OVERRIDE_FILE: String = "Override file" + val LOOP_BOUND_OPT: String = "Loop bound" + val SHOULD_USE_DEBUG_OPT: String = "Use debug info for shapes" def getOption[T](options: Options, optName: String): Option[T] = { Option(options.getObject(optName, null)) @@ -150,9 +126,12 @@ class GreaseAnalyzer var timeoutDuration: Option[FiniteDuration] = None var loadBase: Option[Long] = None + var loopBound: Option[Int] = None var shouldLoadRaw = false var useImageBaseAsLoadBase = true + var overridesFile: Option[File] = None val supportedProcs: Set[String] = Set("ARM", "PowerPC", "x86") + var shouldUseDebug = false setSupportsOneTimeAnalysis() @@ -180,6 +159,13 @@ class GreaseAnalyzer useImageBaseAsLoadBase = GreaseAnalyzer .getOption[Boolean](x, GreaseAnalyzer.USE_IMAGE_BASE_AS_LOAD_BASE_OPT) .getOrElse(true) + overridesFile = + GreaseAnalyzer.getOption[File](x, GreaseAnalyzer.OVERRIDE_FILE) + loopBound = GreaseAnalyzer + .getOption[Int](x, GreaseAnalyzer.LOOP_BOUND_OPT).filter(l => l != 0) + shouldUseDebug = GreaseAnalyzer + .getOption[Boolean](x, GreaseAnalyzer.SHOULD_USE_DEBUG_OPT) + .getOrElse(false) } override def registerOptions(options: Options, program: Program): Unit = { @@ -214,6 +200,30 @@ class GreaseAnalyzer null, "Uses the image base of this Ghidra binary as the load base when in raw mode (this is the default, disable to use custom image base)" ) + + options.registerOption( + GreaseAnalyzer.OVERRIDE_FILE, + OptionType.FILE_TYPE, + null, + null, + "Override file to use in GREASE" + ) + + options.registerOption( + GreaseAnalyzer.LOOP_BOUND_OPT, + OptionType.INT_TYPE, + 0, + null, + "Loop bound limit for analysis" + ) + + options.registerOption( + GreaseAnalyzer.SHOULD_USE_DEBUG_OPT, + OptionType.BOOLEAN_TYPE, + true, + null, + "Uses any available DWARF information to populate shapes with type signatures" + ) } @throws(classOf[CancelledException]) @@ -223,7 +233,15 @@ class GreaseAnalyzer monitor: TaskMonitor, log: MessageLog ): Boolean = { - GreaseBackgroundCmd(set, timeoutDuration, loadBase, shouldLoadRaw) + GreaseBackgroundCmd( + set, + timeoutDuration, + loadBase, + shouldLoadRaw, + overridesFile, + loopBound, + shouldUseDebug + ) .applyTo(program, monitor) true } diff --git a/ghidra-plugin/src/main/scala/grease/GreaseWrapper.scala b/ghidra-plugin/src/main/scala/grease/GreaseWrapper.scala index e9eaf9c0..0de2f7b7 100644 --- a/ghidra-plugin/src/main/scala/grease/GreaseWrapper.scala +++ b/ghidra-plugin/src/main/scala/grease/GreaseWrapper.scala @@ -13,15 +13,21 @@ import scala.util.{Failure, Success} import scala.Serializable import scala.jdk.CollectionConverters._ import ghidra.framework.Application +import java.io.File +import ghidra.program.model.listing.CommentType object AddrConversions { - def greaseOffsetToAddr(greaseOffset: Long, prog: Program): Address = + def greaseOffsetToAddr( + greaseOffset: Long, + prog: Program, + loadOffset: Long + ): Address = // Convert a greaseOffset in an ELF to a Ghidra address. // This function assumes the greaseOffset is in memory in the default address space (where code will be) // GREASE loads the binary at the elf base so the offset is relative to the ELF base. // Then we add the raw offset to image base in Ghidra. - val addressOffset = (greaseOffset - ElfLoader + val addressOffset = (greaseOffset - loadOffset - ElfLoader .getElfOriginalImageBase(prog)) prog.getImageBase().add(addressOffset) @@ -33,7 +39,7 @@ object GreaseConfiguration { trait AddressingMode { def ghidraAddressToGreaseOffset(addr: Address): Long - def greaseOffsettoGhidraAddress(greaseOff: Long): Address + def greaseOffsettoGhidraAddress(greaseOff: Long, loadOffset: Long): Address val loadBase: Option[Long] } @@ -43,8 +49,11 @@ class ELFAddressing(val prog: Program) extends AddressingMode { addr.subtract(prog.getImageBase()) + ElfLoader .getElfOriginalImageBase(prog) - override def greaseOffsettoGhidraAddress(greaseOff: Long): Address = - AddrConversions.greaseOffsetToAddr(greaseOff, prog) + override def greaseOffsettoGhidraAddress( + greaseOff: Long, + loadOffset: Long + ): Address = + AddrConversions.greaseOffsetToAddr(greaseOff, prog, loadOffset) override val loadBase: Option[Long] = None @@ -71,7 +80,10 @@ class RawBase(val loadBaseOption: Option[Long], prog: Program) override def ghidraAddressToGreaseOffset(addr: Address): Long = addr.getOffset() - override def greaseOffsettoGhidraAddress(greaseOff: Long): Address = + override def greaseOffsettoGhidraAddress( + greaseOff: Long, + loadOffset: Long + ): Address = toRAMAddress(greaseOff) override val loadBase: Option[Long] = Some(loadBaseAddr.getOffset()) @@ -83,7 +95,10 @@ case class GreaseConfiguration( // None uses the default for grease timeout: Option[FiniteDuration], loadBase: Option[Long], - isRawBinary: Boolean + isRawBinary: Boolean, + overridesFile: Option[File], + loopBound: Option[Int], + useDebugInfo: Boolean ) { val GHIDRA_THUMB_REG_NAME = "TMode" @@ -121,6 +136,14 @@ case class GreaseConfiguration( "--address", GreaseConfiguration.renderAddress(modAddr) ) + + def optionalCommandLine[A]( + name: String, + value: Option[A], + f: A => String + ): Seq[String] = + value.map(x => Seq(name, f(x))).getOrElse(Seq()) + val rawLine = if isRawBinary then Seq("--raw-binary") else Seq() val timeoutLine = timeout.map(x => Seq("--timeout", x.toMillis.toString)).getOrElse(Seq()) @@ -128,25 +151,40 @@ case class GreaseConfiguration( .filter(_ => isRawBinary) .map(x => Seq("--load-base", GreaseConfiguration.renderAddress(x))) .getOrElse(Seq()) + val overrideLine = + overridesFile.map(x => Seq("--overrides", x.getPath())).getOrElse(Seq()) + + val useDebugLine = + if useDebugInfo then Seq("--use-debug-info-types") else Seq() + baseLine ++ rawLine ++ timeoutLine ++ baseAddr + ++ overrideLine ++ optionalCommandLine( + "--loop-bound", + loopBound, + x => x.toString + ) ++ useDebugLine } } case class GreaseException(val msg: String) extends Exception object GreaseResult { - def parseBatch(batch: String, addrs: AddressingMode): Option[PossibleBug] = { - val js = ujson.read(batch) - val hasBug = js("batchStatus")("tag").str == "BatchBug" - if !hasBug then return None + def parseLoc(locStr: String, addrs: AddressingMode, loadOffset: Long) = { + // drops 0x, I dont know of a java method that parses based on format + val loc = Integer.parseInt(locStr.drop(2), 16).toLong + addrs.greaseOffsettoGhidraAddress(loc, loadOffset) + } - val contents = js("batchStatus")("contents") + def parseBug( + ent: Address, + addrs: AddressingMode, + contents: ujson.Value, + loadOffset: Long + ): Option[PossibleBug] = { val desc = contents("bugDesc") + try { - val base = 16 - // drops 0x, I dont know of a java method that parses based on format - val loc = Integer.parseInt(desc("bugLoc").str.drop(2), base).toLong - val addr = addrs.greaseOffsettoGhidraAddress(loc) + val addr = GreaseResult.parseLoc(desc("bugLoc").str, addrs, loadOffset) Some( PossibleBug( addr, @@ -156,17 +194,77 @@ object GreaseResult { ) ) } catch { - // GREASE can dump "Multiple Locations:" from checkMustFail currently we dont handle this - case e: NumberFormatException => { None } + // GREASE can dump "Multiple Locations:" from checkMustFail currently we use the original addr + case e: NumberFormatException => { + Some( + PossibleBug( + ent, + desc, + contents("bugArgs").arr, + contents("bugShapes").str + ) + ) + } } + } + def parseCouldNotInfer( + ent: Address, + addrs: AddressingMode, + contents: ujson.Value, + loadOffset: Long + ): Option[FailedToRefine] = { + def parsePred(x: ujson.Value): Option[FailedPredicate] = + try { + Some(FailedPredicate( + parseLoc(x("_failedPredicateLocation").str, addrs, loadOffset), + x("_failedPredicateMessage").str, + x("_failedPredicateConcShapes").str + )) + } catch { + case e: NumberFormatException => None + } + + Some(FailedToRefine(ent, contents.arr.toSeq.flatMap(parsePred))) } - def parse(chunks: String, addrs: AddressingMode): Try[GreaseResult] = { + def parseBatch( + ent: Address, + batch: String, + addrs: AddressingMode + ): Option[ParsedBatch] = { + val js = ujson.read(batch) + + def getLoadOffset(): Long = + js("batchLoadOffset").num.toLong + + js("batchStatus")("tag").str match + case "BatchBug" => + parseBug(ent, addrs, js("batchStatus")("contents"), getLoadOffset()) + case "BatchCouldNotInfer" => + parseCouldNotInfer( + ent, + addrs, + js("batchStatus")("contents"), + getLoadOffset() + ) + case _ => None + } + + def parse( + ent: Address, + chunks: String, + addrs: AddressingMode + ): Try[GreaseResult] = { // each line should have a batch Success( GreaseResult( - chunks.lines().iterator().asScala.flatMap(parseBatch(_, addrs)).toList + chunks + .lines() + .iterator() + .asScala + .flatMap(parseBatch(ent, _, addrs)) + .toList ) ) } @@ -177,10 +275,84 @@ case class PossibleBug( description: ujson.Value, args: ujson.Arr, shapes: String -) +) extends ParsedBatch { + def printToProg(prog: Program): Unit = + ParsedBatch.addComment( + s"\n Possible bug: ${description.render()}", + prog, + appliedTo + ) + ParsedBatch.greaseBookmark( + prog, + appliedTo, + "Possible bug", + "GREASE detected a potential bug" + ) +} + +case class FailedPredicate(loc: Address, message: String, concShapes: String) {} + +case class FailedToRefine(inFunc: Address, preds: Seq[FailedPredicate]) + extends ParsedBatch { + + override def printToProg(prog: Program): Unit = { + for (pred <- preds) + Msg.info(this, s"Adding safety pred comment for $inFunc") + ParsedBatch.addComment( + s"Safety predicate failed: ${pred.message}", + prog, + pred.loc + ) + ParsedBatch.addComment( + s"Concretized invalidating shape ${pred.concShapes}", + prog, + pred.loc + ) + + ParsedBatch.greaseBookmark( + prog, + inFunc, + "Failed to Refine", + "could not refine to satisfy a safety condition" + ) + } +} + +object ParsedBatch { + val GREASE_BOOKMARK_TYPE = "GREASE" + + def greaseBookmark( + prog: Program, + addr: Address, + category: String, + note: String + ): Unit = { + prog + .getBookmarkManager() + .setBookmark(addr, GREASE_BOOKMARK_TYPE, category, note) + } + + def addComment(comm: String, prog: Program, toAddr: Address): Unit = { + val prevCom = Option( + prog + .getListing() + .getComment(CommentType.PRE, toAddr) + ) + val nextCom = + prevCom + .getOrElse("") + comm + "\n" + prog + .getListing() + .setComment(toAddr, CommentType.PRE, nextCom) + } +} + +sealed abstract class ParsedBatch { + def printToProg(prog: Program): Unit +} // Coarse grained results for now, grab potential bugs only -case class GreaseResult(val possibleBugs: List[PossibleBug]) {} +case class GreaseResult(val possibleBugs: List[ParsedBatch]) {} object GreaseWrapper { def apply(prog: Program): GreaseWrapper = { @@ -201,8 +373,9 @@ class GreaseWrapper(val localRunner: os.Path, val prog: Program) { if result.exitCode != 0 then Msg.error(this, s"GREASE failed to run with ${result.err}") return Failure(GreaseException("GREASE process failed")) - - GreaseResult.parse(result.out.text(), conf.addressResolver(prog)) + val res = result.out.text() + Msg.info(this, s"Received: $res") + GreaseResult.parse(conf.entrypoint, res, conf.addressResolver(prog)) } } diff --git a/grease/src/Grease/Macaw/Dwarf.hs b/grease/src/Grease/Macaw/Dwarf.hs index 05d3a9eb..e55c28b9 100644 --- a/grease/src/Grease/Macaw/Dwarf.hs +++ b/grease/src/Grease/Macaw/Dwarf.hs @@ -44,6 +44,7 @@ import Lang.Crucible.CFG.Core qualified as C import Lang.Crucible.LLVM.Bytes (toBytes) import Lang.Crucible.LLVM.MemModel qualified as Mem import Lang.Crucible.Types qualified as CT +import Data.Maybe qualified as Maybe -- | Find the pc for DWARF which is relative to the ELF absent its base load address. -- We find this address by finding the segment that corresponds to the target address @@ -156,7 +157,7 @@ constructPtrTarget tyUnrollBound sprog visitCount tyApp = -- even if in practice this should not occur. shapeSeq (MDwarf.StructType sdecl) = let structSize = MDwarf.structByteSize sdecl - in do + mbStruct = do (endLoc, seqs) <- foldM ( \(currLoc, seqs) mem -> @@ -167,10 +168,14 @@ constructPtrTarget tyUnrollBound sprog visitCount tyApp = (0, Seq.empty) (MDwarf.structMembers sdecl) let endPad = if endLoc >= structSize then Seq.empty else Seq.singleton (padding $ structSize - endLoc) - Just $ (seqs Seq.>< endPad) + Just $ (seqs Seq.>< endPad) in + -- TODO this is unsound because we init padding + Just $ Maybe.fromMaybe (Seq.singleton $ Initialized NoTag (toBytes structSize)) mbStruct + shapeSeq (MDwarf.PointerType _ maybeRef) = let mshape = constructPtrMemShapeFromRef tyUnrollBound sprog visitCount =<< maybeRef in Seq.singleton <$> mshape + shapeSeq (MDwarf.TypedefType (MDwarf.Typedef _ _ _ tyRef)) = shapeSeq =<< extractType sprog tyRef shapeSeq _ = Nothing type VisitCount = Map.Map MDwarf.TypeRef Int