Module Bap_byteweight.Make

Parameters

module Corpus : Corpus
module Trie : Bap.Std.Trie.S with type key = Corpus.key

Signature

type t
include Bin_prot.Binable.S with type t := t
val bin_size_t : t Bin_prot.Size.sizer
val bin_write_t : t Bin_prot.Write.writer
val bin_read_t : t Bin_prot.Read.reader
val __bin_read_t__ : (int -> t) Bin_prot.Read.reader
val bin_shape_t : Bin_prot.Shape.t
val bin_writer_t : t Bin_prot.Type_class.writer
val bin_reader_t : t Bin_prot.Type_class.reader
val bin_t : t Bin_prot.Type_class.t
include Ppx_sexp_conv_lib.Sexpable.S with type t := t
val t_of_sexp : Sexplib0__.Sexp.t -> t
val sexp_of_t : t -> Sexplib0__.Sexp.t
type key = Corpus.key
type corpus = Corpus.t
val create : unit -> t

create () creates an empty instance of the byteweight decider.

val train : t -> max_length:int -> (key -> bool) -> corpus -> unit

train decider ~max_length test corpus trains the decider on the specified corpus. The test function classifies extracted substrings. The max_length parameter bounds the maximum length of substrings.

val length : t -> int

length decider returns the total number of distinct substrings known to the decider.

next t ~length ~threshold data begin finds the next positive chunk, searching from the offset begin.

Returns the offset, greater than begin, of the next longest substring of up to the given length for which h1 / (h0 + h1) > threshold.

This is a specialization of the next_if function from the extended V1.V2.S interface.

val next : t -> length:int -> threshold:float -> corpus -> int -> int option
val pp : Stdlib.Format.formatter -> t -> unit

pp ppf decider prints all chunks known to the decider.