Module type V2.S

include V1.S
type t
include Bin_prot.Binable.S with type t := t
val bin_size_t : t Bin_prot.Size.sizer
val bin_write_t : t Bin_prot.Write.writer
val bin_read_t : t Bin_prot.Read.reader
val __bin_read_t__ : ( int -> t ) Bin_prot.Read.reader
val bin_shape_t : Bin_prot.Shape.t
val bin_writer_t : t Bin_prot.Type_class.writer
val bin_reader_t : t Bin_prot.Type_class.reader
val bin_t : t Bin_prot.Type_class.t
include Ppx_sexp_conv_lib.Sexpable.S with type t := t
val t_of_sexp : Sexplib0__.Sexp.t -> t
val sexp_of_t : t -> Sexplib0__.Sexp.t
type key
type corpus
val create : unit -> t

create () creates an empty instance of the byteweight decider.

val train : t -> max_length:int -> ( key -> bool ) -> corpus -> unit

train decider ~max_length test corpus trains the decider on the specified corpus. The test function classifies extracted substrings. The max_length parameter bounds the maximum length of substrings.

val length : t -> int

length decider is the total number of distinct substrings known to the decider.

next t ~length ~threshold data begin finds the next positive chunk.

Returns the offset, greater than begin, of the next longest substring of at most the given length for which h1 / (h0 + h1) > threshold.

This is a specialization of the next_if function from the extended V1.V2.S interface.

val next : t -> length:int -> threshold:float -> corpus -> int -> int option
val pp : Stdlib.Format.formatter -> t -> unit

pp ppf decider prints all chunks known to the decider.

type token
val next_if : t -> length:int -> f:( key -> int -> stats -> bool ) -> corpus -> int -> int option

next_if t ~length ~f data begin finds the next chunk that satisfies f.

Finds the next offset greater than begin of a string of the given length for which there was an observation of a substring s with length n and statistics stats, such that f s n stats is true.

val fold : t -> init:'b -> f:( 'b -> token list -> stats -> 'b ) -> 'b

fold t ~init ~f applies f to all chunks known to the decider.