Library Linearizeproof

Correctness proof for code linearization

Require Import Coqlib.
Require Import Maps.
Require Import Ordered.
Require Import FSets.
Require Import AST.
Require Import Integers.
Require Import Values.
Require Import Mem.
Require Import Events.
Require Import Globalenvs.
Require Import Errors.
Require Import Smallstep.
Require Import Op.
Require Import Locations.
Require Import LTL.
Require Import LTLtyping.
Require Import LTLin.
Require Import Linearize.
Require Import Lattice.

Module NodesetFacts := FSetFacts.Facts(Nodeset).

Section LINEARIZATION.

Variable prog: LTL.program.
Variable tprog: LTLin.program.

Hypothesis TRANSF: transf_program prog = OK tprog.

Let ge := Genv.globalenv prog.
Let tge := Genv.globalenv tprog.

Lemma functions_translated:
  forall v f,
  Genv.find_funct ge v = Some f ->
  exists tf,
  Genv.find_funct tge v = Some tf /\ transf_fundef f = OK tf.
Proof (Genv.find_funct_transf_partial transf_fundef TRANSF).

Lemma function_ptr_translated:
  forall v f,
  Genv.find_funct_ptr ge v = Some f ->
  exists tf,
  Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf.
Proof (Genv.find_funct_ptr_transf_partial transf_fundef TRANSF).

Lemma symbols_preserved:
  forall id,
  Genv.find_symbol tge id = Genv.find_symbol ge id.
Proof (Genv.find_symbol_transf_partial transf_fundef _ TRANSF).

Lemma sig_preserved:
  forall f tf,
  transf_fundef f = OK tf ->
  LTLin.funsig tf = LTL.funsig f.


Lemma find_function_translated:
  forall ros ls f,
  LTL.find_function ge ros ls = Some f ->
  exists tf,
  find_function tge ros ls = Some tf /\ transf_fundef f = OK tf.


Correctness of reachability analysis


The entry point of the function is reachable.

Lemma reachable_entrypoint:
  forall f, (reachable f)!!(f.(fn_entrypoint)) = true.


The successors of a reachable instruction are reachable.

Lemma reachable_successors:
  forall f pc pc' i,
  f.(LTL.fn_code)!pc = Some i -> In pc' (successors_instr i) ->
  (reachable f)!!pc = true ->
  (reachable f)!!pc' = true.


Properties of node enumeration


An enumeration of CFG nodes is correct if the following conditions hold:
  • All nodes for reachable basic blocks must be in the list.
  • The list is without repetition (so that no code duplication occurs).


We prove that the result of the enumerate function satisfies both conditions.

Lemma nodeset_of_list_correct:
  forall l s s',
  nodeset_of_list l s = OK s' ->
  list_norepet l
  /\ (forall pc, Nodeset.In pc s' <-> Nodeset.In pc s \/ In pc l)
  /\ (forall pc, In pc l -> ~Nodeset.In pc s).


Lemma check_reachable_correct:
  forall f reach s pc i,
  check_reachable f reach s = true ->
  f.(LTL.fn_code)!pc = Some i ->
  reach!!pc = true ->
  Nodeset.In pc s.


Lemma enumerate_complete:
  forall f enum pc i,
  enumerate f = OK enum ->
  f.(LTL.fn_code)!pc = Some i ->
  (reachable f)!!pc = true ->
  In pc enum.


Lemma enumerate_norepet:
  forall f enum,
  enumerate f = OK enum ->
  list_norepet enum.


Properties related to labels


If labels are globally unique and the LTLin code c contains a subsequence Llabel lbl :: c1, then find_label lbl c returns c1.

Fixpoint unique_labels (c: code) : Prop :=
  match c with
  | nil => True
  | Llabel lbl :: c => ~(In (Llabel lbl) c) /\ unique_labels c
  | i :: c => unique_labels c
  end.

Lemma find_label_unique:
  forall lbl c1 c2 c3,
  is_tail (Llabel lbl :: c1) c2 ->
  unique_labels c2 ->
  find_label lbl c2 = Some c3 ->
  c1 = c3.


Correctness of the starts_with test.

Lemma starts_with_correct:
  forall lbl c1 c2 c3 s f sp ls m,
  is_tail c1 c2 ->
  unique_labels c2 ->
  starts_with lbl c1 = true ->
  find_label lbl c2 = Some c3 ->
  plus step tge (State s f sp c1 ls m)
             E0 (State s f sp c3 ls m).


Connection between find_label and linearization.

Lemma find_label_add_branch:
  forall lbl k s,
  find_label lbl (add_branch s k) = find_label lbl k.


Lemma find_label_lin_instr:
  forall lbl k b,
  find_label lbl (linearize_instr b k) = find_label lbl k.


Lemma find_label_lin_rec:
  forall f enum pc b,
  In pc enum ->
  f.(LTL.fn_code)!pc = Some b ->
  exists k, find_label pc (linearize_body f enum) = Some (linearize_instr b k).


Lemma find_label_lin:
  forall f tf pc b,
  transf_function f = OK tf ->
  f.(LTL.fn_code)!pc = Some b ->
  (reachable f)!!pc = true ->
  exists k,
  find_label pc (fn_code tf) = Some (linearize_instr b k).


Lemma find_label_lin_inv:
  forall f tf pc b k,
  transf_function f = OK tf ->
  f.(LTL.fn_code)!pc = Some b ->
  (reachable f)!!pc = true ->
  find_label pc (fn_code tf) = Some k ->
  exists k', k = linearize_instr b k'.


Lemma find_label_lin_succ:
  forall f tf s,
  transf_function f = OK tf ->
  valid_successor f s ->
  (reachable f)!!s = true ->
  exists k,
  find_label s (fn_code tf) = Some k.


Unique label property for linearized code.

Lemma label_in_add_branch:
  forall lbl s k,
  In (Llabel lbl) (add_branch s k) -> In (Llabel lbl) k.


Lemma label_in_lin_instr:
  forall lbl k b,
  In (Llabel lbl) (linearize_instr b k) -> In (Llabel lbl) k.


Lemma label_in_lin_rec:
  forall f lbl enum,
  In (Llabel lbl) (linearize_body f enum) -> In lbl enum.


Lemma unique_labels_add_branch:
  forall lbl k,
  unique_labels k -> unique_labels (add_branch lbl k).


Lemma unique_labels_lin_instr:
  forall k b,
  unique_labels k -> unique_labels (linearize_instr b k).


Lemma unique_labels_lin_rec:
  forall f enum,
  list_norepet enum ->
  unique_labels (linearize_body f enum).


Lemma unique_labels_transf_function:
  forall f tf,
  transf_function f = OK tf ->
  unique_labels (fn_code tf).


Correctness of add_branch.

Lemma is_tail_find_label:
  forall lbl c2 c1,
  find_label lbl c1 = Some c2 -> is_tail c2 c1.


Lemma is_tail_add_branch:
  forall lbl c1 c2, is_tail (add_branch lbl c1) c2 -> is_tail c1 c2.


Lemma add_branch_correct:
  forall lbl c k s f tf sp ls m,
  transf_function f = OK tf ->
  is_tail k tf.(fn_code) ->
  find_label lbl tf.(fn_code) = Some c ->
  plus step tge (State s tf sp (add_branch lbl k) ls m)
             E0 (State s tf sp c ls m).


Correctness of linearization


The proof of semantic preservation is a simulation argument based on diagrams of the following form:
           st1 --------------- st2
            |                   |
           t|                  +|t
            |                   |
            v                   v
           st1'--------------- st2'


The invariant (horizontal lines above) is the match_states predicate defined below. It captures the fact that the flow of data is the same in the source and linearized codes. Moreover, whenever the source state is at node pc in its control-flow graph, the transformed state is at a code sequence c that starts with the label pc.

Inductive match_stackframes: LTL.stackframe -> LTLin.stackframe -> Prop :=
  | match_stackframe_intro:
      forall res f sp pc ls tf c,
      transf_function f = OK tf ->
      (reachable f)!!pc = true ->
      valid_successor f pc ->
      is_tail c (fn_code tf) ->
      wt_function f ->
      match_stackframes
        (LTL.Stackframe res f sp ls pc)
        (LTLin.Stackframe res tf sp ls (add_branch pc c)).

Inductive match_states: LTL.state -> LTLin.state -> Prop :=
  | match_states_intro:
      forall s f sp pc ls m tf ts c
        (STACKS: list_forall2 match_stackframes s ts)
        (TRF: transf_function f = OK tf)
        (REACH: (reachable f)!!pc = true)
        (AT: find_label pc (fn_code tf) = Some c)
        (WTF: wt_function f),
      match_states (LTL.State s f sp pc ls m)
                   (LTLin.State ts tf sp c ls m)
  | match_states_call:
      forall s f ls m tf ts,
      list_forall2 match_stackframes s ts ->
      transf_fundef f = OK tf ->
      wt_fundef f ->
      match_states (LTL.Callstate s f ls m)
                   (LTLin.Callstate ts tf ls m)
  | match_states_return:
      forall s ls m ts,
      list_forall2 match_stackframes s ts ->
      match_states (LTL.Returnstate s ls m)
                   (LTLin.Returnstate ts ls m).

Hypothesis wt_prog: wt_program prog.

Theorem transf_step_correct:
  forall s1 t s2, LTL.step ge s1 t s2 ->
  forall s1' (MS: match_states s1 s1'),
  exists s2', plus LTLin.step tge s1' t s2' /\ match_states s2 s2'.


Lemma transf_initial_states:
  forall st1, LTL.initial_state prog st1 ->
  exists st2, LTLin.initial_state tprog st2 /\ match_states st1 st2.


Lemma transf_final_states:
  forall st1 st2 r,
  match_states st1 st2 -> LTL.final_state st1 r -> LTLin.final_state st2 r.


Theorem transf_program_correct:
  forall (beh: program_behavior), not_wrong beh ->
  LTL.exec_program prog beh -> LTLin.exec_program tprog beh.


End LINEARIZATION.