# dokumenta-semantiska-analize/algorithms/vdvc_semantic_extractor.card.yaml
---
# UAPF algorithm card: identity section.
# The path above is kept as a comment; as a bare scalar it would precede the
# top-level mapping and make the document unparseable.

kind: uapf.algorithm.card

# `version` is quoted so it is always read as a string.
id: algo.semantic_document_analysis.vdvc_semantic_extractor
version: "1.0.0"
name: "VDVC semantic metadata extractor"
# Purpose statement. Folded scalar (`>`): the line breaks below are joined
# with single spaces, and the value ends with one trailing newline.
intent: >
  Extracts a VDVC v1.1-conformant structured semantic summary from the
  redacted document text — primary topic, keywords, classification,
  summary, sensitivity signals. Output validates against
  resources/schemas/vdvc-semantic-summary.schema.json. This is the sole
  model-inference step in the process; everything else in the package
  is deterministic.

# Category of algorithm described by this card.
algorithm_kind: extractor

# Input/output contract of the extractor.
io:
  inputs:
    # Document text to analyse, bounded by `maxLength`.
    - id: redacted_content
      type: string
      cardinality: single
      constraints:
        maxLength: 200000
      documentation: "Output of the upstream PII redactor."
    - id: schema_ref
      type: string
      documentation: "Path to the JSON Schema the output must validate against."
  outputs:
    # Structured summary; its shape is given by the referenced JSON Schema.
    - id: semantic_summary
      type: object
      schema: "../resources/schemas/vdvc-semantic-summary.schema.json"
    # NOTE(review): no schema is declared for this object — confirm whether
    # one should be referenced.
    - id: sensitivity_control
      type: object
    # Presumably the value compared against `confidence.threshold` — confirm.
    - id: ai_confidence_score
      type: probability
    # Block-style constraints, matching `redacted_content` above.
    - id: output_pii_error_count
      type: integer
      constraints:
        minimum: 0

# How the algorithm is fulfilled: an external capability addressed by URI.
implementation:
  type: external
  medium: llm_prompt
  uri: "uapf-ip://capability/ai.extract@1"
  # NOTE(review): the all-zero digest looks like a placeholder — confirm
  # whether a real hash is expected here.
  hash: "sha256:0000000000000000000000000000000000000000000000000000000000000000"
  runtime:
    capability: "ai.extract@1"
    # Folded scalar with strip chomping (`>-`): one line, no trailing newline.
    note: >-
      Host-fulfilled UAPF-IP capability. Specific model identity and prompt
      hash are runtime concerns of the host; the Card declares the contract,
      not the implementation choice.

# Declared execution properties of the algorithm.
determinism: stochastic
side_effects: external_call

# Confidence gating: what happens when the score falls below `threshold`.
confidence:
  type: probability
  threshold: 0.70
  # Folded to a single line: the routing directive plus its enforcement note.
  below_threshold: >-
    route-to:human.legal_reviewer
    (enforced by DMN human-validation-gate)

# Latency figures, in milliseconds per the key names.
complexity:
  typical_latency_ms: 8000
  max_latency_ms: 60000

# Declared behaviour on failure (single-line string, folded for readability).
failure_mode: >-
  default:null + flag — DMN human-validation-gate routes low-confidence
  outputs to PENDING_REVIEW.

# Known limitations of the extractor (entries are written in Latvian).
limitations:
  # "Long documents (>50 000 characters) are truncated — first 50K + last 5K."
  # NOTE(review): "znaki" does not look like Latvian (expected something like
  # "rakstzīmes") — confirm the intended wording.
  # NOTE(review): io.inputs allows maxLength 200000, so inputs above the
  # truncation limit are accepted — confirm this is intended.
  - "Garie dokumenti (>50 000 znaki) tiek apgriezti — pirmie 50K + pēdējie 5K"
  # "Not a legal assessment — semantic classification only."
  - "Nav juridisks vērtējums — tikai semantiska klasifikācija"
  # "Latvian legal rhetoric may reduce recall."
  - "Latviešu valodas juridiskā retorika var samazināt recall"

# Regulatory reference for this card.
reference:
  # "EU AI Act 2024/1689, Annex III (high-risk AI systems), Article 13
  # (transparency)."
  legal: "EU AI Act 2024/1689, Pielikums III (augstā riska MI sistēmas), 13. pants (caurspīdība)."
  url: "https://eur-lex.europa.eu/eli/reg/2024/1689/oj"

# Ownership: who is responsible for this card.
owners:
  - type: team
    id: uapf-stewards
    contact: "stewards@uapf.dev"

# Lifecycle state; `since` is quoted so it stays a string, not a date.
lifecycle:
  status: draft
  since: "2026-05-20"

# Audit logging policy for inputs and outputs, and the retention period.
audit:
  log_inputs: redacted
  log_outputs: full
  retention: "7y"

# Risk classification.
# NOTE(review): these keys are camelCase while the rest of the card uses
# snake_case — presumably dictated by the consuming schema; confirm.
risk:
  aiActRiskClass: high
  humanOversight: mandatory
  transparencyTier: tier-3-full