v1.0.0: dev.uapf.semantic-document-analysis

UAPF v1.1 SSOT-conformant Level 4 process package — reusable semantic document analysis, shareable across DMS / intake / mailroom systems.

Structure:
- uapf.yaml (kind: uapf.package, level 4) + manifest.json engine-compat
- bpmn/semantic-document-analysis.bpmn.xml — 3 service tasks invoking reserved UAPF-IP capabilities ai.redact@1, ai.extract@1, event.emit@1
- resources/mappings.yaml — task->target bindings with I/O contracts
- resources/schemas/vdvc-semantic-summary.schema.json — output contract
- resources/guardrails.yaml — GDPR + EU AI Act constraints
- metadata/ownership.yaml + metadata/lifecycle.yaml
- docs/, fixtures/, tests/eval-set.json

Validates cleanly against UAPFormat/UAPF-specification schemas.
2026-05-16 09:32:55 +00:00
commit ae0c646021
16 changed files with 422 additions and 0 deletions
--- a/resources/guardrails.yaml
+++ b/resources/guardrails.yaml
@@ -0,0 +1,32 @@
+# Non-normative supplementary file. UAPF v1.1 does NOT treat guardrails as a
+# cornerstone; they live under resources/ as a host-readable policy snapshot.
+authority: dev.uapf.stewards
+version: "1.0.0"
+
+privacy:
+  forbidden_in_output:
+    - personal_name
+    - personal_id_number
+    - postal_address
+    - phone_number
+    - email_address
+    - bank_account
+    - iban
+    - health_record_value
+    - biometric_value
+  pii_handling:
+    - "Detected PII MUST be listed in sensitivityControl.detectedEntityTypes as TYPE names only, never values."
+    - "Set personalDataRisk according to detected types: NONE < LOW < MEDIUM < HIGH."
+
+eu_ai_act:
+  classification: "Annex III §5(a) and §8(a) — high-risk per Regulation 2024/1689"
+  required_transparency_fields:
+    - "semanticSummary.summarySource MUST be \"AI\""
+    - "semanticSummary.aiConfidenceScore MUST be 0.0–1.0"
+    - "semanticSummary.aiModelVersion MUST be the exact model identifier"
+  human_oversight: "humanValidationStatus MUST be PENDING or REQUIRED on completion; consuming higher-level process MUST surface to a human before any consequential action."
+
+accuracy:
+  - "Do not fabricate fields not supported by source text."
+  - "Set aiConfidenceScore below 0.3 when classification is uncertain."
+  - "If document is unreadable or too short, set humanValidationStatus to REQUIRED."
--- a/resources/mappings.yaml
+++ b/resources/mappings.yaml
@@ -0,0 +1,54 @@
+kind: uapf.resources.mapping
+
+targets:
+  - id: agent.semantic-extractor
+    type: ai_agent
+    name: Semantic Extraction AI Agent
+    description: |
+      Host-provided AI agent that fulfils ai.redact@1, ai.extract@1, and
+      event.emit@1 for this process. Implementation is the host's choice
+      (Claude, GPT, on-prem LLM, etc.); this package supplies the BPMN
+      flow, the output schema, and the guardrails.
+    capabilities:
+      - capability.ai.redact
+      - capability.ai.extract
+      - capability.event.emit
+
+bindings:
+  - source: { type: bpmn.serviceTask, ref: Task_RedactPii }
+    targetId: agent.semantic-extractor
+    mode: autonomous
+    contract:
+      input:
+        - { name: text,       type: string, required: true }
+        - { name: categories, type: array,  required: false, description: "Optional PII categories; defaults to host policy." }
+      output:
+        - { name: redactedText, type: string }
+        - { name: detections,   type: array }
+      timeout: "10s"
+    requiredCapabilities: [capability.ai.redact]
+
+  - source: { type: bpmn.serviceTask, ref: Task_ExtractSemantics }
+    targetId: agent.semantic-extractor
+    mode: autonomous
+    contract:
+      input:
+        - { name: text,   type: string, required: true,  description: "Redacted text from previous task." }
+        - { name: schema, type: object, required: true,  description: "VDVC v1.1 output schema. Reference: resources/schemas/vdvc-semantic-summary.schema.json" }
+      output:
+        - { name: extracted,  type: object, description: "Validates against resources/schemas/vdvc-semantic-summary.schema.json" }
+        - { name: confidence, type: number }
+        - { name: modelUsed,  type: string }
+      timeout: "30s"
+      retries: { maxAttempts: 2, backoffMs: 2000 }
+    requiredCapabilities: [capability.ai.extract]
+
+  - source: { type: bpmn.serviceTask, ref: Task_EmitResultEvent }
+    targetId: agent.semantic-extractor
+    mode: autonomous
+    contract:
+      input:
+        - { name: eventType, type: string, required: true }
+        - { name: payload,   type: object, required: true }
+      timeout: "5s"
+    requiredCapabilities: [capability.event.emit]
--- a/resources/schemas/vdvc-semantic-summary.schema.json
+++ b/resources/schemas/vdvc-semantic-summary.schema.json
@@ -0,0 +1,49 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://uapf.dev/schemas/vdvc/semantic-summary-v1.1.json",
+  "title": "VDVC Semantic Summary v1.1",
+  "description": "Output contract for ai.extract@1 when invoked by Process_SemanticDocumentAnalysis. The host's AI agent MUST produce output validating against this schema.",
+  "type": "object",
+  "required": ["semanticSummary", "sensitivityControl"],
+  "properties": {
+    "semanticSummary": {
+      "type": "object",
+      "required": ["primaryTopic", "summary", "summarySource", "aiConfidenceScore", "aiModelVersion", "humanValidationStatus"],
+      "properties": {
+        "primaryTopic":          { "type": "string", "maxLength": 200 },
+        "subTopics":             { "type": "array", "items": { "type": "string" } },
+        "summary":               { "type": "string", "maxLength": 4000 },
+        "documentPurpose":       { "type": "string", "maxLength": 200 },
+        "requestedAction":       { "type": "string", "maxLength": 200 },
+        "involvedPartyTypes":    { "type": "array", "items": { "type": "string" }, "description": "Party TYPES only, never names." },
+        "geographicScope":       { "type": "string" },
+        "sectorTags":            { "type": "array", "items": { "type": "string" } },
+        "legalDomain":           { "type": "string" },
+        "estimatedRiskLevel":    { "enum": ["LOW", "MEDIUM", "HIGH", "CRITICAL"] },
+        "urgencyLevel":          { "enum": ["LOW", "NORMAL", "HIGH", "URGENT"] },
+        "keywords":              { "type": "array", "maxItems": 20, "items": { "type": "string" } },
+        "detectedLanguage":      { "type": "string", "pattern": "^[a-z]{2}$" },
+        "summarySource":         { "const": "AI" },
+        "aiConfidenceScore":     { "type": "number", "minimum": 0, "maximum": 1 },
+        "aiModelVersion":        { "type": "string" },
+        "humanValidationStatus": { "enum": ["PENDING", "REQUIRED", "VALIDATED", "REJECTED"] },
+        "mentions_child":        { "type": "boolean" },
+        "ongoing_harm":          { "type": "boolean" },
+        "vulnerable_group":      { "type": "boolean" },
+        "criminal_indication":   { "type": "boolean" }
+      }
+    },
+    "sensitivityControl": {
+      "type": "object",
+      "required": ["personalDataRisk", "allowCentralization", "redactionLevel"],
+      "properties": {
+        "personalDataRisk":      { "enum": ["NONE", "LOW", "MEDIUM", "HIGH"] },
+        "allowCentralization":   { "type": "boolean" },
+        "redactionLevel":        { "enum": ["NONE", "PARTIAL", "FULL"] },
+        "accessRestrictionBasis":{ "type": "string" },
+        "classifiedInformation": { "type": "boolean" },
+        "detectedEntityTypes":   { "type": "array", "items": { "type": "string" } }
+      }
+    }
+  }
+}