1
0

v1.0.0: dev.uapf.semantic-document-analysis

UAPF v1.1 SSOT-conformant Level 4 process package — reusable semantic
document analysis, shareable across DMS / intake / mailroom systems.

Structure:
- uapf.yaml (kind: uapf.package, level 4) + manifest.json for engine compatibility
- bpmn/semantic-document-analysis.bpmn.xml — 3 service tasks invoking
  reserved UAPF-IP capabilities ai.redact@1, ai.extract@1, event.emit@1
- resources/mappings.yaml — task->target bindings with I/O contracts
- resources/schemas/vdvc-semantic-summary.schema.json — output contract
- resources/guardrails.yaml — GDPR + EU AI Act constraints
- metadata/ownership.yaml + metadata/lifecycle.yaml
- docs/, fixtures/, tests/eval-set.json

Validates cleanly against the UAPFormat/UAPF-specification schemas.
This commit is contained in:
2026-05-16 09:32:55 +00:00
commit ae0c646021
16 changed files with 422 additions and 0 deletions

32
resources/guardrails.yaml Normal file
View File

@@ -0,0 +1,32 @@
# Non-normative supplementary file. UAPF v1.1 does NOT define guardrails as a
# cornerstone; they live under resources/ as a host-readable policy snapshot.
authority: dev.uapf.stewards
version: "1.0.0"

# Privacy constraints on the process output.
privacy:
  # PII categories that are forbidden in the output.
  forbidden_in_output:
    - personal_name
    - personal_id_number
    - postal_address
    - phone_number
    - email_address
    - bank_account
    - iban
    - health_record_value
    - biometric_value
  # Rules for reporting detected PII without leaking the values themselves.
  pii_handling:
    - "Detected PII MUST be listed in sensitivityControl.detectedEntityTypes as TYPE names only, never values."
    - "Set personalDataRisk according to detected types: NONE < LOW < MEDIUM < HIGH."

# EU AI Act classification plus the transparency and oversight rules that
# follow from it.
eu_ai_act:
  classification: "Annex III §5(a) and §8(a) — high-risk per Regulation 2024/1689"
  required_transparency_fields:
    - "semanticSummary.summarySource MUST be \"AI\""
    - "semanticSummary.aiConfidenceScore MUST be 0.0–1.0"
    - "semanticSummary.aiModelVersion MUST be the exact model identifier"
  human_oversight: "humanValidationStatus MUST be PENDING or REQUIRED on completion; consuming higher-level process MUST surface to a human before any consequential action."

# NOTE(review): kept as its own top-level section; confirm it is not meant to
# nest under eu_ai_act.
accuracy:
  - "Do not fabricate fields not supported by source text."
  - "Set aiConfidenceScore below 0.3 when classification is uncertain."
  - "If document is unreadable or too short, set humanValidationStatus to REQUIRED."

54
resources/mappings.yaml Normal file
View File

@@ -0,0 +1,54 @@
# UAPF resource mapping: declares the execution target and binds each BPMN
# service task to it, together with the task's input/output contract.
kind: uapf.resources.mapping

# Execution targets that bindings may reference via targetId.
targets:
  - id: agent.semantic-extractor
    type: ai_agent
    name: Semantic Extraction AI Agent
    description: |
      Host-provided AI agent that fulfils ai.redact@1, ai.extract@1, and
      event.emit@1 for this process. Implementation is the host's choice
      (Claude, GPT, on-prem LLM, etc.); this package supplies the BPMN
      flow, the output schema, and the guardrails.
    capabilities:
      - capability.ai.redact
      - capability.ai.extract
      - capability.event.emit

# One binding per BPMN service task; all three resolve to the same target.
# NOTE(review): timeout/retries are nested under `contract` and
# requiredCapabilities sits at binding level — confirm this placement against
# the UAPF resources-mapping schema.
bindings:
  # PII redaction task.
  - source: { type: bpmn.serviceTask, ref: Task_RedactPii }
    targetId: agent.semantic-extractor
    mode: autonomous
    contract:
      input:
        - name: text
          type: string
          required: true
        - name: categories
          type: array
          required: false
          description: "Optional PII categories; defaults to host policy."
      output:
        - name: redactedText
          type: string
        - name: detections
          type: array
      timeout: "10s"
    requiredCapabilities: [capability.ai.redact]

  # Semantic extraction task; consumes the redacted text from the previous task.
  - source: { type: bpmn.serviceTask, ref: Task_ExtractSemantics }
    targetId: agent.semantic-extractor
    mode: autonomous
    contract:
      input:
        - name: text
          type: string
          required: true
          description: "Redacted text from previous task."
        - name: schema
          type: object
          required: true
          description: "VDVC v1.1 output schema. Reference: resources/schemas/vdvc-semantic-summary.schema.json"
      output:
        - name: extracted
          type: object
          description: "Validates against resources/schemas/vdvc-semantic-summary.schema.json"
        - name: confidence
          type: number
        - name: modelUsed
          type: string
      timeout: "30s"
      # Only this binding declares a retry policy.
      retries: { maxAttempts: 2, backoffMs: 2000 }
    requiredCapabilities: [capability.ai.extract]

  # Result event emission task; declares inputs only, no output contract.
  - source: { type: bpmn.serviceTask, ref: Task_EmitResultEvent }
    targetId: agent.semantic-extractor
    mode: autonomous
    contract:
      input:
        - name: eventType
          type: string
          required: true
        - name: payload
          type: object
          required: true
      timeout: "5s"
    requiredCapabilities: [capability.event.emit]

View File

@@ -0,0 +1,49 @@
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://uapf.dev/schemas/vdvc/semantic-summary-v1.1.json",
  "title": "VDVC Semantic Summary v1.1",
  "description": "Output contract for ai.extract@1 when invoked by Process_SemanticDocumentAnalysis. The host's AI agent MUST produce output validating against this schema.",
  "type": "object",
  "required": [
    "semanticSummary",
    "sensitivityControl"
  ],
  "properties": {
    "semanticSummary": {
      "type": "object",
      "required": [
        "primaryTopic",
        "summary",
        "summarySource",
        "aiConfidenceScore",
        "aiModelVersion",
        "humanValidationStatus"
      ],
      "properties": {
        "primaryTopic": { "type": "string", "maxLength": 200 },
        "subTopics": { "type": "array", "items": { "type": "string" } },
        "summary": { "type": "string", "maxLength": 4000 },
        "documentPurpose": { "type": "string", "maxLength": 200 },
        "requestedAction": { "type": "string", "maxLength": 200 },
        "involvedPartyTypes": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Party TYPES only, never names."
        },
        "geographicScope": { "type": "string" },
        "sectorTags": { "type": "array", "items": { "type": "string" } },
        "legalDomain": { "type": "string" },
        "estimatedRiskLevel": { "enum": ["LOW", "MEDIUM", "HIGH", "CRITICAL"] },
        "urgencyLevel": { "enum": ["LOW", "NORMAL", "HIGH", "URGENT"] },
        "keywords": {
          "type": "array",
          "maxItems": 20,
          "items": { "type": "string" }
        },
        "detectedLanguage": { "type": "string", "pattern": "^[a-z]{2}$" },
        "summarySource": { "const": "AI" },
        "aiConfidenceScore": { "type": "number", "minimum": 0, "maximum": 1 },
        "aiModelVersion": { "type": "string" },
        "humanValidationStatus": {
          "enum": ["PENDING", "REQUIRED", "VALIDATED", "REJECTED"]
        },
        "mentions_child": { "type": "boolean" },
        "ongoing_harm": { "type": "boolean" },
        "vulnerable_group": { "type": "boolean" },
        "criminal_indication": { "type": "boolean" }
      }
    },
    "sensitivityControl": {
      "type": "object",
      "required": [
        "personalDataRisk",
        "allowCentralization",
        "redactionLevel"
      ],
      "properties": {
        "personalDataRisk": { "enum": ["NONE", "LOW", "MEDIUM", "HIGH"] },
        "allowCentralization": { "type": "boolean" },
        "redactionLevel": { "enum": ["NONE", "PARTIAL", "FULL"] },
        "accessRestrictionBasis": { "type": "string" },
        "classifiedInformation": { "type": "boolean" },
        "detectedEntityTypes": { "type": "array", "items": { "type": "string" } }
      }
    }
  }
}