SwiftUI + Real-Time Face Detection
■ Under investigation
SwiftUI-Vision/Realtime-Face-Tracking/Realtime-Face-Tracking at main - SatoTakeshiX/SwiftUI-Vision
https://github.com/SatoTakeshiX/SwiftUI-Vision/tree/main/Realtime-Face-Tracking/Realtime-Face-Tracki...
The notes below are based on this repository.
Add the key "Privacy - Camera Usage Description" to Info.plist and set its value to "顔を検出します。" ("Detects faces.").
Inspecting the file contents directly shows that the following has been added inside the dict element.
Info.plist
<key>NSCameraUsageDescription</key>
<string>顔を検出します。</string>
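As a side note (this is not part of the repository code): the Info.plist key only supplies the prompt text, and camera access itself is granted at runtime. A minimal sketch of an authorization check with the standard AVCaptureDevice API might look like the following; the helper name requestCameraAccess is hypothetical.

import AVFoundation

// Hypothetical helper: confirm camera permission before starting the capture session.
func requestCameraAccess(_ completion: @escaping (Bool) -> Void) {
    switch AVCaptureDevice.authorizationStatus(for: .video) {
    case .authorized:
        completion(true)
    case .notDetermined:
        // Triggers the system prompt that displays the NSCameraUsageDescription text.
        AVCaptureDevice.requestAccess(for: .video) { granted in
            DispatchQueue.main.async { completion(granted) }
        }
    default:
        // .denied or .restricted
        completion(false)
    }
}

For example, ContentView's .onAppear could call viewModel.startSession() only when this reports true.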
ContentView.swift
import SwiftUI
struct ContentView: View {
@StateObject var viewModel = TrackingViewModel()
var body: some View {
ZStack {
PreviewLayerView(previewLayer: viewModel.previewLayer, detectedRect: viewModel.detectedRects, pixelSize: viewModel.pixelSize)
}
.edgesIgnoringSafeArea(.all)
.onAppear {
viewModel.startSession()
}
}
}
#Preview {
ContentView()
}
PreviewLayerView.swift
import SwiftUI
import AVFoundation
/// With UIViewRepresentable, view.frame stays zero and the layer is never drawn.
/// With UIViewControllerRepresentable, viewController.view is given the device size.
struct PreviewLayerView: UIViewControllerRepresentable {
typealias UIViewControllerType = UIViewController
let previewLayer: AVCaptureVideoPreviewLayer
let detectedRect: [CGRect]
let pixelSize: CGSize
func makeUIViewController(context: Context) -> UIViewController {
let viewController = UIViewController()
viewController.view.layer.addSublayer(previewLayer)
previewLayer.frame = viewController.view.layer.frame
return viewController
}
func updateUIViewController(_ uiViewController: UIViewController, context: Context) {
previewLayer.frame = uiViewController.view.layer.frame
drawFaceObservations(detectedRect)
}
func drawFaceObservations(_ detectedRects: [CGRect]) {
// Remove previously added sublayers (keep the sublayer at index 0)
previewLayer.sublayers?.removeSubrange(1...)
// Build a rect matching the pixel buffer size
let captureDeviceBounds = CGRect(
x: 0,
y: 0,
width: pixelSize.width,
height: pixelSize.height
)
let overlayLayer = CALayer()
overlayLayer.name = "DetectionOverlay"
overlayLayer.bounds = captureDeviceBounds
overlayLayer.position = CGPoint(
x: captureDeviceBounds.midX,
y: captureDeviceBounds.midY
)
print("overlay: befor: \(overlayLayer.frame)")
//
let videoPreviewRect = previewLayer.layerRectConverted(fromMetadataOutputRect: CGRect(x: 0, y: 0, width: 1, height: 1))
let (rotation, scaleX, scaleY) = makerotationAndScale(videoPreviewRect: videoPreviewRect, pixelSize: pixelSize)
// Scale and mirror the image to ensure upright presentation.
let affineTransform = CGAffineTransform(rotationAngle: radiansForDegrees(rotation)).scaledBy(x: scaleX, y: -scaleY)
overlayLayer.setAffineTransform(affineTransform)
overlayLayer.position = CGPoint(x: previewLayer.bounds.midX, y: previewLayer.bounds.midY)
previewLayer.addSublayer(overlayLayer)
print("overlay: after: \(overlayLayer.frame)")
let layers = detectedRects.compactMap { detectedRect -> CALayer in
let xMin = detectedRect.minX
let yMax = detectedRect.maxY
let detectedX = xMin * overlayLayer.frame.size.width + overlayLayer.frame.minX
let detectedY = (1 - yMax) * overlayLayer.frame.size.height
let detectedWidth = detectedRect.width * overlayLayer.frame.size.width
let detectedHeight = detectedRect.height * overlayLayer.frame.size.height
let layer = CALayer()
layer.frame = CGRect(x: detectedX, y: detectedY, width: detectedWidth, height: detectedHeight)
layer.borderWidth = 2.0
layer.borderColor = UIColor.green.cgColor
return layer
}
layers.forEach { self.previewLayer.addSublayer($0) }
}
private func radiansForDegrees(_ degrees: CGFloat) -> CGFloat {
return CGFloat(Double(degrees) * Double.pi / 180.0)
}
private func makerotationAndScale(videoPreviewRect: CGRect, pixelSize: CGSize) -> (rotation: CGFloat, scaleX: CGFloat, scaleY: CGFloat) {
var rotation: CGFloat
var scaleX: CGFloat
var scaleY: CGFloat
// Rotate the layer into screen orientation.
switch UIDevice.current.orientation {
case .portraitUpsideDown:
rotation = 180
scaleX = videoPreviewRect.width / pixelSize.width
scaleY = videoPreviewRect.height / pixelSize.height
case .landscapeLeft:
rotation = 90
scaleX = videoPreviewRect.height / pixelSize.width
scaleY = scaleX
case .landscapeRight:
rotation = -90
scaleX = videoPreviewRect.height / pixelSize.width
scaleY = scaleX
default:
rotation = 0
scaleX = videoPreviewRect.width / pixelSize.width
scaleY = videoPreviewRect.height / pixelSize.height
}
return (rotation, scaleX, scaleY)
}
}
TrackingViewModel.swift
import Combine
import UIKit
import Vision
import AVKit
final class TrackingViewModel: ObservableObject {
let captureSession = CaptureSession()
let visionClient = VisionClient()
var previewLayer: AVCaptureVideoPreviewLayer {
return captureSession.previewLayer
}
@Published var detectedRects: [CGRect] = []
private var cancellables: Set<AnyCancellable> = []
init() {
bind()
}
@Published var pixelSize: CGSize = .zero
func bind() {
captureSession.outputs
.receive(on: RunLoop.main)
.sink { [weak self] output in
guard let self = self else { return }
var requestHandlerOptions: [VNImageOption: AnyObject] = [:]
// Pass the camera intrinsic data to the Vision request as an option
requestHandlerOptions[VNImageOption.cameraIntrinsics] = output.cameraIntrinsicData
// Keep the pixel buffer size (used for the overlay in PreviewLayerView)
self.pixelSize = output.pixelBufferSize
self.visionClient.request(cvPixelBuffer: output.pixelBuffer,
orientation: self.makeOrientation(with: UIDevice.current.orientation),
options: requestHandlerOptions)
}
.store(in: &cancellables)
visionClient.$visionObjectObservations
.receive(on: RunLoop.main)
.map { observations -> [CGRect] in
return observations.map { $0.boundingBox }
}
.assign(to: &$detectedRects)
}
func startSession() {
captureSession.startSettion()
}
func makeOrientation(with deviceOrientation: UIDeviceOrientation) -> CGImagePropertyOrientation {
switch deviceOrientation {
case .portraitUpsideDown:
return .rightMirrored
case .landscapeLeft:
return .downMirrored
case .landscapeRight:
return .upMirrored
default:
return .leftMirrored
}
}
}
CaptureSession.swift
import Foundation
import AVKit
import Combine
import SwiftUI
final class CaptureSession: NSObject, ObservableObject {
struct Outputs {
let cameraIntrinsicData: CFTypeRef
let pixelBuffer: CVImageBuffer
let pixelBufferSize: CGSize
}
private let captureSession = AVCaptureSession()
private var captureDevice: AVCaptureDevice?
private var videoDataOutput: AVCaptureVideoDataOutput?
private var videoDataOutputQueue: DispatchQueue?
private(set) var previewLayer = AVCaptureVideoPreviewLayer()
var outputs = PassthroughSubject<Outputs, Never>()
private var cancellable: AnyCancellable?
override init() {
super.init()
setupCaptureSession()
}
// MARK: - Create capture session
private func setupCaptureSession() {
captureSession.sessionPreset = .photo
// use front camera
if let availableDevice = AVCaptureDevice.DiscoverySession(
deviceTypes: [.builtInWideAngleCamera],
mediaType: .video,
position: .front
).devices.first {
captureDevice = availableDevice
do {
let captureDeviceInput = try AVCaptureDeviceInput(device: availableDevice)
captureSession.addInput(captureDeviceInput)
} catch {
print(error.localizedDescription)
}
}
makePreviewLayser(session: captureSession)
// Only this part uses Combine. TODO: fix later
cancellable = NotificationCenter.default.publisher(for: UIDevice.orientationDidChangeNotification)
.map { _ in () }
.prepend(()) // initial run
.sink { [previewLayer] in
let interfaceOrientation = UIApplication.shared.windows.first?.windowScene?.interfaceOrientation
if let interfaceOrientation = interfaceOrientation,
let orientation = AVCaptureVideoOrientation(interfaceOrientation: interfaceOrientation)
{
previewLayer.connection?.videoOrientation = orientation
}
}
makeDataOutput()
}
func startSettion() {
if captureSession.isRunning { return }
captureSession.startRunning()
}
func stopSettion() {
if !captureSession.isRunning { return }
captureSession.stopRunning()
}
private func makePreviewLayser(session: AVCaptureSession) {
let previewLayer = AVCaptureVideoPreviewLayer(session: session)
previewLayer.name = "CameraPreview"
previewLayer.videoGravity = .resizeAspectFill
previewLayer.backgroundColor = UIColor.green.cgColor
//previewLayer.borderWidth = 2
//previewLayer.borderColor = UIColor.black.cgColor
self.previewLayer = previewLayer
}
private func makeDataOutput() {
let videoDataOutput = AVCaptureVideoDataOutput()
videoDataOutput.videoSettings = [
(kCVPixelBufferPixelFormatTypeKey as String): kCVPixelFormatType_32BGRA
]
// Discard frames that arrive late instead of queueing them
videoDataOutput.alwaysDiscardsLateVideoFrames = true
let videoDataOutputQueue = DispatchQueue(label: "com.Personal-Factory.Realtime-Face-Tracking")
videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
captureSession.beginConfiguration()
if captureSession.canAddOutput(videoDataOutput) {
captureSession.addOutput(videoDataOutput)
}
// Enable intrinsic matrix delivery so CMGetAttachment can read it from the sample buffer
if let captureConnection = videoDataOutput.connection(with: .video) {
if captureConnection.isCameraIntrinsicMatrixDeliverySupported {
captureConnection.isCameraIntrinsicMatrixDeliveryEnabled = true
}
}
self.videoDataOutput = videoDataOutput
self.videoDataOutputQueue = videoDataOutputQueue
captureSession.commitConfiguration()
}
}
extension CaptureSession: AVCaptureVideoDataOutputSampleBufferDelegate {
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
guard let cameraIntrinsicData = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil) else {
return
}
guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
print("Failed to obtain a CVPixelBuffer for the current output frame.")
return
}
let width = CVPixelBufferGetWidth(pixelBuffer)
let hight = CVPixelBufferGetHeight(pixelBuffer)
self.outputs.send(.init(
cameraIntrinsicData: cameraIntrinsicData,
pixelBuffer: pixelBuffer,
pixelBufferSize: CGSize(width: width, height: hight)
))
}
}
// MARK: - AVCaptureVideoOrientation
extension AVCaptureVideoOrientation: CustomDebugStringConvertible {
public var debugDescription: String {
switch self {
case .portrait:
return "portrait"
case .portraitUpsideDown:
return "portraitUpsideDown"
case .landscapeRight:
return "landscapeRight"
case .landscapeLeft:
return "landscapeLeft"
@unknown default:
return "unknown"
}
}
public init?(deviceOrientation: UIDeviceOrientation) {
switch deviceOrientation {
case .portrait:
self = .portrait
case .portraitUpsideDown:
self = .portraitUpsideDown
case .landscapeLeft:
self = .landscapeRight
case .landscapeRight:
self = .landscapeLeft
case .faceUp,
.faceDown,
.unknown:
return nil
@unknown default:
return nil
}
}
public init?(interfaceOrientation: UIInterfaceOrientation) {
switch interfaceOrientation {
case .portrait:
self = .portrait
case .portraitUpsideDown:
self = .portraitUpsideDown
case .landscapeLeft:
self = .landscapeLeft
case .landscapeRight:
self = .landscapeRight
case .unknown:
return nil
@unknown default:
return nil
}
}
}
VisionClient.swift
import Foundation
import Vision
import Combine
// tracking face via CVPixelBuffer
final class VisionClient: NSObject, ObservableObject {
enum State {
case stop
case tracking(trackingRequests: [VNTrackObjectRequest])
}
@Published var visionObjectObservations: [VNDetectedObjectObservation] = []
@Published var state: State = .stop
private var subscriber: Set<AnyCancellable> = []
private lazy var sequenceRequestHandler = VNSequenceRequestHandler()
func request(cvPixelBuffer pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation, options: [VNImageOption : Any] = [:]) {
switch state {
case .stop:
initialRequest(cvPixelBuffer: pixelBuffer, orientation: orientation, options: options)
case .tracking(let trackingRequests):
guard !trackingRequests.isEmpty else {
initialRequest(cvPixelBuffer: pixelBuffer, orientation: orientation, options: options)
break
}
do {
try sequenceRequestHandler.perform(trackingRequests, on: pixelBuffer, orientation: orientation)
} catch {
print(error.localizedDescription)
}
// Set up the next round of tracking requests.
// After perform(), each request's results property has been updated.
let newTrackingRequests = trackingRequests.compactMap { request -> VNTrackObjectRequest? in
guard let results = request.results else {
return nil
}
guard let observation = results.first as? VNDetectedObjectObservation else {
return nil
}
if !request.isLastFrame {
if observation.confidence > 0.3 {
request.inputObservation = observation
} else {
request.isLastFrame = true
}
return request
} else {
return nil
}
}
state = .tracking(trackingRequests: newTrackingRequests)
if newTrackingRequests.isEmpty {
// Nothing left to track
self.visionObjectObservations = []
return
}
newTrackingRequests.forEach { request in
guard let result = request.results as? [VNDetectedObjectObservation] else { return }
self.visionObjectObservations = result
}
}
}
// MARK: Performing Vision Requests
private func prepareRequest(completion: @escaping (Result<[VNTrackObjectRequest], Error>) -> Void) -> VNDetectFaceRectanglesRequest {
var requests = [VNTrackObjectRequest]()
let faceRequest = VNDetectFaceRectanglesRequest(completionHandler: { (request, error) in
if let error = error {
completion(.failure(error))
}
guard let faceDetectionRequest = request as? VNDetectFaceRectanglesRequest,
let results = faceDetectionRequest.results as? [VNFaceObservation] else {
return
}
// Add the observations to the tracking list
for obs in results {
let faceTrackingRequest = VNTrackObjectRequest(detectedObjectObservation: obs)
requests.append(faceTrackingRequest)
}
completion(.success(requests))
})
return faceRequest
}
private func initialRequest(cvPixelBuffer pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation, options: [VNImageOption : Any] = [:]) {
// No tracking object detected, so perform initial detection
let imageRequestHandler = VNImageRequestHandler(
cvPixelBuffer: pixelBuffer,
orientation: orientation,
options: options
)
do {
let faceDetectionRequest = prepareRequest() { [weak self] result in
switch result {
case .success(let trackingRequests):
self?.state = .tracking(trackingRequests: trackingRequests)
case .failure(let error):
print("error: \(String(describing: error)).")
}
}
try imageRequestHandler.perform([faceDetectionRequest])
} catch let error as NSError {
NSLog("Failed to perform FaceRectangleRequest: %@", error)
}
}
}
The preview is built with UIViewControllerRepresentable rather than UIViewRepresentable.
Apparently this is because sizing the layer is awkward with UIViewRepresentable, though it does not seem to be unusable (see the sketch after the link below).
The article below also covers this with a minimal sample, so I want to try it and compare the behavior.
SwiftUIでAVFundationを導入する【Video Capture偏】
https://blog.personal-factory.com/2020/06/14/introduce-avfundation-by-swiftui/
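For comparison, a minimal sketch of a UIViewRepresentable version (my own, not taken from the repository; PreviewHostView and PreviewLayerRepresentableView are hypothetical names) could update the layer's frame from layoutSubviews instead of relying on a view controller:

import SwiftUI
import UIKit
import AVFoundation

/// A plain UIView that hosts the preview layer and resizes it in layoutSubviews,
/// because the view's frame is still .zero when makeUIView is called.
final class PreviewHostView: UIView {
    var previewLayer: AVCaptureVideoPreviewLayer? {
        didSet {
            oldValue?.removeFromSuperlayer()
            if let newLayer = previewLayer {
                layer.addSublayer(newLayer)
            }
        }
    }
    override func layoutSubviews() {
        super.layoutSubviews()
        // Keep the sublayer in sync with the view's actual size.
        previewLayer?.frame = bounds
    }
}

struct PreviewLayerRepresentableView: UIViewRepresentable {
    let previewLayer: AVCaptureVideoPreviewLayer

    func makeUIView(context: Context) -> PreviewHostView {
        let view = PreviewHostView()
        view.previewLayer = previewLayer
        return view
    }

    func updateUIView(_ uiView: PreviewHostView, context: Context) {
        // No-op: layoutSubviews handles frame updates.
    }
}

The detection-rectangle overlay is omitted here; this only shows one way to keep the preview layer sized correctly, for comparison with the UIViewControllerRepresentable version above.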