#include <windows.h>
#include <wincodec.h>
#include <tchar.h>
#include <assert.h>
#include <d3d11.h>
#include <amp.h>
#include <amp_graphics.h>
#include <DirectXTex.h>
#pragma comment(lib, "d3d11.lib")
using namespace concurrency;
using namespace concurrency::graphics;
using namespace concurrency::direct3d;
// Rank used for the C++ AMP textures and extents in this file (2 = two-dimensional images).
#define DIMENSION 2
// Direct3D 11 device created in TestAMP; also wrapped there as the C++ AMP accelerator_view.
ID3D11Device* g_pd3dDevice = nullptr;
// Immediate context that belongs to g_pd3dDevice; used when capturing the processed texture.
ID3D11DeviceContext* g_pImmediateContext = nullptr ;
// Direct3D texture holding the image loaded from disk (the effect's input).
ID3D11Texture2D* g_pInputTexture = nullptr;
// C++ AMP texture that receives the processed image (the effect's output).
texture<unorm_4, 2>* g_pAmpProcessedTexture = nullptr;
// Tile size: each tile is sTileSize x sTileSize threads.
static const UINT sTileSize = 32;
// A tile may hold at most 1024 threads (32 * 32 = 1024); 48 * 48 = 2304 is rejected by the compiler.
//static const UINT sTileSize = 48; // compile ERROR
// For background, search for "Overview of tiling in C++ AMP".
// An extent describes a range (the size of the index space in each dimension).
// Builds the tiled compute domain for `ext` using sTileSize x sTileSize tiles.
// A tile is fine as long as it contains 1024 threads or fewer in total.
// The result is padded, i.e. each dimension is rounded up to a multiple of the tile size,
// so the returned domain can be larger than `ext`.
tiled_extent<sTileSize, sTileSize> GetTiledExtent(const extent<DIMENSION>& ext)
{
    return tiled_extent<sTileSize, sTileSize>(ext).pad();
}
// Kernel function, executed once per thread of the tiled compute domain.
//
// Pixels whose coordinates satisfy 100 <= y < 250 and 500 < x < 720 are replaced by the pixel
// at the top-left corner of their tile (a mosaic effect); every other pixel is copied from the
// input. Alpha is always written as 1.0.
//
// input_tex       : texture the pixels are read from.
// output_tex_view : write-only view the result is written to (assumed to have the same extent
//                   as input_tex -- TODO confirm for callers other than TestAMP).
// idx             : tiled index of the current thread.
//
// The compute domain is padded up to a multiple of the tile size, so some threads can lie
// outside the texture. Those threads still take part in the barrier but neither read nor write
// the textures.
void ApplyEffectKernelFunc001(const texture<unorm_4, 2>& input_tex, writeonly_texture_view<unorm_4, 2> output_tex_view, tiled_index<sTileSize, sTileSize> idx) restrict(amp)
{
    // Memory shared by all threads of one tile.
    tile_static float_4 local_pixels[sTileSize][sTileSize];

    const UINT globalY = idx.global[0];
    const UINT globalX = idx.global[1];
    const UINT localY = idx.local[0];
    const UINT localX = idx.local[1];

    // False for the threads that exist only because of the padding.
    const bool insideTexture = input_tex.extent.contains(idx.global);

    // Indexing the tile_static array directly with idx.local does not compile,
    // hence the explicit [y][x] form.
    if (insideTexture)
    {
        local_pixels[localY][localX] = static_cast<float_4>(input_tex[idx.global].rgba);
    }
    else
    {
        local_pixels[localY][localX] = float_4(0.0f);
    }

    // Every thread of the tile must reach the barrier, so no thread may return before it.
    idx.barrier.wait();

    if (!insideTexture)
    {
        return;
    }

    float_4 pixel;
    if ((globalY >= 100 && globalY < 250) && (globalX > 500 && globalX < 720))
    {
        // Mosaic: inside the region every pixel takes the colour of its tile's first pixel.
        // The tile origin is always inside the texture, so this entry holds a real pixel.
        pixel = local_pixels[0][0];
    }
    else
    {
        // Outside the region: pass the input pixel through (already loaded into the tile).
        pixel = local_pixels[localY][localX];
    }
    output_tex_view.set(idx.global, unorm_4(pixel.r, pixel.g, pixel.b, 1.0f));
}
void ApplyEffect(const texture<unorm_4, DIMENSION> & input_tex, writeonly_texture_view<unorm_4, DIMENSION>& output_tex_view)
{
// タイルのエクステント
tiled_extent<sTileSize, sTileSize> computeDomain = GetTiledExtent(input_tex.extent);
// 並列計算を起動
parallel_for_each(computeDomain, [=, &input_tex](tiled_index<sTileSize, sTileSize> idx) restrict(amp)
{
ApplyEffectKernelFunc001(input_tex, output_tex_view, idx);
});
}
void TestAMP(_TCHAR* filePath)
{
HRESULT hr;
hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
unsigned int createDeviceFlags = 0;
#ifdef _DEBUG
createDeviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
#endif
// DirectX11の初期化
D3D_FEATURE_LEVEL FeatureLevel = D3D_FEATURE_LEVEL_11_0;
hr = D3D11CreateDevice( nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, createDeviceFlags, &FeatureLevel, 1, D3D11_SDK_VERSION, &g_pd3dDevice, nullptr, &g_pImmediateContext );
assert( hr == S_OK );
// リソースが解放されない? 実行すると何かが残る
accelerator_view g_av = create_accelerator_view(reinterpret_cast<IUnknown *>(g_pd3dDevice));
std::wstring desc = g_av.get_accelerator().get_description();
bool is_debug = g_av.get_is_debug();
queuing_mode qmode = g_av.get_queuing_mode();
// 画像ファイルを読み込む
DirectX::TexMetadata mdata;
DirectX::ScratchImage image;
hr = DirectX::LoadFromWICFile(filePath, DirectX::DDS_FLAGS_NONE, &mdata, image);
assert( hr == S_OK );
// ID3D11Texture2Dを作成
hr = DirectX::CreateTexture( g_pd3dDevice, image.GetImages(), image.GetImageCount(), mdata, reinterpret_cast<ID3D11Resource **>(&g_pInputTexture) );
assert( hr == S_OK );
// Concurrency::graphics::textureを作成
UINT img_width = mdata.width;
UINT img_height = mdata.height;
#if 1
// OK
g_pAmpProcessedTexture = new texture<unorm_4, DIMENSION>(static_cast<int>(img_height), static_cast<int>(img_width), 8U, g_av);
#else
// NG ApplyEffect内でエラー発生する
// g_pAmpProcessedTexture = new texture<unorm_4, DIMENSION>(static_cast<int>(img_height), static_cast<int>(img_width), 8U);
#endif
// 画像データを加工する
// 出力先
writeonly_texture_view<unorm_4, DIMENSION> output_tex_view(*g_pAmpProcessedTexture);
// 入力データ
// ID3D11Texture2D g_pInputTextureからConcurrency::graphics::textureを作成
const texture<unorm_4, DIMENSION> input_tex = graphics::direct3d::make_texture<unorm_4, DIMENSION>(g_av, reinterpret_cast<IUnknown *>(g_pInputTexture));
// 加工処理
ApplyEffect(input_tex, output_tex_view);
// 加工結果を取り出す
// Concurrency::graphics::texture の g_pAmpProcessedTexture から ID3D11Texture2Dを取得
ID3D11Texture2D* processedTexture = reinterpret_cast<ID3D11Texture2D *>(graphics::direct3d::get_texture<unorm_4, DIMENSION>(*g_pAmpProcessedTexture));
DirectX::ScratchImage output_image;
// processedTextureをoutput_imageにキャプチャーする
hr = DirectX::CaptureTexture(g_pd3dDevice, g_pImmediateContext, reinterpret_cast<ID3D11Resource *>(processedTexture), output_image);
assert( hr == S_OK );
// Jpeg形式で画像を保存
GUID containerFormat = GUID_ContainerFormatJpeg;
DWORD flags = 0;
const DirectX::Image* pImage = output_image.GetImages();
size_t numImage = output_image.GetImageCount();
hr = DirectX::SaveToWICFile(pImage, numImage, flags, containerFormat, L"output.jpg");
assert( hr == S_OK );
// 解放
processedTexture->Release();
if (g_pInputTexture) g_pInputTexture->Release();
if (g_pAmpProcessedTexture) delete g_pAmpProcessedTexture;
if (g_pImmediateContext) g_pImmediateContext->Release();
if (g_pd3dDevice) g_pd3dDevice->Release();
CoUninitialize();
}
// Program entry point: processes the image named by the first command-line argument.
// Without an argument nothing is done. The exit code is always 0.
int _tmain(int argc, _TCHAR* argv[])
{
    if (argc <= 1)
    {
        return 0;
    }

    TestAMP(argv[1]);
    return 0;
}
|