Skip to content

Commit 7fed97b

Browse files
qnixsynapse authored and NeoZhangJianyu committed
SYCL: Migrate away from deprecated ggml_tensor->backend (ggml-org#10840)
* Migrate to tensor->buffer for checking backend buffer type: 1 * SYCL: common.cpp try to migrate away from tensor->backend * SYCL: fix assertions and add proper comments * SYCL: remove extra space * SYCL: Add back static to ggml_backend_buffer_is_sycl_split function * SYCL: Add pragma directive to suppress warning spam * SYCL: Integrate debug logs with GGML_LOG and other fixes * Revert "SYCL: Integrate debug logs with GGML_LOG and other fixes" This reverts commit 2607b7d. Let's keep the current SYCL specific logging mechanism for now * SYCL: Use GGML_SYCL_DEBUG after reverting * SYCL: reg_get_proc_address func, update to the current func signature * SYCL: Refactor SYCL buffer checks in ggml_sycl_cpy_tensor_2d
1 parent 968b56a commit 7fed97b

File tree

3 files changed

+33
-21
lines changed

3 files changed

+33
-21
lines changed

ggml/src/ggml-sycl/common.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
//
1212

1313
#include "common.hpp"
14+
15+
#include "ggml-backend-impl.h"
1416
#include "ggml-impl.h"
1517

1618
int get_current_device_id() {
@@ -211,9 +213,9 @@ void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
211213
const ggml_sycl_op_flatten_t op) try {
212214

213215
const bool use_src1 = src1 != nullptr;
214-
215-
GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
216-
GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
216+
if(use_src1)
217+
GGML_ASSERT(strcmp(src1->buffer->buft->iface.get_name(src1->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
218+
GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
217219

218220
// dd = data device
219221
float * src0_ddf = (float *) src0->data;

ggml/src/ggml-sycl/common.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,11 @@
2929

3030
#define GGML_COMMON_DECL_SYCL
3131
#define GGML_COMMON_IMPL_SYCL
32+
/* suppress warning spam */
33+
#pragma clang diagnostic push
34+
#pragma clang diagnostic ignored "-Wnested-anon-types"
3235
#include "ggml-common.h"
36+
#pragma clang diagnostic pop
3337

3438

3539
void ggml_sycl_host_free(void* ptr);

ggml/src/ggml-sycl/ggml-sycl.cpp

+24-18
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,8 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
131131
ggml_tensor *tensor) try {
132132
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
133133

134-
if (tensor->view_src != NULL && tensor->view_offs == 0) {
134+
if (tensor->view_src != NULL) {
135135
assert(tensor->view_src->buffer->buft == buffer->buft);
136-
tensor->backend = tensor->view_src->backend;
137-
tensor->extra = tensor->view_src->extra;
138136
return;
139137
}
140138

@@ -580,7 +578,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
580578
size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
581579
}
582580

583-
// FIXME: do not crash if cudaMalloc fails
581+
// FIXME: do not crash if SYCL Buffer alloc fails
584582
// currently, init_tensor cannot fail, it needs to be fixed in ggml-backend first
585583
ggml_sycl_set_device(id);
586584
const queue_ptr stream = ctx->streams[id];
@@ -622,7 +620,6 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
622620
CHECK_TRY_ERROR(extra->events[id][is] = new sycl::event()));
623621
}
624622
}
625-
tensor->backend = GGML_BACKEND_TYPE_GPU_SPLIT;
626623
tensor->extra = extra;
627624
}
628625
catch (sycl::exception const &exc) {
@@ -2211,12 +2208,22 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
22112208

22122209
dpct::memcpy_direction kind;
22132210
char * src_ptr;
2214-
if (src->backend == GGML_BACKEND_TYPE_CPU) {
2211+
if (ggml_backend_buffer_is_host(src->buffer)) {
22152212
kind = dpct::host_to_device;
2213+
//GGML_SYCL_DEBUG("%s: Host buffer type src tensor\n", __func__);
22162214
src_ptr = (char *) src->data;
22172215
// GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr);
2218-
} else if (src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
2219-
GGML_ASSERT(src->backend != GGML_BACKEND_TYPE_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
2216+
} else if (ggml_backend_buffer_is_sycl(src->buffer)) {
2217+
// If buffer is a SYCL buffer
2218+
//GGML_SYCL_DEBUG("%s: SYCL buffer type src tensor\n", __func__);
2219+
kind = dpct::device_to_device;
2220+
src_ptr = (char *) src->data;
2221+
} else if (ggml_backend_buffer_is_sycl_split(src->buffer)) {
2222+
/*
2223+
If buffer is a SYCL split buffer
2224+
*/
2225+
//GGML_SYCL_DEBUG("%s: Split buffer type src tensor\n", __func__);
2226+
GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]);
22202227
kind = dpct::device_to_device;
22212228
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
22222229
int id;
@@ -2721,8 +2728,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
27212728
const int nb2 = dst->nb[2];
27222729
const int nb3 = dst->nb[3];
27232730

2724-
GGML_ASSERT(dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
2725-
GGML_ASSERT(src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
2731+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
2732+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src1->buffer));
27262733
GGML_ASSERT(src1->type == GGML_TYPE_F32 || (src1->ne[2] == 1 && src1->ne[3] == 1));
27272734

27282735
GGML_ASSERT(ne12 >= ne02 && ne12 % ne02 == 0);
@@ -2742,7 +2749,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
27422749

27432750
int64_t src1_padded_col_size = GGML_PAD(ne10, MATRIX_ROW_PADDING);
27442751

2745-
const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
2752+
const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer);
27462753
GGML_ASSERT(!(split && ne02 > 1));
27472754
GGML_ASSERT(!(split && ne03 > 1));
27482755
GGML_ASSERT(!(split && ne02 < ne12));
@@ -3067,7 +3074,7 @@ static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const gg
30673074
const ggml_tensor *src1,
30683075
ggml_tensor *dst) try {
30693076
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
3070-
GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
3077+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
30713078
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
30723079
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
30733080
GGML_ASSERT(src0->type == GGML_TYPE_F16);
@@ -3100,7 +3107,7 @@ static void ggml_sycl_mul_mat_vec_nc(ggml_backend_sycl_context & ctx, const ggml
31003107
GGML_ASSERT(!ggml_is_transposed(src0));
31013108
GGML_ASSERT(!ggml_is_transposed(src1));
31023109
GGML_ASSERT(!ggml_is_permuted(src0));
3103-
GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
3110+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
31043111
GGML_ASSERT(src0->type == GGML_TYPE_F16);
31053112
GGML_ASSERT(src1->type == GGML_TYPE_F32);
31063113

@@ -3162,7 +3169,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx,
31623169
ggml_tensor *dst) try {
31633170
GGML_ASSERT(!ggml_is_transposed(src0));
31643171
GGML_ASSERT(!ggml_is_transposed(src1));
3165-
GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
3172+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
31663173
GGML_ASSERT(src0->type == GGML_TYPE_F16);
31673174

31683175
GGML_TENSOR_BINARY_OP_LOCALS
@@ -4543,10 +4550,9 @@ static ggml_backend_dev_t ggml_backend_sycl_reg_get_device(ggml_backend_reg_t re
45434550
static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char *name) {
45444551
GGML_UNUSED(reg);
45454552

4546-
// TODO: update to the current function signature
4547-
//if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
4548-
// return (void *)ggml_backend_sycl_split_buffer_type;
4549-
//}
4553+
if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
4554+
return (void *)ggml_backend_sycl_split_buffer_type;
4555+
}
45504556

45514557
// SYCL doesn't support registering host memory, left here for reference
45524558
// "ggml_backend_register_host_buffer"

0 commit comments

Comments (0)